Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions src/SIL.Machine/Corpora/ScrVersExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System.Collections.Generic;
using SIL.Scripture;

namespace SIL.Machine.Corpora
{
/// <summary>
/// Extension methods for <see cref="ScrVers"/> that enumerate verse references of a versification
/// and detect verses that land in a different book when converted to another versification.
/// </summary>
public static class ScrVersExtensions
{
    // Inclusive range of book numbers that AllIncludedVerses skips even when Canon.IsCanonical
    // accepts them (the original condition was "book > 86 && book < 93").
    // NOTE(review): the reason these six book numbers are skipped is not visible in this file -
    // confirm against the SIL.Scripture canon table before changing the range.
    private const int FirstSkippedBookNum = 87;
    private const int LastSkippedBookNum = 92;

    /// <summary>
    /// Gets a list of references (verse references) for the specified book.
    /// </summary>
    /// <param name="scrVers">The versification that supplies chapter/verse counts and exclusions.</param>
    /// <param name="bookNum">The book number to enumerate.</param>
    /// <returns>
    /// An eagerly built list containing one <see cref="VerseRef"/> for every verse from 1 up to the
    /// last verse of every chapter from 1 up to the last chapter of the book, omitting verses that
    /// the versification reports as excluded.
    /// </returns>
    public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
    {
        List<VerseRef> references = new List<VerseRef>();
        int lastChapter = scrVers.GetLastChapter(bookNum);

        for (int chapterNum = 1; chapterNum <= lastChapter; chapterNum++)
        {
            int lastVerse = scrVers.GetLastVerse(bookNum, chapterNum);

            for (int verseNum = 1; verseNum <= lastVerse; verseNum++)
            {
                int bbbcccvvv = VerseRef.GetBBBCCCVVV(bookNum, chapterNum, verseNum);
                if (!scrVers.IsExcluded(bbbcccvvv))
                {
                    references.Add(new VerseRef(bookNum, chapterNum, verseNum, scrVers));
                }
            }
        }

        return references;
    }

    /// <summary>
    /// Lazily enumerates verse references of the versification, book by book and chapter by chapter.
    /// </summary>
    /// <remarks>
    /// Books are visited from 1 up to <c>GetLastBook()</c>; a book is skipped when
    /// <c>Canon.IsCanonical</c> rejects it or when its number is between 87 and 92 inclusive.
    /// Within a chapter the scan starts at verse 2. The value returned by
    /// <c>FirstIncludedVerse</c> is emitted only once a non-excluded verse numbered 2 or higher has
    /// been found, immediately before that verse. As a consequence, a chapter with no included
    /// verse numbered 2 or higher contributes nothing to the sequence.
    /// </remarks>
    /// <param name="scrVers">The versification to enumerate.</param>
    /// <returns>A deferred sequence of verse references bound to <paramref name="scrVers"/>.</returns>
    public static IEnumerable<VerseRef> AllIncludedVerses(this ScrVers scrVers)
    {
        // The upper bounds do not change while a book/chapter is being scanned, so each one is
        // read once instead of being re-queried in every loop condition.
        int lastBook = scrVers.GetLastBook();
        for (int book = 1; book <= lastBook; book++)
        {
            if (!Canon.IsCanonical(book) || (book >= FirstSkippedBookNum && book <= LastSkippedBookNum))
                continue;

            int lastChapter = scrVers.GetLastChapter(book);
            for (int chapter = 1; chapter <= lastChapter; chapter++)
            {
                VerseRef? firstVerse = scrVers.FirstIncludedVerse(book, chapter);
                int lastVerse = scrVers.GetLastVerse(book, chapter);
                bool yieldedFirstVerse = false;

                for (int verseNumber = 2; verseNumber <= lastVerse; verseNumber++)
                {
                    VerseRef verse = new VerseRef(book, chapter, verseNumber, scrVers);
                    if (scrVers.IsExcluded(verse.BBBCCCVVV))
                        continue;

                    // The first included verse is deferred until the chapter is known to contain
                    // at least one further included verse.
                    // NOTE(review): if FirstIncludedVerse can return a verse other than verse 1,
                    // that verse would be yielded here and again just below - confirm.
                    if (!yieldedFirstVerse && firstVerse.HasValue)
                    {
                        yield return firstVerse.Value;
                        yieldedFirstVerse = true;
                    }
                    yield return verse;
                }
            }
        }
    }

    /// <summary>
    /// Determines whether any verse produced by <see cref="AllIncludedVerses"/> ends up in a
    /// different book after being converted to the reference versification.
    /// </summary>
    /// <param name="scrVers">The versification whose verses are checked.</param>
    /// <param name="referenceVersification">
    /// The versification to convert to; <see cref="ScrVers.Original"/> is used when this is null.
    /// </param>
    /// <returns>
    /// <c>true</c> as soon as one verse changes book number under the conversion; otherwise <c>false</c>.
    /// </returns>
    public static bool HasCrossBookMappings(this ScrVers scrVers, ScrVers referenceVersification = null)
    {
        ScrVers targetVersification = referenceVersification ?? ScrVers.Original;

        foreach (VerseRef verseRef in scrVers.AllIncludedVerses())
        {
            // VerseRef is a value type, so this assignment makes an independent copy and
            // verseRef keeps its original book number for the comparison.
            VerseRef convertedRef = verseRef;
            convertedRef.ChangeVersification(targetVersification);
            if (verseRef.BookNum != convertedRef.BookNum)
                return true;
        }

        return false;
    }
}
}
16 changes: 10 additions & 6 deletions src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator<TextRow>
private readonly IEnumerator<TextRow> _enumerator;
private readonly bool _isScripture = false;
private readonly Queue<TextRow> _verseRows;
private readonly ScrVers _versification;
private readonly ScrVers _refVersification;
private TextRow _current;
private bool _isEnumerating = false;
Expand All @@ -19,6 +20,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator<TextRow>
public TextCorpusEnumerator(IEnumerator<TextRow> enumerator, ScrVers refVersification, ScrVers versification)
{
_enumerator = enumerator;
_versification = versification;
_refVersification = refVersification;
_isScripture = refVersification != null && versification != null && refVersification != versification;
_verseRows = new Queue<TextRow>();
Expand Down Expand Up @@ -67,18 +69,20 @@ protected override void DisposeManagedResources()

private void CollectVerses()
{
bool hasCrossBookMappings = _versification.HasCrossBookMappings(_refVersification);

var rowList = new List<(ScriptureRef Ref, TextRow Row)>();
bool outOfOrder = false;
bool versesOutOfOrder = false;
ScriptureRef prevRefRef = ScriptureRef.Empty;
int rangeStartOffset = -1;
do
{
TextRow row = _enumerator.Current;
var refRef = (ScriptureRef)row.Ref;
if (!prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
refRef = refRef.ChangeVersification(_refVersification);
if (!hasCrossBookMappings && !prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
break;

refRef = refRef.ChangeVersification(_refVersification);
// convert one-to-many versification mapping to a verse range
if (refRef.Equals(prevRefRef))
{
Expand Down Expand Up @@ -106,13 +110,13 @@ private void CollectVerses()
rangeStartOffset = -1;
}
rowList.Add((refRef, row));
if (!outOfOrder && refRef.CompareTo(prevRefRef) < 0)
outOfOrder = true;
if (!versesOutOfOrder && refRef.CompareTo(prevRefRef) < 0)
versesOutOfOrder = true;
prevRefRef = refRef;
_enumeratorHasMoreData = _enumerator.MoveNext();
} while (_enumeratorHasMoreData);

if (outOfOrder)
if (versesOutOfOrder)
rowList.Sort((x, y) => x.Ref.CompareTo(y.Ref));

foreach ((ScriptureRef _, TextRow row) in rowList)
Expand Down
106 changes: 106 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1238,6 +1238,112 @@ public void GetRows_DifferentVersificationsWithVerseSegments()
Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split()));
}

[Test]
public void GetRows_DifferentVersificationsWithCrossBookMappings()
{
    // The source side uses the Original versification, the target side Russian Orthodox.
    ScrVers srcVers = ScrVers.Original;
    ScrVers trgVers = ScrVers.RussianOrthodox;

    var danSourceText = new MemoryText(
        "DAN",
        new[]
        {
            TextRow("DAN", ScriptureRef.Parse("DAN 3:23", srcVers), "DAN source chapter three, verse twenty three ."),
            TextRow("DAN", ScriptureRef.Parse("DAN 3:24", srcVers), "DAN source chapter three, verse twenty four ."),
        }
    );
    var s3ySourceText = new MemoryText(
        "S3Y",
        new[]
        {
            TextRow("S3Y", ScriptureRef.Parse("S3Y 1:1", srcVers), "S3Y source chapter one, verse one ."),
            TextRow("S3Y", ScriptureRef.Parse("S3Y 1:68", srcVers), "S3Y source chapter one, verse sixty eight ."),
        }
    );
    var srcCorpus = new DictionaryTextCorpus(danSourceText, s3ySourceText) { Versification = srcVers };

    var danTargetText = new MemoryText(
        "DAN",
        new[]
        {
            TextRow("DAN", ScriptureRef.Parse("DAN 3:23", trgVers), "DAN target chapter three, verse twenty three ."),
            TextRow("DAN", ScriptureRef.Parse("DAN 3:24", trgVers), "DAN target chapter three, verse twenty four ."),
            TextRow("DAN", ScriptureRef.Parse("DAN 3:90", trgVers), "DAN target chapter three, verse ninety ."),
            TextRow("DAN", ScriptureRef.Parse("DAN 3:91", trgVers), "DAN target chapter three, verse ninety one ."),
        }
    );
    var trgCorpus = new DictionaryTextCorpus(danTargetText) { Versification = trgVers };

    // Mappings this scenario relies on.
    // Russian Orthodox vs. Original:
    //   DAN 3:24-90  = DAG 3:24-90
    //   DAN 3:91-100 = DAN 3:24-33
    // Original:
    //   S3Y 1:1-29  = DAG 3:24-52
    //   ...
    //   S3Y 1:38-68 = DAG 3:60-90

    ParallelTextRow[] alignedRows = srcCorpus.AlignRows(trgCorpus, allSourceRows: true).ToArray();
    Assert.That(alignedRows, Has.Length.EqualTo(4));

    // Checks the references and tokenized segments on both sides of a single aligned row.
    void AssertAlignedRow(int index, string sourceRef, string targetRef, string sourceText, string targetText)
    {
        ParallelTextRow row = alignedRows[index];
        Assert.That(row.SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse(sourceRef, srcVers) }));
        Assert.That(row.TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse(targetRef, trgVers) }));
        Assert.That(row.SourceSegment, Is.EqualTo(sourceText.Split()));
        Assert.That(row.TargetSegment, Is.EqualTo(targetText.Split()));
    }

    AssertAlignedRow(
        0,
        "DAN 3:23",
        "DAN 3:23",
        "DAN source chapter three, verse twenty three .",
        "DAN target chapter three, verse twenty three ."
    );
    AssertAlignedRow(
        1,
        "DAN 3:24",
        "DAN 3:91",
        "DAN source chapter three, verse twenty four .",
        "DAN target chapter three, verse ninety one ."
    );
    AssertAlignedRow(
        2,
        "S3Y 1:1",
        "DAN 3:24",
        "S3Y source chapter one, verse one .",
        "DAN target chapter three, verse twenty four ."
    );
    AssertAlignedRow(
        3,
        "S3Y 1:68",
        "DAN 3:90",
        "S3Y source chapter one, verse sixty eight .",
        "DAN target chapter three, verse ninety ."
    );
}

[Test]
public void GetRows_DifferentVersificationsWithExtraVerse()
{
Expand Down
31 changes: 0 additions & 31 deletions tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs

This file was deleted.

33 changes: 33 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using NUnit.Framework;
using SIL.Scripture;

namespace SIL.Machine.Corpora;

/// <summary>
/// Tests for the <c>AllIncludedVerses</c> and <c>HasCrossBookMappings</c> extension methods on <see cref="ScrVers"/>.
/// </summary>
[TestFixture]
public class ScrVersExtensionsTests
{
    /// <summary>
    /// Pins the number of verses enumerated for three built-in versifications, plus one sample
    /// reference from each resulting list.
    /// </summary>
    [Test]
    public void AllIncludedVerses()
    {
        List<VerseRef> original = ScrVers.Original.AllIncludedVerses().ToList();
        Assert.That(original.Count, Is.EqualTo(41899));
        Assert.That(original[21899].BBBCCCVVV, Is.EqualTo(27003024));

        List<VerseRef> english = ScrVers.English.AllIncludedVerses().ToList();
        Assert.That(english.Count, Is.EqualTo(38393));
        VerseRef lastEnglish = english[english.Count - 1];
        Assert.That(lastEnglish.BBBCCCVVV, Is.EqualTo(123001020));

        List<VerseRef> russianOrthodox = ScrVers.RussianOrthodox.AllIncludedVerses().ToList();
        Assert.That(russianOrthodox.Count, Is.EqualTo(37280));
        VerseRef lastRussianOrthodox = russianOrthodox[russianOrthodox.Count - 1];
        Assert.That(lastRussianOrthodox.BBBCCCVVV, Is.EqualTo(83001015));
    }

    /// <summary>
    /// Checks the cross-book-mapping result for several built-in versifications, first against the
    /// default reference versification and finally against an explicitly supplied one.
    /// </summary>
    [Test]
    public void HasCrossBookMappings()
    {
        Assert.That(ScrVers.Original.HasCrossBookMappings(), Is.False);
        Assert.That(ScrVers.English.HasCrossBookMappings(), Is.True);
        Assert.That(ScrVers.RussianOrthodox.HasCrossBookMappings(), Is.True);
        Assert.That(ScrVers.RussianProtestant.HasCrossBookMappings(), Is.False);
        Assert.That(ScrVers.Vulgate.HasCrossBookMappings(), Is.True);
        Assert.That(ScrVers.Vulgate.HasCrossBookMappings(ScrVers.English), Is.True);
    }
}
Loading