diff --git a/src/SIL.Machine/Corpora/ScrVersExtensions.cs b/src/SIL.Machine/Corpora/ScrVersExtensions.cs
new file mode 100644
index 00000000..7114b10c
--- /dev/null
+++ b/src/SIL.Machine/Corpora/ScrVersExtensions.cs
@@ -0,0 +1,111 @@
+using System.Collections.Generic;
+using SIL.Scripture;
+
+namespace SIL.Machine.Corpora
+{
+    /// <summary>
+    /// Extension methods that enumerate the verses of a <see cref="ScrVers"/> versification and compare its
+    /// book assignments against another versification.
+    /// </summary>
+    public static class ScrVersExtensions
+    {
+        /// <summary>
+        /// Gets a list of references (verse references) for the specified book.
+        /// </summary>
+        /// <param name="scrVers">The versification that defines the chapters and verses of the book.</param>
+        /// <param name="bookNum">The number of the book whose verses are listed.</param>
+        /// <returns>
+        /// Every verse from chapter 1 through the last chapter of the book that the versification does not
+        /// exclude, in chapter and verse order.
+        /// </returns>
+        public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
+        {
+            List<VerseRef> references = new List<VerseRef>();
+            int lastChapter = scrVers.GetLastChapter(bookNum);
+
+            for (int chapterNum = 1; chapterNum <= lastChapter; chapterNum++)
+            {
+                int lastVerse = scrVers.GetLastVerse(bookNum, chapterNum);
+
+                for (int verseNum = 1; verseNum <= lastVerse; verseNum++)
+                {
+                    int bbbcccvvv = VerseRef.GetBBBCCCVVV(bookNum, chapterNum, verseNum);
+                    if (!scrVers.IsExcluded(bbbcccvvv))
+                    {
+                        references.Add(new VerseRef(bookNum, chapterNum, verseNum, scrVers));
+                    }
+                }
+            }
+
+            return references;
+        }
+
+        /// <summary>
+        /// Lazily enumerates the verses of the versification, visiting books, chapters and verses in ascending
+        /// numeric order and leaving out excluded verses.
+        /// </summary>
+        /// <param name="scrVers">The versification to enumerate.</param>
+        /// <returns>The verses that were not skipped.</returns>
+        /// <remarks>Books that are not canonical, and book numbers 87 through 92, are skipped entirely.</remarks>
+        public static IEnumerable<VerseRef> AllIncludedVerses(this ScrVers scrVers)
+        {
+            for (int book = 1; book <= scrVers.GetLastBook(); book++)
+            {
+                // NOTE(review): the 87-92 range is hard-coded; presumably these book numbers must never be
+                // enumerated. Confirm and consider naming the bounds.
+                if (!Canon.IsCanonical(book) || (book > 86 && book < 93))
+                    continue;
+                for (int chapter = 1; chapter <= scrVers.GetLastChapter(book); chapter++)
+                {
+                    VerseRef? firstVerse = scrVers.FirstIncludedVerse(book, chapter);
+                    bool yieldedFirstVerse = false;
+                    // The loop starts at verse 2: the value from FirstIncludedVerse stands in for the start of
+                    // the chapter and is emitted once, just before the first non-excluded verse found here.
+                    // NOTE(review): a chapter whose last verse is 1 never enters this loop and so yields
+                    // nothing, and if FirstIncludedVerse returns something other than verse 1 of this chapter
+                    // it may be emitted in addition to the same verse from the loop. Confirm both cases.
+                    for (int verseNumber = 2; verseNumber <= scrVers.GetLastVerse(book, chapter); verseNumber++)
+                    {
+                        VerseRef verse = new VerseRef(book, chapter, verseNumber, scrVers);
+                        if (scrVers.IsExcluded(verse.BBBCCCVVV))
+                            continue;
+                        if (!yieldedFirstVerse && firstVerse != null)
+                        {
+                            yield return (VerseRef)firstVerse;
+                            yieldedFirstVerse = true;
+                        }
+                        yield return verse;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Determines whether any verse of the versification ends up in a different book once it is converted
+        /// to a reference versification.
+        /// </summary>
+        /// <param name="scrVers">The versification whose verses are checked.</param>
+        /// <param name="referenceVersification">
+        /// The versification to convert to; <see cref="ScrVers.Original"/> is used when this is <c>null</c>.
+        /// </param>
+        /// <returns>
+        /// <c>true</c> as soon as one verse returned by <c>AllIncludedVerses</c> has a different book number
+        /// after conversion; <c>false</c> when none does.
+        /// </returns>
+        public static bool HasCrossBookMappings(this ScrVers scrVers, ScrVers referenceVersification = null)
+        {
+            if (referenceVersification == null)
+                referenceVersification = ScrVers.Original;
+            foreach (VerseRef verseRef in scrVers.AllIncludedVerses())
+            {
+                // Convert a separate variable so verseRef keeps its book number for the comparison
+                // (this relies on VerseRef being copied by assignment - presumably a struct; confirm).
+                VerseRef standardRef = verseRef;
+                standardRef.ChangeVersification(referenceVersification);
+                if (verseRef.BookNum != standardRef.BookNum)
+                    return true;
+            }
+            return false;
+        }
+    }
+}
diff --git a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
index 7d9547f5..6ad0222e 100644
--- a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
+++ b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
@@ -11,6 +11,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator
private readonly IEnumerator _enumerator;
private readonly bool _isScripture = false;
private readonly Queue _verseRows;
+ private readonly ScrVers _versification;
private readonly ScrVers _refVersification;
private TextRow _current;
private bool _isEnumerating = false;
@@ -19,6 +20,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator
public TextCorpusEnumerator(IEnumerator enumerator, ScrVers refVersification, ScrVers versification)
{
_enumerator = enumerator;
+ _versification = versification;
_refVersification = refVersification;
_isScripture = refVersification != null && versification != null && refVersification != versification;
_verseRows = new Queue();
@@ -67,18 +69,20 @@ protected override void DisposeManagedResources()
private void CollectVerses()
{
+ bool hasCrossBookMappings = _versification.HasCrossBookMappings(_refVersification);
+
var rowList = new List<(ScriptureRef Ref, TextRow Row)>();
- bool outOfOrder = false;
+ bool versesOutOfOrder = false;
ScriptureRef prevRefRef = ScriptureRef.Empty;
int rangeStartOffset = -1;
do
{
TextRow row = _enumerator.Current;
var refRef = (ScriptureRef)row.Ref;
- if (!prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
+ refRef = refRef.ChangeVersification(_refVersification);
+ if (!hasCrossBookMappings && !prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
break;
- refRef = refRef.ChangeVersification(_refVersification);
// convert one-to-many versification mapping to a verse range
if (refRef.Equals(prevRefRef))
{
@@ -106,13 +110,13 @@ private void CollectVerses()
rangeStartOffset = -1;
}
rowList.Add((refRef, row));
- if (!outOfOrder && refRef.CompareTo(prevRefRef) < 0)
- outOfOrder = true;
+ if (!versesOutOfOrder && refRef.CompareTo(prevRefRef) < 0)
+ versesOutOfOrder = true;
prevRefRef = refRef;
_enumeratorHasMoreData = _enumerator.MoveNext();
} while (_enumeratorHasMoreData);
- if (outOfOrder)
+ if (versesOutOfOrder)
rowList.Sort((x, y) => x.Ref.CompareTo(y.Ref));
foreach ((ScriptureRef _, TextRow row) in rowList)
diff --git a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
index 1d4eafd9..6a358597 100644
--- a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
@@ -1238,6 +1238,112 @@ public void GetRows_DifferentVersificationsWithVerseSegments()
Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split()));
}
+ [Test]
+ public void GetRows_DifferentVersificationsWithCrossBookMappings()
+ {
+ var sourceCorpus = new DictionaryTextCorpus(
+ new MemoryText(
+ "DAN",
+ new[]
+ {
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:23", ScrVers.Original),
+ "DAN source chapter three, verse twenty three ."
+ ),
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:24", ScrVers.Original),
+ "DAN source chapter three, verse twenty four ."
+ ),
+ }
+ ),
+ new MemoryText(
+ "S3Y",
+ new[]
+ {
+ TextRow(
+ "S3Y",
+ ScriptureRef.Parse("S3Y 1:1", ScrVers.Original),
+ "S3Y source chapter one, verse one ."
+ ),
+ TextRow(
+ "S3Y",
+ ScriptureRef.Parse("S3Y 1:68", ScrVers.Original),
+ "S3Y source chapter one, verse sixty eight ."
+ ),
+ }
+ )
+ )
+ {
+ Versification = ScrVers.Original,
+ };
+
+ var targetCorpus = new DictionaryTextCorpus(
+ new MemoryText(
+ "DAN",
+ new[]
+ {
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:23", ScrVers.RussianOrthodox),
+ "DAN target chapter three, verse twenty three ."
+ ),
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:24", ScrVers.RussianOrthodox),
+ "DAN target chapter three, verse twenty four ."
+ ),
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:90", ScrVers.RussianOrthodox),
+ "DAN target chapter three, verse ninety ."
+ ),
+ TextRow(
+ "DAN",
+ ScriptureRef.Parse("DAN 3:91", ScrVers.RussianOrthodox),
+ "DAN target chapter three, verse ninety one ."
+ ),
+ }
+ )
+ )
+ {
+ Versification = ScrVers.RussianOrthodox,
+ };
+
+ // Verse mappings behind the expected alignment below. Russian Orthodox vs. Original:
+ //   DAN 3:24-90 = DAG 3:24-90
+ //   DAN 3:91-100 = DAN 3:24-33
+ // Original (S3Y and Russian Orthodox DAN 3:24-90 both map to DAG, so they align with each other):
+ //   S3Y 1:1-29 = DAG 3:24-52
+ //   ...
+ //   S3Y 1:38-68 = DAG 3:60-90
+
+ var parallelCorpus = sourceCorpus.AlignRows(targetCorpus, allSourceRows: true);
+ ParallelTextRow[] rows = parallelCorpus.ToArray();
+ Assert.That(rows.Length, Is.EqualTo(4));
+
+ Assert.That(rows[0].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:23", ScrVers.Original) }));
+ Assert.That(rows[0].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:23", ScrVers.RussianOrthodox) }));
+ Assert.That(rows[0].SourceSegment, Is.EqualTo("DAN source chapter three, verse twenty three .".Split()));
+ Assert.That(rows[0].TargetSegment, Is.EqualTo("DAN target chapter three, verse twenty three .".Split()));
+
+ Assert.That(rows[1].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:24", ScrVers.Original) }));
+ Assert.That(rows[1].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:91", ScrVers.RussianOrthodox) }));
+ Assert.That(rows[1].SourceSegment, Is.EqualTo("DAN source chapter three, verse twenty four .".Split()));
+ Assert.That(rows[1].TargetSegment, Is.EqualTo("DAN target chapter three, verse ninety one .".Split()));
+
+ Assert.That(rows[2].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("S3Y 1:1", ScrVers.Original) }));
+ Assert.That(rows[2].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:24", ScrVers.RussianOrthodox) }));
+ Assert.That(rows[2].SourceSegment, Is.EqualTo("S3Y source chapter one, verse one .".Split()));
+ Assert.That(rows[2].TargetSegment, Is.EqualTo("DAN target chapter three, verse twenty four .".Split()));
+
+ Assert.That(rows[3].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("S3Y 1:68", ScrVers.Original) }));
+ Assert.That(rows[3].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:90", ScrVers.RussianOrthodox) }));
+ Assert.That(rows[3].SourceSegment, Is.EqualTo("S3Y source chapter one, verse sixty eight .".Split()));
+ Assert.That(rows[3].TargetSegment, Is.EqualTo("DAN target chapter three, verse ninety .".Split()));
+ }
+
[Test]
public void GetRows_DifferentVersificationsWithExtraVerse()
{
diff --git a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs
deleted file mode 100644
index 31360c49..00000000
--- a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-using SIL.Scripture;
-
-namespace SIL.Machine.Corpora;
-
-public static class ScrVersExtensions
-{
- ///
- /// Gets a list of references (verse references) for the specified book.
- ///
- public static IEnumerable GetReferencesForBook(this ScrVers scrVers, int bookNum)
- {
- List references = new List();
- int lastChapter = scrVers.GetLastChapter(bookNum);
-
- for (int chapterNum = 1; chapterNum <= lastChapter; chapterNum++)
- {
- int lastVerse = scrVers.GetLastVerse(bookNum, chapterNum);
-
- for (int verseNum = 1; verseNum <= lastVerse; verseNum++)
- {
- int bbbcccvvv = VerseRef.GetBBBCCCVVV(bookNum, chapterNum, verseNum);
- if (!scrVers.IsExcluded(bbbcccvvv))
- {
- references.Add(new VerseRef(bookNum, chapterNum, verseNum, scrVers));
- }
- }
- }
-
- return references;
- }
-}
diff --git a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs
new file mode 100644
index 00000000..d006e255
--- /dev/null
+++ b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs
@@ -0,0 +1,43 @@
+using NUnit.Framework;
+using SIL.Scripture;
+
+namespace SIL.Machine.Corpora;
+
+/// <summary>
+/// Tests for the <see cref="ScrVersExtensions"/> versification helpers.
+/// </summary>
+[TestFixture]
+public class ScrVersExtensionsTests
+{
+    /// <summary>
+    /// Pins the number of verses enumerated for three versifications and spot-checks one verse in each list.
+    /// </summary>
+    [Test]
+    public void AllIncludedVerses()
+    {
+        List<VerseRef> originalVerses = ScrVers.Original.AllIncludedVerses().ToList();
+        Assert.That(originalVerses, Has.Count.EqualTo(41899));
+        // Spot-check at a fixed index; the two lists below are checked at their last element instead.
+        Assert.That(originalVerses[21899].BBBCCCVVV, Is.EqualTo(27003024));
+        List<VerseRef> englishVerses = ScrVers.English.AllIncludedVerses().ToList();
+        Assert.That(englishVerses, Has.Count.EqualTo(38393));
+        Assert.That(englishVerses[englishVerses.Count - 1].BBBCCCVVV, Is.EqualTo(123001020));
+        List<VerseRef> russianOrthodoxVerses = ScrVers.RussianOrthodox.AllIncludedVerses().ToList();
+        Assert.That(russianOrthodoxVerses, Has.Count.EqualTo(37280));
+        Assert.That(russianOrthodoxVerses[russianOrthodoxVerses.Count - 1].BBBCCCVVV, Is.EqualTo(83001015));
+    }
+
+    /// <summary>
+    /// Checks cross-book detection with the default reference versification and with an explicit one.
+    /// </summary>
+    [Test]
+    public void HasCrossBookMappings()
+    {
+        Assert.That(ScrVers.Original.HasCrossBookMappings(), Is.False);
+        Assert.That(ScrVers.English.HasCrossBookMappings(), Is.True);
+        Assert.That(ScrVers.RussianOrthodox.HasCrossBookMappings(), Is.True);
+        Assert.That(ScrVers.RussianProtestant.HasCrossBookMappings(), Is.False);
+        Assert.That(ScrVers.Vulgate.HasCrossBookMappings(), Is.True);
+        Assert.That(ScrVers.Vulgate.HasCrossBookMappings(ScrVers.English), Is.True);
+    }
+}