From 990fc0ddc8a7237228c8ce1f47da68aff78ccf6b Mon Sep 17 00:00:00 2001
From: Enkidu93
Date: Tue, 9 Jun 2026 16:14:11 -0400
Subject: [PATCH] Port changes from https://github.com/sillsdev/machine.py/pull/309

---
 src/SIL.Machine/Corpora/ScrVersExtensions.cs  |  91 +++++++++++++++
 .../Corpora/TextCorpusEnumerator.cs           |  16 ++-
 .../Corpora/ParallelTextCorpusTests.cs        | 106 ++++++++++++++++++
 .../Corpora/ScrVersExtensions.cs              |  31 -----
 .../Corpora/ScrVersExtensionsTests.cs         |  33 ++++++
 5 files changed, 240 insertions(+), 37 deletions(-)
 create mode 100644 src/SIL.Machine/Corpora/ScrVersExtensions.cs
 delete mode 100644 tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs
 create mode 100644 tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs

diff --git a/src/SIL.Machine/Corpora/ScrVersExtensions.cs b/src/SIL.Machine/Corpora/ScrVersExtensions.cs
new file mode 100644
index 00000000..7114b10c
--- /dev/null
+++ b/src/SIL.Machine/Corpora/ScrVersExtensions.cs
@@ -0,0 +1,91 @@
+using System.Collections.Generic;
+using SIL.Scripture;
+
+namespace SIL.Machine.Corpora
+{
+    public static class ScrVersExtensions
+    {
+        /// <summary>
+        /// Gets a list of references (verse references) for the specified book.
+        /// </summary>
+        public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
+        {
+            List<VerseRef> references = new List<VerseRef>();
+            int lastChapter = scrVers.GetLastChapter(bookNum);
+
+            for (int chapterNum = 1; chapterNum <= lastChapter; chapterNum++)
+            {
+                int lastVerse = scrVers.GetLastVerse(bookNum, chapterNum);
+
+                for (int verseNum = 1; verseNum <= lastVerse; verseNum++)
+                {
+                    int bbbcccvvv = VerseRef.GetBBBCCCVVV(bookNum, chapterNum, verseNum);
+                    if (!scrVers.IsExcluded(bbbcccvvv))
+                    {
+                        references.Add(new VerseRef(bookNum, chapterNum, verseNum, scrVers));
+                    }
+                }
+            }
+
+            return references;
+        }
+
+        /// <summary>
+        /// Lazily enumerates the included verses of the versification, book by book and chapter by chapter.
+        /// Books rejected by <c>Canon.IsCanonical</c> and book numbers 87 through 92 are skipped.
+        /// </summary>
+        /// <remarks>
+        /// The verse loop starts at verse 2. The result of <c>FirstIncludedVerse</c> for the chapter is
+        /// emitted once, immediately before the first non-excluded verse found by that loop, so a chapter
+        /// in which the loop finds no such verse yields nothing.
+        /// </remarks>
+        public static IEnumerable<VerseRef> AllIncludedVerses(this ScrVers scrVers)
+        {
+            for (int book = 1; book <= scrVers.GetLastBook(); book++)
+            {
+                if (!Canon.IsCanonical(book) || (book > 86 && book < 93))
+                    continue;
+                for (int chapter = 1; chapter <= scrVers.GetLastChapter(book); chapter++)
+                {
+                    VerseRef? firstVerse = scrVers.FirstIncludedVerse(book, chapter);
+                    bool yieldedFirstVerse = false;
+                    for (int verseNumber = 2; verseNumber <= scrVers.GetLastVerse(book, chapter); verseNumber++)
+                    {
+                        VerseRef verse = new VerseRef(book, chapter, verseNumber, scrVers);
+                        if (scrVers.IsExcluded(verse.BBBCCCVVV))
+                            continue;
+                        if (!yieldedFirstVerse && firstVerse != null)
+                        {
+                            yield return (VerseRef)firstVerse;
+                            yieldedFirstVerse = true;
+                        }
+                        yield return verse;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Determines whether any included verse lands in a different book when it is converted to the
+        /// reference versification.
+        /// </summary>
+        /// <param name="scrVers">The versification whose verses are checked.</param>
+        /// <param name="referenceVersification">
+        /// The versification to convert to; <see cref="ScrVers.Original"/> is used when this is <c>null</c>.
+        /// </param>
+        /// <returns><c>true</c> as soon as one verse changes book number; otherwise <c>false</c>.</returns>
+        public static bool HasCrossBookMappings(this ScrVers scrVers, ScrVers referenceVersification = null)
+        {
+            if (referenceVersification == null)
+                referenceVersification = ScrVers.Original;
+            foreach (VerseRef verseRef in scrVers.AllIncludedVerses())
+            {
+                VerseRef standardRef = verseRef;
+                standardRef.ChangeVersification(referenceVersification);
+                if (verseRef.BookNum != standardRef.BookNum)
+                    return true;
+            }
+            return false;
+        }
+    }
+}
diff --git a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
index 7d9547f5..6ad0222e 100644
--- a/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
+++ b/src/SIL.Machine/Corpora/TextCorpusEnumerator.cs
@@ -11,6 +11,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator<TextRow>
         private readonly IEnumerator<TextRow> _enumerator;
         private readonly bool _isScripture = false;
         private readonly Queue<TextRow> _verseRows;
+        private readonly ScrVers _versification;
         private readonly ScrVers _refVersification;
         private TextRow _current;
         private bool _isEnumerating = false;
@@ -19,6 +20,7 @@ internal class TextCorpusEnumerator : DisposableBase, IEnumerator<TextRow>
         public TextCorpusEnumerator(IEnumerator<TextRow> enumerator, ScrVers refVersification, ScrVers versification)
         {
             _enumerator = enumerator;
+            _versification = versification;
             _refVersification = refVersification;
             _isScripture = refVersification != null && versification != null && refVersification != versification;
             _verseRows = new Queue<TextRow>();
@@ -67,18 +69,20 @@ protected override void DisposeManagedResources()
 
         private void CollectVerses()
         {
+            bool hasCrossBookMappings = _versification.HasCrossBookMappings(_refVersification);
+
             var rowList = new List<(ScriptureRef Ref, TextRow Row)>();
-            bool outOfOrder = false;
+            bool versesOutOfOrder = false;
             ScriptureRef prevRefRef = ScriptureRef.Empty;
             int rangeStartOffset = -1;
             do
             {
                 TextRow row = _enumerator.Current;
                 var refRef = (ScriptureRef)row.Ref;
-                if (!prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
+                refRef = refRef.ChangeVersification(_refVersification);
+                if (!hasCrossBookMappings && !prevRefRef.IsEmpty && refRef.BookNum != prevRefRef.BookNum)
                     break;
 
-                refRef = refRef.ChangeVersification(_refVersification);
                 // convert one-to-many versification mapping to a verse range
                 if (refRef.Equals(prevRefRef))
                 {
@@ -106,13 +110,13 @@ private void CollectVerses()
                     rangeStartOffset = -1;
                 }
                 rowList.Add((refRef, row));
-                if (!outOfOrder && refRef.CompareTo(prevRefRef) < 0)
-                    outOfOrder = true;
+                if (!versesOutOfOrder && refRef.CompareTo(prevRefRef) < 0)
+                    versesOutOfOrder = true;
                 prevRefRef = refRef;
                 _enumeratorHasMoreData = _enumerator.MoveNext();
             } while (_enumeratorHasMoreData);
 
-            if (outOfOrder)
+            if (versesOutOfOrder)
                 rowList.Sort((x, y) => x.Ref.CompareTo(y.Ref));
 
             foreach ((ScriptureRef _, TextRow row) in rowList)
diff --git a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
index 1d4eafd9..6a358597 100644
--- a/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/ParallelTextCorpusTests.cs
@@ -1238,6 +1238,112 @@ public void GetRows_DifferentVersificationsWithVerseSegments()
         Assert.That(rows[5].TargetSegment, Is.EqualTo("target chapter sixteen, verse thirty nine b .".Split()));
     }
 
+    [Test]
+    public void GetRows_DifferentVersificationsWithCrossBookMappings()
+    {
+        var sourceCorpus = new DictionaryTextCorpus(
+            new MemoryText(
+                "DAN",
+                new[]
+                {
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:23", ScrVers.Original),
+                        "DAN source chapter three, verse twenty three ."
+                    ),
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:24", ScrVers.Original),
+                        "DAN source chapter three, verse twenty four ."
+                    ),
+                }
+            ),
+            new MemoryText(
+                "S3Y",
+                new[]
+                {
+                    TextRow(
+                        "S3Y",
+                        ScriptureRef.Parse("S3Y 1:1", ScrVers.Original),
+                        "S3Y source chapter one, verse one ."
+                    ),
+                    TextRow(
+                        "S3Y",
+                        ScriptureRef.Parse("S3Y 1:68", ScrVers.Original),
+                        "S3Y source chapter one, verse sixty eight ."
+                    ),
+                }
+            )
+        )
+        {
+            Versification = ScrVers.Original,
+        };
+
+        var targetCorpus = new DictionaryTextCorpus(
+            new MemoryText(
+                "DAN",
+                new[]
+                {
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:23", ScrVers.RussianOrthodox),
+                        "DAN target chapter three, verse twenty three ."
+                    ),
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:24", ScrVers.RussianOrthodox),
+                        "DAN target chapter three, verse twenty four ."
+                    ),
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:90", ScrVers.RussianOrthodox),
+                        "DAN target chapter three, verse ninety ."
+                    ),
+                    TextRow(
+                        "DAN",
+                        ScriptureRef.Parse("DAN 3:91", ScrVers.RussianOrthodox),
+                        "DAN target chapter three, verse ninety one ."
+                    ),
+                }
+            )
+        )
+        {
+            Versification = ScrVers.RussianOrthodox,
+        };
+
+        // Russian Orthodox vs. Original
+        // DAN 3:24-90 = DAG 3:24-90
+        // DAN 3:91-100 = DAN 3:24-33
+        // Original
+        // S3Y 1:1-29 = DAG 3:24-52
+        // ...
+        // S3Y 1:38-68 = DAG 3:60-90
+
+        var parallelCorpus = sourceCorpus.AlignRows(targetCorpus, allSourceRows: true);
+        ParallelTextRow[] rows = parallelCorpus.ToArray();
+        Assert.That(rows.Length, Is.EqualTo(4));
+
+        Assert.That(rows[0].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:23", ScrVers.Original) }));
+        Assert.That(rows[0].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:23", ScrVers.RussianOrthodox) }));
+        Assert.That(rows[0].SourceSegment, Is.EqualTo("DAN source chapter three, verse twenty three .".Split()));
+        Assert.That(rows[0].TargetSegment, Is.EqualTo("DAN target chapter three, verse twenty three .".Split()));
+
+        Assert.That(rows[1].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:24", ScrVers.Original) }));
+        Assert.That(rows[1].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:91", ScrVers.RussianOrthodox) }));
+        Assert.That(rows[1].SourceSegment, Is.EqualTo("DAN source chapter three, verse twenty four .".Split()));
+        Assert.That(rows[1].TargetSegment, Is.EqualTo("DAN target chapter three, verse ninety one .".Split()));
+
+        Assert.That(rows[2].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("S3Y 1:1", ScrVers.Original) }));
+        Assert.That(rows[2].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:24", ScrVers.RussianOrthodox) }));
+        Assert.That(rows[2].SourceSegment, Is.EqualTo("S3Y source chapter one, verse one .".Split()));
+        Assert.That(rows[2].TargetSegment, Is.EqualTo("DAN target chapter three, verse twenty four .".Split()));
+
+        Assert.That(rows[3].SourceRefs, Is.EqualTo(new[] { ScriptureRef.Parse("S3Y 1:68", ScrVers.Original) }));
+        Assert.That(rows[3].TargetRefs, Is.EqualTo(new[] { ScriptureRef.Parse("DAN 3:90", ScrVers.RussianOrthodox) }));
+        Assert.That(rows[3].SourceSegment, Is.EqualTo("S3Y source chapter one, verse sixty eight .".Split()));
+        Assert.That(rows[3].TargetSegment, Is.EqualTo("DAN target chapter three, verse ninety .".Split()));
+    }
+
     [Test]
     public void GetRows_DifferentVersificationsWithExtraVerse()
     {
diff --git a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs
deleted file mode 100644
index 31360c49..00000000
--- a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensions.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-using SIL.Scripture;
-
-namespace SIL.Machine.Corpora;
-
-public static class ScrVersExtensions
-{
-    /// <summary>
-    /// Gets a list of references (verse references) for the specified book.
-    /// </summary>
-    public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
-    {
-        List<VerseRef> references = new List<VerseRef>();
-        int lastChapter = scrVers.GetLastChapter(bookNum);
-
-        for (int chapterNum = 1; chapterNum <= lastChapter; chapterNum++)
-        {
-            int lastVerse = scrVers.GetLastVerse(bookNum, chapterNum);
-
-            for (int verseNum = 1; verseNum <= lastVerse; verseNum++)
-            {
-                int bbbcccvvv = VerseRef.GetBBBCCCVVV(bookNum, chapterNum, verseNum);
-                if (!scrVers.IsExcluded(bbbcccvvv))
-                {
-                    references.Add(new VerseRef(bookNum, chapterNum, verseNum, scrVers));
-                }
-            }
-        }
-
-        return references;
-    }
-}
diff --git a/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs
new file mode 100644
index 00000000..d006e255
--- /dev/null
+++ b/tests/SIL.Machine.Tests/Corpora/ScrVersExtensionsTests.cs
@@ -0,0 +1,33 @@
+using NUnit.Framework;
+using SIL.Scripture;
+
+namespace SIL.Machine.Corpora;
+
+[TestFixture]
+public class ScrVersExtensionsTests
+{
+    [Test]
+    public void AllIncludedVerses()
+    {
+        List<VerseRef> originalVerses = ScrVers.Original.AllIncludedVerses().ToList();
+        Assert.That(originalVerses, Has.Count.EqualTo(41899));
+        Assert.That(originalVerses[21899].BBBCCCVVV, Is.EqualTo(27003024));
+        List<VerseRef> englishVerses = ScrVers.English.AllIncludedVerses().ToList();
+        Assert.That(englishVerses, Has.Count.EqualTo(38393));
+        Assert.That(englishVerses[englishVerses.Count - 1].BBBCCCVVV, Is.EqualTo(123001020));
+        List<VerseRef> russianOrthodoxVerses = ScrVers.RussianOrthodox.AllIncludedVerses().ToList();
+        Assert.That(russianOrthodoxVerses, Has.Count.EqualTo(37280));
+        Assert.That(russianOrthodoxVerses[russianOrthodoxVerses.Count - 1].BBBCCCVVV, Is.EqualTo(83001015));
+    }
+
+    [Test]
+    public void HasCrossBookMappings()
+    {
+        Assert.That(!ScrVers.Original.HasCrossBookMappings());
+        Assert.That(ScrVers.English.HasCrossBookMappings());
+        Assert.That(ScrVers.RussianOrthodox.HasCrossBookMappings());
+        Assert.That(!ScrVers.RussianProtestant.HasCrossBookMappings());
+        Assert.That(ScrVers.Vulgate.HasCrossBookMappings());
+        Assert.That(ScrVers.Vulgate.HasCrossBookMappings(ScrVers.English));
+    }
+}