From f09bbaa21268ea67b299e6de6993ac39a86699b3 Mon Sep 17 00:00:00 2001
From: SimonTaurus <simon.stier@gmx.de>
Date: Sun, 7 Jun 2026 18:50:12 +0200
Subject: [PATCH 1/2] fix: exclude subobject refs from required pages detection

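Item:OSW<id>#OSW<subid> subobject references were incorrectly matched
as standalone Item:OSW<subid> required pages. Added a negative
lookbehind for "#" to the Item regex pattern. Because the pattern now
contains a "(?<!" group, count_match_groups() was also updated to
subtract zero-width assertions ("(?=", "(?!", "(?<=", "(?<!") in
addition to "(?:" groups, so they are not counted as capture groups.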
---
 src/osw/controller/page_package.py |  4 ++--
 src/osw/utils/regex.py             | 10 +++++++++-
 2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/src/osw/controller/page_package.py b/src/osw/controller/page_package.py
index 977d7cf..9fce695 100644
--- a/src/osw/controller/page_package.py
+++ b/src/osw/controller/page_package.py
@@ -71,8 +71,8 @@
         group_keys=["file"],
     ),
     "Item strings": RegExPatternExtended(
-        description="Match: Item:OSW<uuid>",
-        pattern=r"(Item:OSW[a-f0-9]{32})",
+        description="Match: Item:OSW<uuid> but not subobject refs after #",
+        pattern=r"(?<!#)(Item:OSW[a-f0-9]{32})",
         group_keys=["item"],
     ),
 }
diff --git a/src/osw/utils/regex.py b/src/osw/utils/regex.py
index 653f7ae..422ca71 100644
--- a/src/osw/utils/regex.py
+++ b/src/osw/utils/regex.py
@@ -181,7 +181,15 @@ def count_match_groups(pattern: Union[str, re.Pattern]):
             group_count += 1
             unmatched_brackets -= 1
 
-    corrected_group_count = group_count - pattern.count("(?:")
+    # Subtract non-capturing groups and zero-width assertions
+    non_capturing = (
+        pattern.count("(?:")
+        + pattern.count("(?=")
+        + pattern.count("(?!")
+        + pattern.count("(?<=")
+        + pattern.count("(?<!")
+    )
+    corrected_group_count = group_count - non_capturing
     return corrected_group_count
 
 

From 316ce65c3f22539c14c2570dd6b2d8d3e14cc719 Mon Sep 17 00:00:00 2001
From: SimonTaurus <simon.stier@gmx.de>
Date: Tue, 16 Jun 2026 02:55:04 +0200
Subject: [PATCH 2/2] fix: exclude comments in OSW-ID replacements

---
 src/osw/utils/code_postprocessing.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/osw/utils/code_postprocessing.py b/src/osw/utils/code_postprocessing.py
index faf2f2c..43c3b27 100644
--- a/src/osw/utils/code_postprocessing.py
+++ b/src/osw/utils/code_postprocessing.py
@@ -89,16 +89,18 @@ def resolve_osw_id_type_hints(content: str) -> str:
     if not osw_id_to_class:
         return content
 
-    # Find bare OSW IDs used as identifiers (not inside "Category:OSW..." strings)
+    # Replace bare OSW IDs used as identifiers with the resolved class name.
+    # Skip comment lines: the datamodel-codegen header
+    # "#   filename:  OSW....json" contains an OSW ID that is NOT a type hint
+    # and must stay intact (the lookbehind/lookahead alone do not exclude it,
+    # since it is preceded by a space and followed by ".json").
     bare_osw_pattern = re.compile(r"(?<![:\w])OSW[0-9a-f]{32}(?!\w)")
-    unresolved = set(bare_osw_pattern.findall(content))
 
-    for osw_id in unresolved:
-        if osw_id in osw_id_to_class:
-            content = re.sub(
-                r"(?<![:\w])" + re.escape(osw_id) + r"(?!\w)",
-                osw_id_to_class[osw_id],
-                content,
-            )
+    def _resolve_line(line: str) -> str:
+        if line.lstrip().startswith("#"):
+            return line  # never rewrite comments
+        return bare_osw_pattern.sub(
+            lambda m: osw_id_to_class.get(m.group(0), m.group(0)), line
+        )
 
-    return content
+    return "".join(_resolve_line(line) for line in content.splitlines(keepends=True))