diff --git a/src/osw/controller/page_package.py b/src/osw/controller/page_package.py index 977d7cf..9fce695 100644 --- a/src/osw/controller/page_package.py +++ b/src/osw/controller/page_package.py @@ -71,8 +71,8 @@ group_keys=["file"], ), "Item strings": RegExPatternExtended( - description="Match: Item:OSW", - pattern=r"(Item:OSW[a-f0-9]{32})", + description="Match: Item:OSW but not subobject refs after #", + pattern=r"(? str: if not osw_id_to_class: return content - # Find bare OSW IDs used as identifiers (not inside "Category:OSW..." strings) + # Replace bare OSW IDs used as identifiers with the resolved class name. + # Skip comment lines: the datamodel-codegen header + # "# filename: OSW....json" contains an OSW ID that is NOT a type hint + # and must stay intact (the lookbehind/lookahead alone do not exclude it, + # since it is preceded by a space and followed by ".json"). bare_osw_pattern = re.compile(r"(? str: + if line.lstrip().startswith("#"): + return line # never rewrite comments + return bare_osw_pattern.sub( + lambda m: osw_id_to_class.get(m.group(0), m.group(0)), line + ) - return content + return "".join(_resolve_line(line) for line in content.splitlines(keepends=True)) diff --git a/src/osw/utils/regex.py b/src/osw/utils/regex.py index 653f7ae..422ca71 100644 --- a/src/osw/utils/regex.py +++ b/src/osw/utils/regex.py @@ -181,7 +181,15 @@ def count_match_groups(pattern: Union[str, re.Pattern]): group_count += 1 unmatched_brackets -= 1 - corrected_group_count = group_count - pattern.count("(?:") + # Subtract non-capturing groups and zero-width assertions + non_capturing = ( + pattern.count("(?:") + + pattern.count("(?=") + + pattern.count("(?!") + + pattern.count("(?<=") + + pattern.count("(?