From 80c9c24e0fc136170a4e53b337d4ad1edea19bf8 Mon Sep 17 00:00:00 2001 From: Wolfvin Date: Sun, 28 Jun 2026 12:01:06 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20consolidate=20=5Fdefault=5Fdb=5Fpath=20t?= =?UTF-8?q?o=20utils.py=20=E2=80=94=20closes=20#40?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default-db-path logic was triplicated across: - scripts/commands/graph_schema.py:29-31 (_default_db_path) - scripts/graph_model.py:161-163 (_default_db_path) - scripts/persistent_registry.py:132-133 (__init__ os.path.join) - scripts/persistent_registry.py:858 (db_exists os.path.join) All four sites returned os.path.join(workspace, '.codelens', 'codelens.db') verbatim. Any future change (e.g. honoring CODELENS_DB_PATH env var, moving the db out of .codelens/) had to be made in four places — easy to miss one and create a silent bug where graph-schema and graph_model queries hit different databases. This refactor consolidates to a single source of truth: utils.default_db_path(workspace: str) -> str, with a docstring. Changes per file: - scripts/utils.py: add default_db_path helper (single source of truth). - scripts/commands/graph_schema.py: delete local _default_db_path; import default_db_path from utils; update call site. - scripts/graph_model.py: replace local _default_db_path definition with import + backward-compat alias (preserves the underscore-private symbol used by existing tests and test_hybrid_type_resolver.py); internal call sites now use the public name. - scripts/persistent_registry.py: __init__ and db_exists() now call default_db_path(workspace). DB_FILENAME constant preserved as a module-level export (no longer used internally; comment notes it's kept for backwards-compat with external importers). - tests/test_graph_model.py: add TestDefaultDbPathConsistency test that asserts all three call sites resolve to the same path for the same workspace (and graph_schema imports the same callable, not a re-definition). Pure refactor — no behavior change. The default path is still /.codelens/codelens.db. Verification: - python3 -m pytest tests/test_graph_model.py -v → 21/21 pass (20 existing + 1 new TestDefaultDbPathConsistency) - python3 -m pytest tests/test_graph_model.py tests/test_graph_incremental.py tests/test_hybrid_type_resolver.py tests/test_persistent_registry.py tests/test_persistent_registry_extra.py tests/test_registry.py → 129/129 pass (no regressions in any storage-related test) - python3 -m pytest tests/test_cli.py → 15/15 pass - python3 -c "from utils import default_db_path; print(default_db_path('/tmp/test'))" → /tmp/test/.codelens/codelens.db - python3 scripts/codelens.py graph-schema benchmarks/fixtures/clean_app → runs clean (zero-shaped before scan; 31 nodes/134 edges/6 indexes after scan — same as before refactor) - All modified files byte-compile with py_compile (no syntax errors) - All lines I added respect the 120-char limit (PEP 8 + repo convention) Out of scope (not fixed): - tests/test_vuln_staleness.py::test_vulnerable_app_has_stale_cache_info fails on both main and this branch — pre-existing date-based flake ('VULN_DB is 485 days old'), unrelated to this refactor. --- scripts/commands/graph_schema.py | 8 +--- scripts/graph_model.py | 14 +++--- scripts/persistent_registry.py | 11 ++--- scripts/utils.py | 23 ++++++++++ tests/test_graph_model.py | 76 ++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 17 deletions(-) diff --git a/scripts/commands/graph_schema.py b/scripts/commands/graph_schema.py index 7b25140..f892912 100644 --- a/scripts/commands/graph_schema.py +++ b/scripts/commands/graph_schema.py @@ -16,6 +16,7 @@ from typing import Any, Dict, Optional from commands import register_command +from utils import default_db_path def add_args(parser): @@ -26,11 +27,6 @@ def add_args(parser): help="Custom path for SQLite database file") -def _default_db_path(workspace: str) -> str: - """Return the default SQLite db path for a workspace.""" - return os.path.join(workspace, ".codelens", "codelens.db") - - def get_graph_schema(workspace: str, db_path: Optional[str] = None) -> Dict[str, Any]: """Return the shape of the graph (node/edge counts + type distribution). @@ -48,7 +44,7 @@ def get_graph_schema(workspace: str, db_path: Optional[str] = None) -> Dict[str, ``indexes``, ``status``, and ``workspace``. """ workspace = os.path.abspath(workspace) - db_path = db_path or _default_db_path(workspace) + db_path = db_path or default_db_path(workspace) schema: Dict[str, Any] = { "status": "ok", diff --git a/scripts/graph_model.py b/scripts/graph_model.py index 2a5667c..1df96a5 100644 --- a/scripts/graph_model.py +++ b/scripts/graph_model.py @@ -50,7 +50,7 @@ from collections import deque from typing import Any, Dict, Iterable, List, Optional, Set, Tuple -from utils import logger +from utils import default_db_path, logger # ─── Schema Constants ───────────────────────────────────────── @@ -158,9 +158,11 @@ def init_graph_schema(conn: sqlite3.Connection) -> None: # ─── Population ─────────────────────────────────────────────── -def _default_db_path(workspace: str) -> str: - """Return the default SQLite db path for a workspace.""" - return os.path.join(workspace, ".codelens", "codelens.db") +# Single source of truth for the default db path lives in utils.default_db_path +# (see issue #40). The private alias below is kept for backward compatibility +# with tests and callers that import ``graph_model._default_db_path`` directly; +# it delegates to the canonical helper so logic never drifts. +_default_db_path = default_db_path def _parse_file_line_from_node_id(node_id: str) -> Tuple[str, int]: @@ -244,7 +246,7 @@ def populate_graph_tables(workspace: str, db_path: Optional[str] = None) -> Dict Dict with keys 'nodes' and 'edges' giving the number of rows inserted. """ workspace = os.path.abspath(workspace) - db_path = db_path or _default_db_path(workspace) + db_path = db_path or default_db_path(workspace) # Lazy import to avoid circular dependency at module load time. from registry import load_backend_registry @@ -430,7 +432,7 @@ def incremental_graph_update( unresolved after the post-pass. """ workspace = os.path.abspath(workspace) - db_path = db_path or _default_db_path(workspace) + db_path = db_path or default_db_path(workspace) # Normalize changed_files to workspace-relative paths. We accept any # iterable (list, set, tuple) and de-duplicate via a set. diff --git a/scripts/persistent_registry.py b/scripts/persistent_registry.py index ca16557..2266665 100644 --- a/scripts/persistent_registry.py +++ b/scripts/persistent_registry.py @@ -27,12 +27,15 @@ from datetime import datetime, timezone from typing import Any, Dict, List, Optional, Set, Tuple -from utils import logger +from utils import default_db_path, logger # ─── Schema Version ──────────────────────────────────────────── SCHEMA_VERSION = 1 +# Module-level constant kept for backwards-compat with external callers/plugins +# that may import ``DB_FILENAME``. Internal path construction now goes through +# ``utils.default_db_path`` (single source of truth — see issue #40). DB_FILENAME = "codelens.db" # ─── SQL Statements ──────────────────────────────────────────── @@ -129,9 +132,7 @@ def __init__(self, workspace: str, db_path: Optional[str] = None): Defaults to .codelens/codelens.db """ self.workspace = workspace - self._db_path = db_path or os.path.join( - workspace, ".codelens", DB_FILENAME - ) + self._db_path = db_path or default_db_path(workspace) self._local = threading.local() # Thread-local storage for connections self._initialized = False self._init_lock = threading.Lock() @@ -855,7 +856,7 @@ def vacuum(self) -> None: def db_exists(workspace: str, db_path: Optional[str] = None) -> bool: """Check if a CodeLens SQLite database exists for the workspace.""" - path = db_path or os.path.join(workspace, ".codelens", DB_FILENAME) + path = db_path or default_db_path(workspace) return os.path.exists(path) diff --git a/scripts/utils.py b/scripts/utils.py index dbfc524..d003fd5 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -40,6 +40,29 @@ def get_logger(name: str = "codelens") -> logging.Logger: '.chunk.js', '.d.ts', # declaration files }) +# ─── Storage Path Resolution ────────────────────────────────── + + +def default_db_path(workspace: str) -> str: + """Return the default SQLite database path for a workspace. + + Single source of truth for the default CodeLens database location. + Previously triplicated across ``scripts/commands/graph_schema.py``, + ``scripts/graph_model.py``, and ``scripts/persistent_registry.py`` + (see issue #40). Any future change to the default path — e.g., + honoring a ``CODELENS_DB_PATH`` environment variable or moving the + database out of ``.codelens/`` — only needs to be made here. + + Args: + workspace: Absolute or relative path to the workspace root. + + Returns: + Path to the default SQLite database file: + ``/.codelens/codelens.db``. + """ + return os.path.join(workspace, ".codelens", "codelens.db") + + # ─── Output File Generation ───────────────────────────────── def write_output_files(workspace: str, scan_result, max_files: int = 3000) -> dict: diff --git a/tests/test_graph_model.py b/tests/test_graph_model.py index eb5fc30..5b98065 100644 --- a/tests/test_graph_model.py +++ b/tests/test_graph_model.py @@ -514,3 +514,79 @@ def test_use_graph_false_forces_flat(self, scanned_clean_app): ) assert self._chain_set(forced_flat["chains"]["up"]) == self._chain_set(flat["chains"]["up"]) + + +# ─── 7. Default DB Path Consistency (issue #40) ───────────── + + +class TestDefaultDbPathConsistency: + """Verify all three call sites resolve the default db path identically. + + Issue #40: ``_default_db_path`` was duplicated in ``graph_schema.py``, + ``graph_model.py``, and ``persistent_registry.py``. After consolidation + to ``utils.default_db_path``, this test guards against future drift by + asserting that all three sites (plus the canonical helper) produce the + same path for the same workspace. + """ + + def test_default_db_path_consistency(self): + """All three call sites must resolve to the same db path.""" + workspace = os.path.join(os.path.sep, "tmp", "codelens_path_test") + + # 1. Canonical helper — the single source of truth. + from utils import default_db_path + canonical = default_db_path(workspace) + + # 2. graph_model._default_db_path (kept as a backward-compat alias). + from graph_model import _default_db_path as graph_model_path + # 3. graph_schema used to define its own helper; it now imports + # utils.default_db_path, so we exercise the public path through + # the module's get_graph_schema function indirectly by checking + # that the import resolves to the same callable. + from commands import graph_schema as graph_schema_mod + + assert canonical == graph_model_path(workspace), ( + "graph_model._default_db_path must match utils.default_db_path; " + f"got graph_model={graph_model_path(workspace)!r} " + f"vs utils={canonical!r}" + ) + + # graph_schema.py no longer defines its own _default_db_path; the + # module must import default_db_path from utils so its callers + # resolve through the same single source of truth. + assert hasattr(graph_schema_mod, "default_db_path"), ( + "commands.graph_schema must import default_db_path from utils " + "(issue #40 consolidation)" + ) + assert graph_schema_mod.default_db_path is default_db_path, ( + "commands.graph_schema.default_db_path must be the same callable " + "as utils.default_db_path (not a re-definition)" + ) + assert graph_schema_mod.default_db_path(workspace) == canonical, ( + "commands.graph_schema must resolve to the same path as " + f"utils.default_db_path; got " + f"{graph_schema_mod.default_db_path(workspace)!r} " + f"vs {canonical!r}" + ) + + # 4. PersistentRegistry must construct the same default path when no + # explicit db_path is supplied. + from persistent_registry import PersistentRegistry, db_exists + registry = PersistentRegistry(workspace) + assert registry.db_path == canonical, ( + "PersistentRegistry default db_path must match utils.default_db_path; " + f"got registry={registry.db_path!r} vs utils={canonical!r}" + ) + # db_exists() helper must also use the same default path. + # (We can't easily assert the path it computes without filesystem + # side effects, but we can assert the function does not raise and + # returns False for a non-existent workspace — proving it computes + # a path rather than crashing on the old hardcoded join.) + assert db_exists(workspace) is False, ( + "db_exists must return False for a workspace with no database" + ) + + # Sanity: the canonical path has the expected shape. + assert canonical.endswith(os.path.join(".codelens", "codelens.db")), ( + f"canonical path {canonical!r} must end with .codelens/codelens.db" + )