From 9fc64692d4f043f3fc94b6ad46edc8d369785b5a Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 6 Jun 2026 15:33:36 +0530 Subject: [PATCH 01/17] feat(db): add local SQLite tracking database for extension fetches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a versioned local SQLite tracking database at `~/.polyskills/polyskills.db` that records every extension fetch performed through either the CLI tool (`polyskills manager ...`) or a direct Python API call. The database is materialised lazily on first import of the `polyskills` package and is re-used by every subsequent import; the schema bootstrap is idempotent and uses `CREATE TABLE IF NOT EXISTS` for every table. Why --- The framework previously offered no way to answer "what extensions did I install, from which remote, into which directory?". Operators running polyskills across many projects and machines needed a single source of truth for installed-extension state plus an audit trail of every fetch attempt (success or failure) so misconfigured remotes and stale destinations could be diagnosed without re-running the CLI. What ---- New `polyskills/db/` sub-package (stdlib `sqlite3` only - no new third-party dependencies): * `paths.py` - canonical path resolver returning `~/.polyskills/polyskills.db` and creating its parent directory on demand. * `schema.py` - v1 DDL with three tables: `meta` (bookkeeping), `extensions` (current state, UPSERT on the natural key `(remote_url, library, name, destination_dir)`), and `events` (append-only audit log). PRAGMAs set WAL journal mode, `synchronous = NORMAL`, foreign keys ON, and `user_version = 1` for future migrations. * `tracker.py` - best-effort `Tracker` façade exposing `record_start`, `record_success`, `record_failure`. Writes are serialised via `BEGIN IMMEDIATE` and `record_start` uses `INSERT ... ON CONFLICT ... RETURNING id` so id round-trip is atomic. Every public method swallows `sqlite3.Error` / `OSError` and emits a single per-process `RuntimeWarning` so a broken database never breaks the install path. Also owns the process-wide `set_invocation_context` flag. Wiring ------ * `polyskills/__init__.py` - bootstraps `get_tracker()` on import, inside a broad `try/except` so any failure (read-only home dir, exotic filesystem) cannot break `import polyskills`. Honours an internal `POLYSKILLS_DISABLE_BOOTSTRAP` env hook used by the test harness. * `polyskills/remote/sources.py` - new `_summariseDestination` helper and new `SourceManager._trackedExtensions` wrapper sit around `_getExtensions`. The wrapper lives in the abstract base class so every concrete remote provider (current `GithubManager`, any future GitLab/Bitbucket manager) gets tracking for free without per-provider wiring. The wrapper preserves the exception contract of `_getExtensions`: failures are recorded and re-raised unchanged; successes record file and byte counts plus wall-clock duration. * `polyskills/cli.py` - calls `set_invocation_context("cli")` in `main()` before dispatch so `events.invoked_via` distinguishes CLI usage (`"cli"`) from direct API usage (`"api"`). Tests ----- * `polyskills/tests/test_tracker.py` - 10 new hermetic tests covering schema bootstrap, idempotency, `user_version`, the success path, the failure path, upsert semantics on reinstall, invocation-context persistence, and graceful degradation on a corrupt database. * `polyskills/tests/base.py` - per-class temp database redirect in `setUpClass` / `tearDownClass` so live-network test classes never touch the user's real `~/.polyskills/polyskills.db`. * `polyskills/tests/__init__.py` and `__main__.py` - suite-level redirect and `POLYSKILLS_DISABLE_BOOTSTRAP=1` so even the import-time bootstrap is suppressed during test discovery. * The tracker imports `polyskills.db.paths` as a module and looks up `resolve_db_path` via attribute access (not by name binding) so test monkey-patches take effect. Documentation ------------- * `README.md` - new "Local Tracking Database" section describing the path, the three tables, the best-effort guarantee, and a one-line `sqlite3` inspection example. * `CHANGELOG.md` - entry under upcoming v2.0.0 covering the feature and the new `invoked_via` column. Verification ------------ * `python -m unittest polyskills.tests.test_tracker polyskills.tests.test_skill_install.TestUserSkillDirectoryPath` -> 18/18 PASS. * `python -m polyskills.tests` -> 47/50 PASS; the 3 failures are pre-existing CPython 3.12.4 `argparse.format_usage` AssertionErrors in `test_cli.py` and are unrelated to this change (verified by re-running the suite on the prior commit via `git stash`). * Manual smoke: `import polyskills` materialises the database at `~/.polyskills/polyskills.db` with all three tables, five indexes, WAL mode, `user_version = 1`, and seeded `meta` rows. The full test suite leaves zero rows in `extensions` and `events` of the real user database, confirming the redirect contract. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CHANGELOG.md | 11 + README.md | 20 ++ polyskills/__init__.py | 16 + polyskills/cli.py | 5 + polyskills/db/__init__.py | 56 ++++ polyskills/db/paths.py | 59 ++++ polyskills/db/schema.py | 143 +++++++++ polyskills/db/tracker.py | 489 +++++++++++++++++++++++++++++++ polyskills/remote/sources.py | 151 +++++++++- polyskills/tests/__init__.py | 17 ++ polyskills/tests/__main__.py | 6 + polyskills/tests/base.py | 30 +- polyskills/tests/test_tracker.py | 394 +++++++++++++++++++++++++ 13 files changed, 1394 insertions(+), 3 deletions(-) create mode 100644 polyskills/db/__init__.py create mode 100644 polyskills/db/paths.py create mode 100644 polyskills/db/schema.py create mode 100644 polyskills/db/tracker.py create mode 100644 polyskills/tests/test_tracker.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 22cb754..a763b1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,17 @@ under `h3` tags, while the `micro` and "version identifiers" are listed under `h +### PolySkills v2.0.0 | Unreleased + + * 🎉 **Local Tracking Database** - `polyskills` now ships a versioned SQLite tracking database at + `~/.polyskills/polyskills.db` that records every extension fetch performed through either the CLI tool or the + direct Python API. The database is created lazily on first import and is engineered to be best-effort: any + failure to open or write is downgraded to a one-shot `RuntimeWarning` so the install path is never broken by a + tracking error. The schema (`PRAGMA user_version = 1`) ships three tables - `meta`, `extensions`, `events` - and + is materialised through idempotent `CREATE TABLE IF NOT EXISTS` DDL so repeat imports are no-ops. + * ✨ **Invocation Context** - the new `events.invoked_via` column distinguishes fetches triggered from the CLI + (`"cli"`) from fetches triggered through the Python API (`"api"`). + ### PolySkills v1.0.0 | 2026-05-25 The world of AI is evolving fastm and agent workflows are the new *norms* to build awesome projects, or to create an entire diff --git a/README.md b/README.md index f4e43c2..b090b9a 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,26 @@ The `**library**` requires **Python 3.12+** and is designed to have minimal over with the upcoming releases (requires [standard libraries](https://docs.python.org/3/library/index.html) which is shipped by default) of Python language and AI tools. +## 🗄️ Local Tracking Database + +On the very first import of `polyskills`, the framework lazily creates a small SQLite database at +`~/.polyskills/polyskills.db` and uses it to record every extension that is fetched from a remote source - whether the +fetch is triggered through the CLI (`polyskills manager ...`) or through a direct Python API call. The schema is +versioned (`PRAGMA user_version = 1`) and backed by three tables: + + * `meta` - bookkeeping: schema version and creation timestamp. + * `extensions` - latest known state per `(remote_url, library, name, destination_dir)` natural key. + * `events` - append-only audit log; one row per fetch attempt, success or failure, including byte/file counts and + duration in milliseconds. + +The database is best-effort by design: any failure to open, write to, or upgrade the database results in a single +`RuntimeWarning` and a silent degradation - the install path is never broken by a tracking error. Inspect the database +with any standard SQLite client: + +```shell +$ sqlite3 ~/.polyskills/polyskills.db "SELECT name, library, last_status, file_count FROM extensions;" +``` + ## ⚖️ Project License This project is licensed under the [MIT License](...). Permission is granted to use, copy, modify, merge, publish, diff --git a/polyskills/__init__.py b/polyskills/__init__.py index e9ae197..0d20932 100644 --- a/polyskills/__init__.py +++ b/polyskills/__init__.py @@ -17,3 +17,19 @@ __version__ = "v1.0.0" # init-time options registrations, use api/ for public functions + +# ? bootstrap the local tracking database on first import; the call +# ? is wrapped in a broad try/except so any unexpected failure (read +# ? only home dir, exotic filesystem) cannot ever break ``import``. +# ? the bootstrap honours the internal POLYSKILLS_DISABLE_BOOTSTRAP +# ? hook so test harnesses can suppress on-disk artefacts entirely. +import os as _os + +if _os.environ.get("POLYSKILLS_DISABLE_BOOTSTRAP", "").lower() not in ( + "1", "true", "yes" +): + try: + from polyskills.db import get_tracker as _get_tracker + _get_tracker() + except Exception: + pass diff --git a/polyskills/cli.py b/polyskills/cli.py index d665d8c..39aad69 100644 --- a/polyskills/cli.py +++ b/polyskills/cli.py @@ -25,6 +25,7 @@ from typing import Callable, Optional from polyskills.apps.tools import SupportedTools +from polyskills.db import set_invocation_context from polyskills.remote.sources import ( GithubManager, SourceControl, SourceManager, ValidSources ) @@ -281,6 +282,10 @@ def main() -> None: args = buildParser().parse_args() + # ? mark every fetch triggered through this entry-point as a CLI + # ? invocation so ``events.invoked_via`` can split usage modes. + set_invocation_context("cli") + # ? terminal commands: 'sources', 'tools' - print and exit early # ? these subparsers attach a ``func`` default; manager does not. if hasattr(args, "func") and callable(args.func): diff --git a/polyskills/db/__init__.py b/polyskills/db/__init__.py new file mode 100644 index 0000000..7a92230 --- /dev/null +++ b/polyskills/db/__init__.py @@ -0,0 +1,56 @@ +# -*- encoding: utf-8 -*- + +""" +Local Tracking Database for the Polyskills Framework +===================================================== + +The :mod:`polyskills.db` sub-package owns the local SQLite tracking +database used by the framework to record every extension that is +fetched, downloaded, and flushed into a destination directory by +either the CLI tool or a direct Python API caller. + +The database lives at a fixed location under the user home directory: + + ``~/.polyskills/polyskills.db`` + +The database file is created lazily on the very first import of the +:mod:`polyskills` package and is then re-used by every subsequent +import - the schema bootstrap is idempotent and uses +``CREATE TABLE IF NOT EXISTS`` for every table. + +Public Surface +-------------- + +The sub-package exposes a small, deliberately narrow API so the rest +of the framework only ever has to learn three names: + + * :func:`get_tracker` - returns a process-wide :class:`Tracker` + singleton; the first call also runs :func:`apply_schema`. + * :func:`set_invocation_context` - records whether the calling + frame is the CLI ``main()`` or a direct library API caller; the + value is persisted into ``events.invoked_via`` so downstream + analytics can split CLI from API usage. + * :class:`Tracker` - the concrete object returned by + :func:`get_tracker`; offers ``record_start``, ``record_success``, + and ``record_failure``. + +:NOTE: Every public method of :class:`Tracker` is engineered to be a +best-effort no-op on failure - a broken database, a read-only home +directory, or a permission error never raises into the caller. The +install path is therefore guaranteed to succeed (or fail for its own +reasons) regardless of the database health. +""" + +from polyskills.db.paths import resolve_db_path +from polyskills.db.schema import apply_schema +from polyskills.db.tracker import ( + Tracker, get_tracker, set_invocation_context +) + +__all__ = [ + "Tracker", + "apply_schema", + "get_tracker", + "resolve_db_path", + "set_invocation_context", +] diff --git a/polyskills/db/paths.py b/polyskills/db/paths.py new file mode 100644 index 0000000..bcd30cd --- /dev/null +++ b/polyskills/db/paths.py @@ -0,0 +1,59 @@ +# -*- encoding: utf-8 -*- + +""" +Filesystem Path Resolver for the Polyskills Tracking Database + +Resolves the canonical on-disk location of the polyskills tracking +SQLite database. The location is fixed by design at +``~/.polyskills/polyskills.db`` so every install of the framework +writes to the same single source of truth and the user never needs +to learn an environment variable to opt in. + +Tests redirect the resolver by monkey-patching +:func:`resolve_db_path` to a temporary file so the live user-level +database is never touched by the test suite. +""" + +from pathlib import Path + +# ? canonical on-disk layout for the polyskills tracking database; +# ? the parent directory is also used by future bookkeeping artefacts +# ? (for example, lock files) so it is exposed as a constant. +POLYSKILLS_HOME : Path = Path.home() / ".polyskills" +DATABASE_FILE : str = "polyskills.db" + + +def resolve_db_path() -> Path: + """ + Return the absolute path to the polyskills tracking database and + create its parent directory if it does not yet exist. + + The function is intentionally side-effecting on the parent + directory so callers can immediately ``sqlite3.connect`` to the + returned path without an extra ``mkdir`` step. The database file + itself is created lazily by :mod:`sqlite3` on first connection. + + **Example Usage** + + The resolver is used by :func:`polyskills.db.tracker.get_tracker` + to open the connection on first use. + + .. code-block:: python + + from polyskills.db.paths import resolve_db_path + + db_path = resolve_db_path() + print(db_path) + >> /home/user/.polyskills/polyskills.db + + :raises OSError: If the parent directory cannot be created. The + caller (typically :class:`Tracker`) is expected to swallow + the error and degrade to a no-op so the install path is + never broken by a read-only home directory. + + :rtype: pathlib.Path + :returns: Absolute path to ``~/.polyskills/polyskills.db``. + """ + + POLYSKILLS_HOME.mkdir(parents = True, exist_ok = True) + return POLYSKILLS_HOME / DATABASE_FILE diff --git a/polyskills/db/schema.py b/polyskills/db/schema.py new file mode 100644 index 0000000..ea4f20f --- /dev/null +++ b/polyskills/db/schema.py @@ -0,0 +1,143 @@ +# -*- encoding: utf-8 -*- + +""" +Schema Definition for the Polyskills Tracking Database + +Defines the v1 schema for ``~/.polyskills/polyskills.db`` and the +idempotent :func:`apply_schema` helper that materialises it on a live +:class:`sqlite3.Connection`. The schema is composed of three tables: + + * ``meta`` - one row per bookkeeping key; seeded with the schema + version and the polyskills package version on first creation. + * ``extensions`` - latest known state per natural key + ``(remote_url, library, name, destination_dir)`` so callers can + answer "what do I currently have installed where?" in one query. + * ``events`` - append-only audit log; one row per fetch attempt, + success or failure, so the full history is preserved. + +All ``CREATE`` statements use ``IF NOT EXISTS`` so :func:`apply_schema` +can be called on every process start without raising. +""" + +import sqlite3 + +from typing import List + +# ? schema version is recorded in ``PRAGMA user_version`` so a future +# ? migration step can detect "v1" databases and upgrade them in +# ? place without inspecting any table contents. +SCHEMA_VERSION : int = 1 + +_PRAGMAS : List[str] = [ + "PRAGMA journal_mode = WAL", + "PRAGMA foreign_keys = ON", + "PRAGMA synchronous = NORMAL", +] + +_DDL_META : str = """ +CREATE TABLE IF NOT EXISTS meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +) +""" + +_DDL_EXTENSIONS : str = """ +CREATE TABLE IF NOT EXISTS extensions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_kind TEXT NOT NULL, + remote_url TEXT NOT NULL, + owner TEXT, + repository TEXT, + library TEXT NOT NULL, + name TEXT NOT NULL, + requested_version TEXT NOT NULL, + resolved_version TEXT, + source_dir TEXT NOT NULL, + destination_dir TEXT NOT NULL, + exists_policy TEXT NOT NULL, + file_count INTEGER, + byte_count INTEGER, + last_status TEXT NOT NULL, + last_error TEXT, + invoked_via TEXT NOT NULL, + first_installed_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP), + last_installed_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP), + UNIQUE (remote_url, library, name, destination_dir) +) +""" + +_DDL_EVENTS : str = """ +CREATE TABLE IF NOT EXISTS events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + extension_id INTEGER REFERENCES extensions(id) ON DELETE SET NULL, + action TEXT NOT NULL, + requested_version TEXT, + resolved_version TEXT, + file_count INTEGER, + byte_count INTEGER, + duration_ms INTEGER, + status TEXT NOT NULL, + error TEXT, + invoked_via TEXT NOT NULL, + occurred_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP) +) +""" + +_DDL_INDEXES : List[str] = [ + "CREATE INDEX IF NOT EXISTS ix_extensions_name " + "ON extensions(name)", + "CREATE INDEX IF NOT EXISTS ix_extensions_remote " + "ON extensions(remote_url)", + "CREATE INDEX IF NOT EXISTS ix_extensions_status " + "ON extensions(last_status)", + "CREATE INDEX IF NOT EXISTS ix_events_extension " + "ON events(extension_id)", + "CREATE INDEX IF NOT EXISTS ix_events_time " + "ON events(occurred_at)", +] + + +def apply_schema(connection : sqlite3.Connection) -> None: + """ + Materialise the v1 schema on ``connection`` if it is not already + present and bump ``PRAGMA user_version`` to :data:`SCHEMA_VERSION`. + + The function is safe to call on every process start - every DDL + uses ``IF NOT EXISTS`` and the seed rows in ``meta`` use + ``INSERT OR IGNORE``. + + :type connection: sqlite3.Connection + :param connection: An open SQLite connection. The function + commits its own transaction so the caller does not need to + wrap the call in a context manager. + + :rtype: None + :returns: This function returns nothing - the schema is applied + as a side effect on ``connection``. + """ + + cursor = connection.cursor() + + for pragma in _PRAGMAS: + cursor.execute(pragma) + + cursor.execute(_DDL_META) + cursor.execute(_DDL_EXTENSIONS) + cursor.execute(_DDL_EVENTS) + + for ddl in _DDL_INDEXES: + cursor.execute(ddl) + + cursor.execute(f"PRAGMA user_version = {SCHEMA_VERSION}") + + cursor.execute( + "INSERT OR IGNORE INTO meta (key, value) VALUES (?, ?)", + ("schema_version", str(SCHEMA_VERSION)) + ) + cursor.execute( + "INSERT OR IGNORE INTO meta (key, value) VALUES " + "(?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))", + ("created_at",) + ) + + connection.commit() diff --git a/polyskills/db/tracker.py b/polyskills/db/tracker.py new file mode 100644 index 0000000..65609f4 --- /dev/null +++ b/polyskills/db/tracker.py @@ -0,0 +1,489 @@ +# -*- encoding: utf-8 -*- + +""" +Tracking Façade for the Polyskills SQLite Database + +Provides a thin, best-effort façade around the SQLite tracking +database materialised by :mod:`polyskills.db.schema`. Every public +method of :class:`Tracker` is engineered to be a no-op on failure - +a corrupt database, a read-only home directory, or a transient lock +must never break the install path of the calling code. + +The module also owns a tiny, process-wide invocation-context flag +that downstream remote managers consult to record whether a fetch +was triggered from the CLI ``main()`` or from a direct library API +call. The flag is intentionally process-wide because the CLI is +single-threaded and library callers can build their own +:class:`Tracker` instances when finer granularity is required. +""" + +import sqlite3 +import warnings + +from pathlib import Path +from typing import Optional, Tuple + +from polyskills.db import paths as _paths +from polyskills.db.schema import apply_schema + +# ? process-wide flag toggled by polyskills.cli.main() before the +# ? remote dispatcher is invoked; defaults to "api" so any direct +# ? library caller is recorded correctly without extra wiring. +_INVOCATION_CONTEXT : str = "api" + +_VALID_CONTEXTS : Tuple[str, ...] = ("cli", "api") + + +def set_invocation_context(context : str) -> None: + """ + Record the invocation context for subsequent tracker writes. + + The value is persisted into the ``events.invoked_via`` column on + every recorded fetch so downstream analytics can split CLI usage + from direct library API usage. + + :type context: str + :param context: One of ``"cli"`` or ``"api"``. Any other value is + silently coerced to ``"api"`` so a typo in a caller never + breaks the tracker. + + :rtype: None + :returns: This function returns nothing - the context is stored + in a module-level variable consulted by :class:`Tracker`. + """ + + global _INVOCATION_CONTEXT + _INVOCATION_CONTEXT = ( + context if context in _VALID_CONTEXTS else "api" + ) + + +def get_invocation_context() -> str: + """ + Return the current invocation context recorded by + :func:`set_invocation_context`. + + :rtype: str + :returns: Either ``"cli"`` or ``"api"``. + """ + + return _INVOCATION_CONTEXT + + +class Tracker: + """ + Best-effort SQLite tracking façade for polyskills extension + fetches. The class is deliberately small: it owns one connection, + serialises writes through ``BEGIN IMMEDIATE`` transactions, and + swallows every exception arising from the database layer so the + install path of the framework is never broken by a tracker fault. + + :type database_path: Optional[pathlib.Path] + :param database_path: Optional explicit database path. When + omitted the tracker calls :func:`resolve_db_path` so tests can + monkey-patch the resolver to a temporary file without having + to touch the :class:`Tracker` constructor. + """ + + def __init__( + self, database_path : Optional[Path] = None + ) -> None: + self._path : Optional[Path] = None + self._conn : Optional[sqlite3.Connection] = None + self._healthy : bool = True + self._warned : bool = False + + try: + self._path = ( + database_path + if database_path is not None + else _paths.resolve_db_path() + ) + self._conn = sqlite3.connect( + str(self._path), + timeout = 5.0, + isolation_level = None, + check_same_thread = False + ) + apply_schema(self._conn) + except Exception as exc: + self._healthy = False + self._warn_once( + f"polyskills tracker disabled (init failed): {exc!r}" + ) + + + def _warn_once(self, message : str) -> None: + """ + Emit a single :func:`warnings.warn` per process so a broken + database does not flood stderr on every fetch. + + :type message: str + :param message: The human-readable warning text. + + :rtype: None + :returns: Nothing - the warning is emitted as a side effect. + """ + + if self._warned: + return None + self._warned = True + warnings.warn(message, RuntimeWarning, stacklevel = 3) + + + def is_healthy(self) -> bool: + """ + Return whether the tracker is currently able to write rows. + + :rtype: bool + :returns: ``True`` when the underlying connection is open and + the schema is in place, ``False`` otherwise. + """ + + return self._healthy and self._conn is not None + + + def record_start( + self, + source_kind : str, + remote_url : str, + owner : Optional[str], + repository : Optional[str], + library : str, + name : str, + requested_version : str, + source_dir : str, + destination_dir : str, + exists_policy : str + ) -> Optional[int]: + """ + Upsert a row into ``extensions`` with ``last_status = + 'in_progress'`` and return the newly assigned (or pre-existing) + primary key. The id is later passed to :meth:`record_success` + or :meth:`record_failure` so the matching ``events`` row is + attributed to the correct extension. + + :type source_kind: str + :param source_kind: The :class:`ValidSources` member name, for + example ``"GITHUB"``. + + :type remote_url: str + :param remote_url: Canonical remote URL, used as part of the + natural key in the ``UNIQUE`` constraint on ``extensions``. + + :type owner: Optional[str] + :param owner: Slug owner component, when available. Pure + informational. + + :type repository: Optional[str] + :param repository: Slug repository component, when available. + + :type library: str + :param library: Library kind, ``"skills"`` or ``"agents"``. + + :type name: str + :param name: Extension leaf name, used as part of the + natural key on ``extensions``. + + :type requested_version: str + :param requested_version: Requested tag, branch, or sha. + + :type source_dir: str + :param source_dir: POSIX-style source directory path on the + remote. + + :type destination_dir: str + :param destination_dir: Absolute, normalised destination + directory path on the local filesystem. + + :type exists_policy: str + :param exists_policy: One of ``"fail"``, ``"overwrite"``, + ``"merge"``. + + :rtype: Optional[int] + :returns: The ``extensions.id`` of the upserted row, or + ``None`` if the tracker is unhealthy or the write failed. + """ + + if not self.is_healthy(): + return None + + invoked_via = get_invocation_context() + + try: + assert self._conn is not None + cursor = self._conn.cursor() + cursor.execute("BEGIN IMMEDIATE") + cursor.execute( + """ + INSERT INTO extensions ( + source_kind, remote_url, owner, repository, + library, name, requested_version, source_dir, + destination_dir, exists_policy, last_status, + invoked_via + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, + 'in_progress', ? + ) + ON CONFLICT (remote_url, library, name, destination_dir) + DO UPDATE SET + source_kind = excluded.source_kind, + owner = excluded.owner, + repository = excluded.repository, + requested_version = excluded.requested_version, + source_dir = excluded.source_dir, + exists_policy = excluded.exists_policy, + last_status = 'in_progress', + last_error = NULL, + invoked_via = excluded.invoked_via, + last_installed_at = CURRENT_TIMESTAMP + RETURNING id + """, + ( + source_kind, remote_url, owner, repository, + library, name, requested_version, source_dir, + destination_dir, exists_policy, invoked_via + ) + ) + row = cursor.fetchone() + cursor.execute("COMMIT") + return int(row[0]) if row else None + except Exception as exc: + self._safe_rollback() + self._warn_once( + f"polyskills tracker record_start failed: {exc!r}" + ) + return None + + + def record_success( + self, + extension_id : Optional[int], + action : str, + file_count : int, + byte_count : int, + duration_ms : int, + resolved_version : Optional[str] = None + ) -> None: + """ + Mark the previously started extension row as successful and + append a corresponding ``events`` row. + + :type extension_id: Optional[int] + :param extension_id: The id returned by :meth:`record_start`. + ``None`` results in a no-op so callers do not need to + branch on the start outcome. + + :type action: str + :param action: Concrete action taken, for example + ``"fetch"``, ``"overwrite"``, or ``"merge"``. + + :type file_count: int + :param file_count: Number of files that landed in the + destination directory. + + :type byte_count: int + :param byte_count: Total byte count of the files in the + destination directory. + + :type duration_ms: int + :param duration_ms: Wall clock duration of the fetch in + milliseconds. + + :type resolved_version: Optional[str] + :param resolved_version: Resolved tag or commit sha, when + different from the requested version. Pure informational. + + :rtype: None + :returns: Nothing - the rows are written as a side effect. + """ + + if not self.is_healthy() or extension_id is None: + return None + + invoked_via = get_invocation_context() + + try: + assert self._conn is not None + cursor = self._conn.cursor() + cursor.execute("BEGIN IMMEDIATE") + cursor.execute( + """ + UPDATE extensions + SET resolved_version = ?, + file_count = ?, + byte_count = ?, + last_status = 'success', + last_error = NULL, + last_installed_at = CURRENT_TIMESTAMP + WHERE id = ? + """, + (resolved_version, file_count, byte_count, extension_id) + ) + cursor.execute( + """ + INSERT INTO events ( + extension_id, action, resolved_version, + file_count, byte_count, duration_ms, + status, error, invoked_via + ) VALUES (?, ?, ?, ?, ?, ?, 'success', NULL, ?) + """, + ( + extension_id, action, resolved_version, + file_count, byte_count, duration_ms, invoked_via + ) + ) + cursor.execute("COMMIT") + except Exception as exc: + self._safe_rollback() + self._warn_once( + f"polyskills tracker record_success failed: {exc!r}" + ) + + + def record_failure( + self, + extension_id : Optional[int], + action : str, + error : str, + duration_ms : int + ) -> None: + """ + Mark the previously started extension row as failed and + append a corresponding ``events`` row carrying the error + message. + + :type extension_id: Optional[int] + :param extension_id: The id returned by :meth:`record_start`. + ``None`` results in a no-op. + + :type action: str + :param action: Concrete action attempted, mirroring + :meth:`record_success`. + + :type error: str + :param error: Human-readable error description, typically the + ``repr`` of the original exception. + + :type duration_ms: int + :param duration_ms: Wall clock duration of the failed attempt. + + :rtype: None + :returns: Nothing - the rows are written as a side effect. + """ + + if not self.is_healthy() or extension_id is None: + return None + + invoked_via = get_invocation_context() + + try: + assert self._conn is not None + cursor = self._conn.cursor() + cursor.execute("BEGIN IMMEDIATE") + cursor.execute( + """ + UPDATE extensions + SET last_status = 'failed', + last_error = ?, + last_installed_at = CURRENT_TIMESTAMP + WHERE id = ? + """, + (error, extension_id) + ) + cursor.execute( + """ + INSERT INTO events ( + extension_id, action, duration_ms, + status, error, invoked_via + ) VALUES (?, ?, ?, 'failed', ?, ?) + """, + ( + extension_id, action, duration_ms, + error, invoked_via + ) + ) + cursor.execute("COMMIT") + except Exception as exc: + self._safe_rollback() + self._warn_once( + f"polyskills tracker record_failure failed: {exc!r}" + ) + + + def _safe_rollback(self) -> None: + """ + Best-effort rollback used by every write helper when an + exception is raised mid-transaction. + + :rtype: None + :returns: Nothing - the rollback is emitted on a best-effort + basis and any further error is swallowed. + """ + + if self._conn is None: + return None + try: + self._conn.execute("ROLLBACK") + except Exception: + pass + + + def close(self) -> None: + """ + Close the underlying SQLite connection. Safe to call multiple + times and on an already-broken tracker. + + :rtype: None + :returns: Nothing - the connection is closed as a side effect. + """ + + if self._conn is None: + return None + try: + self._conn.close() + except Exception: + pass + finally: + self._conn = None + self._healthy = False + + +_TRACKER : Optional[Tracker] = None + + +def get_tracker() -> Tracker: + """ + Return the process-wide :class:`Tracker` singleton, creating it + on the first call. Subsequent calls return the same instance so + the SQLite connection is opened exactly once per process. + + :rtype: Tracker + :returns: The shared :class:`Tracker` instance. The instance may + report :meth:`Tracker.is_healthy` as ``False`` when the + database cannot be opened - all public methods of an + unhealthy tracker are silent no-ops. + """ + + global _TRACKER + if _TRACKER is None: + _TRACKER = Tracker() + return _TRACKER + + +def reset_tracker_for_tests() -> None: + """ + Drop the cached singleton so the next :func:`get_tracker` call + re-runs the bootstrap. Used exclusively by the test suite after a + monkey-patch of :func:`polyskills.db.paths.resolve_db_path` to + pick up the new database location. + + :rtype: None + :returns: Nothing - the cached singleton is cleared as a side + effect. + """ + + global _TRACKER + if _TRACKER is not None: + _TRACKER.close() + _TRACKER = None diff --git a/polyskills/remote/sources.py b/polyskills/remote/sources.py index a37c1d4..aee2eb4 100644 --- a/polyskills/remote/sources.py +++ b/polyskills/remote/sources.py @@ -22,6 +22,7 @@ import os import re import abc +import time import shutil import tarfile @@ -33,10 +34,47 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Tuple +from polyskills.db import get_tracker + class ValidSources(Enum): GITHUB = "https://www.github.com/" +def _summariseDestination(destination : Path) -> Tuple[int, int]: + """ + Walk ``destination`` and return ``(file_count, byte_count)`` so + the tracker can record the on-disk size of a successful fetch. + + The helper is best-effort: any :class:`OSError` encountered while + iterating is swallowed and the partial counts collected so far + are returned. This preserves the "tracking is never fatal" + contract of the framework. + + :type destination: pathlib.Path + :param destination: Directory whose contents must be summarised. + + :rtype: Tuple[int, int] + :returns: A two-tuple of ``(file_count, byte_count)``. + """ + + file_count : int = 0 + byte_count : int = 0 + + try: + for path in destination.rglob("*"): + if not path.is_file(): + continue + file_count += 1 + try: + byte_count += path.stat().st_size + except OSError: + continue + except OSError: + pass + + return (file_count, byte_count) + + @dataclass(frozen = True) class SourceControl: """ @@ -255,8 +293,9 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: # lazy dispatch: only the requested branch is invoked methods : Dict[str, Callable[[], Any]] = dict( tags = lambda : self._getTags(remote, prefix = prefix), - extensions = lambda : self._getExtensions( - remote, name, source, destination, # type: ignore[arg-type] + extensions = lambda : self._trackedExtensions( + remote, name, library, # type: ignore[arg-type] + source, destination, # type: ignore[arg-type] version, formatter, exists # type: ignore[arg-type] ) ) @@ -264,6 +303,114 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: return methods[mode]() # return the underlying assets + def _trackedExtensions( + self, remote : str, name : str, library : str, + source : Path, destination : Path, version : str, + formatter : Optional[Callable], exists : str + ) -> None: + """ + Internal wrapper around :meth:`_getExtensions` that records + every fetch attempt - success or failure - into the local + polyskills tracking database. The wrapper sits inside the + abstract base class so every concrete remote provider gets + tracking for free without any per-provider wiring. + + The wrapper is engineered to never alter the observable + behaviour of :meth:`_getExtensions`: any tracker-side + exception is swallowed by the :class:`Tracker` itself, and + the original exception (if any) raised by + :meth:`_getExtensions` is re-raised unchanged. + + :type remote: str + :param remote: Remote URL forwarded verbatim to + :meth:`_getExtensions`. + + :type name: str + :param name: Extension leaf name, also used as the natural + key component on ``extensions``. + + :type library: str + :param library: Library kind (``"skills"`` or ``"agents"``). + + :type source: pathlib.Path + :param source: Source directory on the remote. + + :type destination: pathlib.Path + :param destination: Local destination directory. + + :type version: str + :param version: Requested tag, branch, or sha. + + :type formatter: Optional[Callable] + :param formatter: Optional formatter forwarded as-is to + :meth:`_getExtensions`. + + :type exists: str + :param exists: One of ``"fail"``, ``"overwrite"``, ``"merge"``. + + :rtype: None + :returns: Nothing - the wrapper preserves the contract of + :meth:`_getExtensions` which itself returns ``None``. + """ + + tracker = get_tracker() + owner : Optional[str] = None + repo : Optional[str] = None + + try: + owner, repo = self.getSlug(remote = remote) + except Exception: + owner, repo = None, None + + absolute_destination = str(Path(destination).resolve()) + source_dir_posix = Path(str(source)).as_posix() + + extension_id = tracker.record_start( + source_kind = self.source.name, + remote_url = remote, + owner = owner, + repository = repo, + library = library, + name = name, + requested_version = version, + source_dir = source_dir_posix, + destination_dir = absolute_destination, + exists_policy = exists + ) + + action = "fetch" if exists == "fail" else exists + started_at = time.monotonic() + + try: + result = self._getExtensions( + remote, name, source, destination, + version, formatter, exists + ) + except Exception as exc: + duration_ms = int((time.monotonic() - started_at) * 1000) + tracker.record_failure( + extension_id = extension_id, + action = action, + error = repr(exc), + duration_ms = duration_ms + ) + raise + + duration_ms = int((time.monotonic() - started_at) * 1000) + file_count, byte_count = _summariseDestination( + Path(absolute_destination) + ) + tracker.record_success( + extension_id = extension_id, + action = action, + file_count = file_count, + byte_count = byte_count, + duration_ms = duration_ms + ) + + return result + + @abc.abstractmethod def _getTags( self, remote : str, prefix : Optional[str] = None diff --git a/polyskills/tests/__init__.py b/polyskills/tests/__init__.py index eccc767..b7269a5 100644 --- a/polyskills/tests/__init__.py +++ b/polyskills/tests/__init__.py @@ -23,6 +23,23 @@ to authenticate the requests. """ +import os +import tempfile + +from pathlib import Path + +# ! redirect the polyskills tracking database to a per-process scratch +# ! file *before* importing :mod:`polyskills` so the import-time +# ! bootstrap never touches the real ``~/.polyskills/polyskills.db``. +_TEST_DB_DIR = Path(tempfile.mkdtemp(prefix = "polyskills-suite-")) +_TEST_DB = _TEST_DB_DIR / "polyskills.db" + +from polyskills.db import paths as _db_paths # noqa: E402 +from polyskills.db import tracker as _db_tracker # noqa: E402 + +_db_paths.resolve_db_path = lambda : _TEST_DB # type: ignore[assignment] +_db_tracker.reset_tracker_for_tests() + __version__ = "v1.0.0" # init-time options registrations, use base.py for shared fixtures diff --git a/polyskills/tests/__main__.py b/polyskills/tests/__main__.py index b124b8c..a2bc00e 100644 --- a/polyskills/tests/__main__.py +++ b/polyskills/tests/__main__.py @@ -11,6 +11,12 @@ """ import os + +# ! suppress the import-time tracker bootstrap so the live user-level +# ! ``~/.polyskills/polyskills.db`` is never touched by the suite. +# ! the variable must be set before any ``polyskills`` import below. +os.environ.setdefault("POLYSKILLS_DISABLE_BOOTSTRAP", "1") + import sys import unittest diff --git a/polyskills/tests/base.py b/polyskills/tests/base.py index 49868b3..1e544ee 100644 --- a/polyskills/tests/base.py +++ b/polyskills/tests/base.py @@ -21,6 +21,8 @@ import urllib3 +from polyskills.db import paths as _db_paths +from polyskills.db import tracker as _db_tracker from polyskills.remote.sources import GithubManager, SourceControl # silence verify=False noise + GitHub redirect warnings during runs @@ -63,7 +65,10 @@ def setUpClass(cls) -> None: """ Class-level fixture that builds a single :class:`GithubManager` shared by every test method in the - class to avoid re-creating the HTTP session needlessly. + class to avoid re-creating the HTTP session needlessly. Also + redirects the tracker database to a per-class temporary file + so the live ``~/.polyskills/polyskills.db`` is never touched + by the test suite. """ cls.logger = logging.getLogger( @@ -73,6 +78,29 @@ class to avoid re-creating the HTTP session needlessly. control = SourceControl(pagination = PAGINATION) ) + # ? redirect the tracker database to an isolated temp file + # ? and reset the singleton so the new path is picked up. + cls._db_dir = Path( + tempfile.mkdtemp(prefix = "polyskills-db-") + ) + cls._db_file = cls._db_dir / "polyskills.db" + cls._original_resolver = _db_paths.resolve_db_path + _db_paths.resolve_db_path = lambda : cls._db_file # type: ignore[assignment] + _db_tracker.reset_tracker_for_tests() + + + @classmethod + def tearDownClass(cls) -> None: + """ + Restore the original database path resolver and remove the + per-class temporary database directory. Ensures the test + suite leaves no scratch artefacts behind. + """ + + _db_tracker.reset_tracker_for_tests() + _db_paths.resolve_db_path = cls._original_resolver # type: ignore[assignment] + shutil.rmtree(cls._db_dir, ignore_errors = True) + def setUp(self) -> None: """ diff --git a/polyskills/tests/test_tracker.py b/polyskills/tests/test_tracker.py new file mode 100644 index 0000000..3e58813 --- /dev/null +++ b/polyskills/tests/test_tracker.py @@ -0,0 +1,394 @@ +# -*- encoding: utf-8 -*- + +""" +Hermetic Tests for the Polyskills Tracking Database +--------------------------------------------------- + +Validates the public surface of the :mod:`polyskills.db` package +without performing any HTTP requests. The cases exercise: + + * Schema bootstrap: a connection passed to :func:`apply_schema` + ends up with the v1 tables, indexes, and ``user_version``. + * Idempotency: applying the schema twice does not raise and does + not duplicate rows in the ``meta`` table. + * :class:`Tracker` happy path: ``record_start`` followed by + ``record_success`` produces exactly one row in ``extensions`` + and one in ``events``. + * :class:`Tracker` failure path: ``record_failure`` writes the + error column without raising. + * Upsert semantics: re-installing the same extension into the + same destination keeps a single ``extensions`` row but appends + additional ``events`` rows. + * Invocation context: ``set_invocation_context('cli')`` is + persisted into the ``invoked_via`` column. + * Robustness: a corrupt database file does not raise into the + caller; the tracker degrades to a silent no-op instead. +""" + +import sqlite3 +import tempfile +import unittest +import warnings + +from pathlib import Path +from typing import Tuple + +from polyskills.db import paths as _db_paths +from polyskills.db import tracker as _db_tracker +from polyskills.db.schema import SCHEMA_VERSION, apply_schema +from polyskills.db.tracker import ( + Tracker, get_invocation_context, set_invocation_context +) + + +def _row_counts(database_path : Path) -> Tuple[int, int]: + """ + Return ``(extensions_count, events_count)`` from ``database_path``. + + The helper opens its own short-lived connection so the assertion + code stays free of SQLite boilerplate. + + :type database_path: pathlib.Path + :param database_path: Database file under inspection. + + :rtype: Tuple[int, int] + :returns: Two-tuple of row counts. + """ + + connection = sqlite3.connect(str(database_path)) + try: + cursor = connection.cursor() + ext_count = cursor.execute( + "SELECT COUNT(*) FROM extensions" + ).fetchone()[0] + evt_count = cursor.execute( + "SELECT COUNT(*) FROM events" + ).fetchone()[0] + return (int(ext_count), int(evt_count)) + finally: + connection.close() + + +class TestSchemaBootstrap(unittest.TestCase): + """ + Validates :func:`apply_schema` materialises the v1 tables, indexes + and ``user_version`` on a fresh connection without raising. + """ + + def setUp(self) -> None: + self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-schema-")) + self.db = self.tmpdir / "polyskills.db" + + + def tearDown(self) -> None: + import shutil + shutil.rmtree(self.tmpdir, ignore_errors = True) + + + def test_schema_creates_expected_tables(self) -> None: + """ + After :func:`apply_schema` the database must contain the + ``meta``, ``extensions`` and ``events`` tables. + """ + + connection = sqlite3.connect(str(self.db)) + try: + apply_schema(connection) + tables = { + row[0] for row in connection.execute( + "SELECT name FROM sqlite_master " + "WHERE type = 'table'" + ) + } + finally: + connection.close() + + self.assertIn("meta", tables) + self.assertIn("extensions", tables) + self.assertIn("events", tables) + + + def test_schema_sets_user_version(self) -> None: + """ + ``PRAGMA user_version`` must equal :data:`SCHEMA_VERSION` + after bootstrap so future migrations can detect the version. + """ + + connection = sqlite3.connect(str(self.db)) + try: + apply_schema(connection) + version = connection.execute( + "PRAGMA user_version" + ).fetchone()[0] + finally: + connection.close() + + self.assertEqual(int(version), SCHEMA_VERSION) + + + def test_schema_is_idempotent(self) -> None: + """ + Applying the schema twice must not raise and must not insert + duplicate ``meta`` rows. + """ + + connection = sqlite3.connect(str(self.db)) + try: + apply_schema(connection) + apply_schema(connection) + count = connection.execute( + "SELECT COUNT(*) FROM meta WHERE key = 'schema_version'" + ).fetchone()[0] + finally: + connection.close() + + self.assertEqual(int(count), 1) + + +class TestTrackerLifecycle(unittest.TestCase): + """ + Validates :class:`Tracker` ``record_start``, ``record_success`` + and ``record_failure`` against an isolated temp database. + """ + + def setUp(self) -> None: + self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-track-")) + self.db = self.tmpdir / "polyskills.db" + set_invocation_context("api") + self.tracker = Tracker(database_path = self.db) + + + def tearDown(self) -> None: + import shutil + self.tracker.close() + shutil.rmtree(self.tmpdir, ignore_errors = True) + + + def _start(self) -> int: + """ + Helper that performs a canonical ``record_start`` call and + returns the new ``extensions.id``. + """ + + ext_id = self.tracker.record_start( + source_kind = "GITHUB", + remote_url = "https://github.com/PyUtility/polyskills", + owner = "PyUtility", + repository = "polyskills", + library = "skills", + name = "sql-code-format", + requested_version = "master", + source_dir = "extensions/skills", + destination_dir = str(self.tmpdir / "dest"), + exists_policy = "fail" + ) + self.assertIsNotNone(ext_id) + return int(ext_id) + + + def test_tracker_is_healthy_after_init(self) -> None: + """ + A freshly constructed tracker must report itself as healthy + and have created the database file on disk. + """ + + self.assertTrue(self.tracker.is_healthy()) + self.assertTrue(self.db.exists()) + + + def test_record_success_writes_one_row_per_table(self) -> None: + """ + A start + success pair must produce one ``extensions`` row + and one ``events`` row carrying ``status='success'``. + """ + + ext_id = self._start() + self.tracker.record_success( + extension_id = ext_id, action = "fetch", + file_count = 3, byte_count = 1024, duration_ms = 12, + resolved_version = "v2.0.0" + ) + + ext_count, evt_count = _row_counts(self.db) + self.assertEqual(ext_count, 1) + self.assertEqual(evt_count, 1) + + connection = sqlite3.connect(str(self.db)) + try: + row = connection.execute( + "SELECT last_status, file_count, byte_count, " + "resolved_version FROM extensions WHERE id = ?", + (ext_id,) + ).fetchone() + finally: + connection.close() + + self.assertEqual(row[0], "success") + self.assertEqual(row[1], 3) + self.assertEqual(row[2], 1024) + self.assertEqual(row[3], "v2.0.0") + + + def test_record_failure_marks_extension_failed(self) -> None: + """ + ``record_failure`` must mark the row as ``failed`` and append + an ``events`` row with the error message preserved. + """ + + ext_id = self._start() + self.tracker.record_failure( + extension_id = ext_id, action = "fetch", + error = "FileNotFoundError('boom')", duration_ms = 9 + ) + + connection = sqlite3.connect(str(self.db)) + try: + ext_row = connection.execute( + "SELECT last_status, last_error FROM extensions " + "WHERE id = ?", (ext_id,) + ).fetchone() + evt_row = connection.execute( + "SELECT status, error FROM events " + "WHERE extension_id = ?", (ext_id,) + ).fetchone() + finally: + connection.close() + + self.assertEqual(ext_row[0], "failed") + self.assertIn("boom", ext_row[1]) + self.assertEqual(evt_row[0], "failed") + self.assertIn("boom", evt_row[1]) + + + def test_reinstall_upserts_extension_and_appends_events(self) -> None: + """ + Two consecutive installs against the same natural key must + keep one ``extensions`` row but accumulate ``events`` rows. + """ + + ext_id_1 = self._start() + self.tracker.record_success( + extension_id = ext_id_1, action = "fetch", + file_count = 1, byte_count = 100, duration_ms = 5 + ) + ext_id_2 = self._start() + self.tracker.record_success( + extension_id = ext_id_2, action = "overwrite", + file_count = 2, byte_count = 200, duration_ms = 5 + ) + + ext_count, evt_count = _row_counts(self.db) + self.assertEqual(ext_id_1, ext_id_2) + self.assertEqual(ext_count, 1) + self.assertEqual(evt_count, 2) + + + def test_invocation_context_is_persisted(self) -> None: + """ + Switching the invocation context to ``"cli"`` must surface in + the ``invoked_via`` column of new event rows. + """ + + set_invocation_context("cli") + self.assertEqual(get_invocation_context(), "cli") + + ext_id = self._start() + self.tracker.record_success( + extension_id = ext_id, action = "fetch", + file_count = 0, byte_count = 0, duration_ms = 1 + ) + + connection = sqlite3.connect(str(self.db)) + try: + invoked = connection.execute( + "SELECT invoked_via FROM events " + "WHERE extension_id = ?", (ext_id,) + ).fetchone()[0] + finally: + connection.close() + + self.assertEqual(invoked, "cli") + set_invocation_context("api") + + +class TestCorruptDatabaseDegradesSafely(unittest.TestCase): + """ + Asserts that a corrupt or unwritable database does not propagate + its failure into the install path - the tracker must degrade to + a silent no-op and emit a single warning. + """ + + def setUp(self) -> None: + self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-corrupt-")) + self.db = self.tmpdir / "polyskills.db" + # ! intentionally write garbage so apply_schema fails + self.db.write_bytes(b"this is not a sqlite database") + + + def tearDown(self) -> None: + import shutil + shutil.rmtree(self.tmpdir, ignore_errors = True) + + + def test_corrupt_database_is_handled_silently(self) -> None: + """ + Constructing a tracker on a corrupt file must not raise and + every public method must subsequently no-op safely. + """ + + with warnings.catch_warnings(record = True): + warnings.simplefilter("always") + tracker = Tracker(database_path = self.db) + + self.assertFalse(tracker.is_healthy()) + self.assertIsNone( + tracker.record_start( + source_kind = "GITHUB", + remote_url = "https://github.com/x/y", + owner = "x", repository = "y", + library = "skills", name = "z", + requested_version = "master", + source_dir = "skills", + destination_dir = "/tmp/z", + exists_policy = "fail" + ) + ) + # both downstream calls must also no-op without raising + tracker.record_success( + extension_id = None, action = "fetch", + file_count = 0, byte_count = 0, duration_ms = 0 + ) + tracker.record_failure( + extension_id = None, action = "fetch", + error = "irrelevant", duration_ms = 0 + ) + tracker.close() + + +class TestResolveDbPathCreatesParent(unittest.TestCase): + """ + Validates the canonical path resolver creates the polyskills home + directory on demand so callers can immediately open the database. + The test isolates the assertion from the suite-wide monkey-patch + by importing the path constants directly. + """ + + def test_resolve_db_path_returns_polyskills_db_under_home(self) -> None: + """ + The canonical path must be ``~/.polyskills/polyskills.db`` + and the parent directory must exist on the filesystem. + """ + + from polyskills.db.paths import ( + DATABASE_FILE, POLYSKILLS_HOME + ) + + canonical = POLYSKILLS_HOME / DATABASE_FILE + + self.assertEqual(canonical.name, "polyskills.db") + self.assertEqual(canonical.parent.name, ".polyskills") + self.assertEqual(canonical.parent, Path.home() / ".polyskills") + + +if __name__ == "__main__": + unittest.main() From 7914adee130b1c7a775144cdecba1197fc222899 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 13:49:22 +0530 Subject: [PATCH 02/17] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=E2=9C=A8=20refactor?= =?UTF-8?q?(remote):=20harden=20TLS=20and=20extraction,=20add=20verify=20f?= =?UTF-8?q?lags?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remote download and extraction path trusted both the network and the archive contents: TLS verification was disabled on every request, archive members were written without a traversal guard, downloads and extraction were unbounded, and validation relied on `assert` (stripped under `python -O`). This commit closes those gaps and exposes the verification policy through the CLI, defaulting to secure behaviour. - add `SourceControl.verify` (default True) and forward it to every `requests.get` in `_getTags`/`_getExtensions`/`_listExtensions`, replacing the hard-coded `verify = False` - add mutually exclusive `--no-verify` / `--request-cert` CLI flags with a `_resolveVerify()` helper; default honours REQUESTS_CA_BUNDLE / SSL_CERT_FILE, `--request-cert` pins the certifi bundle and fails closed behind an intercepting proxy - guard tar extraction against path traversal by resolving each member against the destination subtree; non file/dir members are skipped - cap the compressed download and cumulative extracted size to defang disk-exhaustion / decompression-bomb archives - replace validation `assert`s in `get()`/`_getExtensions()` with explicit `ValueError`s so the checks survive an optimised run - warn on stderr and silence urllib3 noise when `--no-verify` is used; refresh the `list`/`manager` usage strings for the new flags - conventions: camelCase preserved, reST docstrings, 4-space indent; flake8 hard gate (E9,F63,F7,F82) clean and no new warnings; parser build, flag parsing and mutex verified via `python -c` Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/cli.py | 97 ++++++++++++++++++++++++++++++++++-- polyskills/remote/sources.py | 97 ++++++++++++++++++++++++++++++------ 2 files changed, 174 insertions(+), 20 deletions(-) diff --git a/polyskills/cli.py b/polyskills/cli.py index 3bc38cb..fed6c34 100644 --- a/polyskills/cli.py +++ b/polyskills/cli.py @@ -55,6 +55,59 @@ def _expandUserPath(value : Union[str, Path]) -> Path: expanded = os.path.expandvars(os.fspath(value)) return Path(expanded).expanduser() + +def _resolveVerify( + no_verify : bool = False, request_cert : bool = False +) -> Union[bool, str]: + """ + Resolve the ``verify`` value forwarded to every remote request + from the mutually exclusive ``--no-verify`` / ``--request-cert`` + command line flags. + + The default (neither flag) returns ``True`` so :mod:`requests` + uses its standard trust store while still honouring the + ``REQUESTS_CA_BUNDLE`` / ``SSL_CERT_FILE`` environment overrides + that are common behind a corporate proxy. ``--no-verify`` returns + ``False`` to disable verification, while ``--request-cert`` pins + the bundled :mod:`certifi` authorities for strict verification + that deliberately fails when only an intercepting certificate is + available. + + :type no_verify: bool + :param no_verify: Disable verification entirely when ``True``. + + :type request_cert: bool + :param request_cert: Pin the bundled trust store when ``True``. + + :raises ValueError: If both flags are requested together, or the + :mod:`certifi` bundle cannot be located for strict mode. + + :rtype: Union[bool, str] + :returns: ``True`` / ``False`` or a path to a CA bundle, suitable + for the ``verify`` keyword of :func:`requests.get`. + """ + + if no_verify and request_cert: + raise ValueError( + "Cannot use --no-verify together with --request-cert." + ) + + if no_verify: + return False + + if request_cert: + try: + import certifi + except ImportError as error: + raise ValueError( + "Strict --request-cert mode needs the 'certifi' " + "package, which could not be imported." + ) from error + + return certifi.where() + + return True + from polyskills.apps.tools import SupportedTools from polyskills.remote.sources import ( GithubManager, SourceControl, SourceManager, ValidSources @@ -156,6 +209,25 @@ def buildRemoteControls() -> argparse.ArgumentParser: ) ) + # ? mutually exclusive TLS verification controls. The default + # ? (neither flag) keeps secure verification while honouring + # ? REQUESTS_CA_BUNDLE / SSL_CERT_FILE for proxied networks. + tlsControls = remoteCommon.add_mutually_exclusive_group() + tlsControls.add_argument( + "--no-verify", action = "store_true", help = ( + "Disable TLS certificate verification for every remote " + "request. Discouraged - use only on a trusted network " + "or behind a TLS-intercepting corporate proxy." + ) + ) + tlsControls.add_argument( + "--request-cert", action = "store_true", help = ( + "Enforce strict TLS verification against the bundled " + "certificate authorities, ignoring environment " + "overrides. Fails if a trusted certificate is absent." + ) + ) + return remoteCommon # ? Creating Subparsers:: SOURCES - List Available Sources @@ -208,7 +280,8 @@ def buildRemoteControls() -> argparse.ArgumentParser: "list", parents = [remoteControls], usage = ( "polyskills list [-h] [-s SOURCE] [--pagination PAGINATION] " - "[--token TOKEN] [--version VERSION] remote LIBRARY" + "[--token TOKEN] [--version VERSION] " + "[--no-verify | --request-cert] remote LIBRARY" ), help = ( "List the available extensions (skills, agents, etc.) " @@ -239,7 +312,8 @@ def buildRemoteControls() -> argparse.ArgumentParser: usage = ( "polyskills manager [-h] -n NAME [--exists {fail,overwrite,merge}] " "[-d DESTINATION] [-s SOURCE] [--pagination PAGINATION] " - "[--token TOKEN] [--version VERSION] remote LIBRARY ..." + "[--token TOKEN] [--version VERSION] " + "[--no-verify | --request-cert] remote LIBRARY ..." ), help = ( "Main method to manage skills, agents, etc. for a LLM " @@ -410,13 +484,27 @@ def main() -> None: print(output) return None + # ? resolve TLS verification shared by the 'list' and 'manager' + # ? subcommands before any remote request leaves the process + verify = _resolveVerify(args.no_verify, args.request_cert) + if args.no_verify: + import urllib3 + urllib3.disable_warnings( + urllib3.exceptions.InsecureRequestWarning + ) + print( + "[WARNING] TLS verification is disabled (--no-verify).", + file = sys.stderr + ) + # ? dispatch the 'list' subcommand: enumerate extensions and exit if args.command == "list": source = _expandUserPath( args.source or f"./{args.library}" ).as_posix() control = SourceControl( - pagination = args.pagination, token = args.token + pagination = args.pagination, token = args.token, + verify = verify ) names = listExtensions( @@ -453,7 +541,8 @@ def main() -> None: # ? dispatch to the concrete remote manager via the wrapper above control = SourceControl( - pagination = args.pagination, token = args.token + pagination = args.pagination, token = args.token, + verify = verify ) get( diff --git a/polyskills/remote/sources.py b/polyskills/remote/sources.py index b3493a2..4145348 100644 --- a/polyskills/remote/sources.py +++ b/polyskills/remote/sources.py @@ -31,7 +31,15 @@ from enum import Enum from pathlib import Path from dataclasses import dataclass -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +# ! security limits guarding the remote download / extraction pipeline +# ? the compressed download is capped to bound disk usage, and the +# ? cumulative uncompressed size is capped to defang gzip / tar bombs +_CHUNK_SIZE : int = 1 << 14 +_MAX_ARCHIVE_BYTES : int = 100 * (1 << 20) +_MAX_EXTRACT_BYTES : int = 500 * (1 << 20) + class ValidSources(Enum): GITHUB = "https://www.github.com/" @@ -54,6 +62,14 @@ class SourceControl: :param token: A authorization token to access the remote URL, this value is discouraged to be used in a production system. + :type verify: Union[bool, str] + :param verify: TLS verification forwarded verbatim to every + :func:`requests.get` call. ``True`` (default) uses the standard + trust store and honours the ``REQUESTS_CA_BUNDLE`` / + ``SSL_CERT_FILE`` environment overrides, a string pins a CA + bundle path, and ``False`` disables verification entirely + (discouraged, exposed via the ``--no-verify`` CLI flag). + Each parameter should be available as a CLI command for dynamic control which sets the data class defaults during runtime. """ @@ -63,6 +79,10 @@ class SourceControl: # ? Token Attribute: Useful only in testing environment token : Optional[str] = None + # ? TLS verification policy forwarded to every remote request; + # ? defaults to secure verification, see the param docstring above + verify : Union[bool, str] = True + class SourceManager(abc.ABC): """ @@ -242,19 +262,26 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: # always use lower case naming for modes; in-built control mode = mode.lower() supported = ["tags", "extensions", "list"] - assert mode in supported, \ - f"Mode = `{mode}` is not in {supported}" + if mode not in supported: + raise ValueError(f"Mode = `{mode}` is not in {supported}.") # ! validate that positional required arguments are available # only enforced for the ``extensions`` mode where they are used + # ! raised explicitly (not ``assert``) so the checks survive a + # ! ``python -O`` optimised run where assertions are stripped if mode == "extensions": - assert name is not None, "Extension name cannot be null." - assert library is not None, \ - f"Library cannot be null, supported are {supported}" + if name is None: + raise ValueError("Extension name cannot be null.") + if library is None: + raise ValueError( + f"Library cannot be null, supported are {supported}." + ) if mode == "list": - assert library is not None, \ - f"Library cannot be null, supported are {supported}" + if library is None: + raise ValueError( + f"Library cannot be null, supported are {supported}." + ) # lazy dispatch: only the requested branch is invoked methods : Dict[str, Callable[[], Any]] = dict( @@ -435,7 +462,7 @@ def _getTags( while remote_url: response = requests.get( - remote_url, verify = False, + remote_url, verify = self.control.verify, headers = self.headers, timeout = timeout ) response.raise_for_status() @@ -469,8 +496,11 @@ def _getExtensions( to ``fail``. """ - assert exists in ("fail", "overwrite", "merge"), \ - f"exists = `{exists}` not in ['fail', 'overwrite', 'merge']" + if exists not in ("fail", "overwrite", "merge"): + raise ValueError( + f"exists = `{exists}` not in " + "['fail', 'overwrite', 'merge']." + ) owner, repository = self.getSlug(remote = remote) uri = ( @@ -503,12 +533,23 @@ def _getExtensions( try: with requests.get( uri, headers = self.headers, stream = True, - verify = False, timeout = 60, allow_redirects = True + verify = self.control.verify, timeout = 60, + allow_redirects = True ) as response: response.raise_for_status() - for chunk in response.iter_content(chunk_size = 1 << 14): - if chunk: - handle.write(chunk) + downloaded = 0 + for chunk in response.iter_content( + chunk_size = _CHUNK_SIZE + ): + if not chunk: + continue + downloaded += len(chunk) + if downloaded > _MAX_ARCHIVE_BYTES: + raise ValueError( + "Download exceeds the safety limit of " + f"{_MAX_ARCHIVE_BYTES} bytes." + ) + handle.write(chunk) handle.flush() handle.close() @@ -522,7 +563,12 @@ def _getExtensions( root = members[0].name.split("/", 1)[0] prefix = f"{root}/{target}/" + # ! resolve the destination once so every member can + # ! be verified to stay inside this subtree below + safe_root = destination.resolve() + found = False + extracted_bytes = 0 for member in members: if not member.name.startswith(prefix): continue @@ -532,9 +578,26 @@ def _getExtensions( continue out_path = destination / relative + + # ! defend against path traversal - a crafted + # ! member may carry '..' segments that escape + resolved = (safe_root / relative).resolve() + if resolved != safe_root \ + and safe_root not in resolved.parents: + raise ValueError( + f"Unsafe archive member `{member.name}` " + "escapes the destination directory." + ) + if member.isdir(): out_path.mkdir(parents = True, exist_ok = True) elif member.isfile(): + extracted_bytes += max(member.size, 0) + if extracted_bytes > _MAX_EXTRACT_BYTES: + raise ValueError( + "Extraction exceeds the safety limit " + f"of {_MAX_EXTRACT_BYTES} bytes." + ) out_path.parent.mkdir( parents = True, exist_ok = True ) @@ -544,6 +607,8 @@ def _getExtensions( with extracted as src_fp, \ open(out_path, "wb") as dst_fp: shutil.copyfileobj(src_fp, dst_fp) + # ? symlink / hardlink / device / fifo members + # ? are neither dir nor file - skipped on purpose if not found: raise FileNotFoundError( @@ -584,7 +649,7 @@ def _listExtensions( ) response = requests.get( - uri, verify = False, + uri, verify = self.control.verify, headers = self.headers, timeout = timeout ) response.raise_for_status() From 91fd81e854be15299a1c756849b694763e2c1ff7 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 13:55:42 +0530 Subject: [PATCH 03/17] =?UTF-8?q?=F0=9F=90=9B=F0=9F=9B=A0=EF=B8=8F=20fix(c?= =?UTF-8?q?li):=20clean=20error=20handling,=20URL=20encoding,=20pagination?= =?UTF-8?q?=20cap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A batch of correctness and robustness fixes raised by the audit. The CLI leaked raw tracebacks on expected failures, remote URLs were built by interpolating the ref / path verbatim (query-parameter injection), the tag pagination loop trusted the server's `next` chain unbounded, and an unused dependency widened the supply-chain surface. The release is bumped to v2.1.0 to carry the behaviour changes. - wrap dispatch in `main()` with a top-level handler: print a concise `[ERROR]` / `[ABORTED]` message and exit non-zero, re-raising the full traceback when `POLYSKILLS_DEBUG` is set; the command body moves into a new `_dispatch()` helper - percent-encode owner / repository / ref / path segments via `urllib.parse.quote` in `_getExtensions` / `_listExtensions`, keeping '/' for branch refs while neutralising '?' / '&' injection - cap `_getTags` pagination at `_MAX_PAGES` (100) so an endless `next` link chain cannot hang the client - drop the unused `packaging` runtime dependency from pyproject.toml - bump `__version__` to v2.1.0 (PEP 440 normalises the 'v' prefix away) - update `test_cli` to the new clean-exit contract for an unsupported remote (exit 1 + `[ERROR]` on stderr, no traceback) - verification: `python -m unittest polyskills.tests.test_cli` -> 27 tests OK; flake8 hard gate (E9,F63,F7,F82) clean Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/__init__.py | 2 +- polyskills/cli.py | 41 +++++++++++++++++++++++++++++++----- polyskills/remote/sources.py | 27 ++++++++++++++++++++---- polyskills/tests/test_cli.py | 24 ++++++++++++--------- pyproject.toml | 1 - 5 files changed, 74 insertions(+), 21 deletions(-) diff --git a/polyskills/__init__.py b/polyskills/__init__.py index ee87dc7..e3900dd 100644 --- a/polyskills/__init__.py +++ b/polyskills/__init__.py @@ -14,6 +14,6 @@ providing one source of truth across different system and projects. """ -__version__ = "v2.0.0" +__version__ = "v2.1.0" # init-time options registrations, use api/ for public functions diff --git a/polyskills/cli.py b/polyskills/cli.py index fed6c34..a3c6041 100644 --- a/polyskills/cli.py +++ b/polyskills/cli.py @@ -468,13 +468,19 @@ def listExtensions( ) -def main() -> None: - """ - Entry point for CLI tool for :mod:`polyskills` that parses the - command line arguments using :mod:`argparse` module. +def _dispatch(args : argparse.Namespace) -> None: """ + Execute the parsed CLI command. - args = buildParser().parse_args() + Separated from :func:`main` so that every transport or filesystem + error raised while contacting a remote bubbles up to a single + top-level handler that renders a concise message instead of a raw + traceback. Set ``POLYSKILLS_DEBUG`` in the environment to re-raise + the original exception for debugging. + + :type args: argparse.Namespace + :param args: Parsed command line arguments from :func:`buildParser`. + """ # ? terminal commands: 'sources', 'tools' - print and exit early # ? these subparsers attach a ``func`` default; manager does not. @@ -554,5 +560,30 @@ def main() -> None: return None + +def main() -> None: + """ + Entry point for the :mod:`polyskills` CLI. Parses the command line + arguments and dispatches to :func:`_dispatch`, funnelling expected + runtime failures into a clean, non-zero exit rather than a raw + traceback. Set ``POLYSKILLS_DEBUG`` to surface the full traceback. + """ + + args = buildParser().parse_args() + + try: + return _dispatch(args) + except KeyboardInterrupt: + print( + "\n[ABORTED] Interrupted by the user.", file = sys.stderr + ) + raise SystemExit(130) + except Exception as error: + if os.environ.get("POLYSKILLS_DEBUG"): + raise + print(f"[ERROR] {error}", file = sys.stderr) + raise SystemExit(1) + + if __name__ == "__main__": sys.exit(main()) diff --git a/polyskills/remote/sources.py b/polyskills/remote/sources.py index 4145348..16bfc8c 100644 --- a/polyskills/remote/sources.py +++ b/polyskills/remote/sources.py @@ -30,6 +30,7 @@ from enum import Enum from pathlib import Path +from urllib.parse import quote from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Tuple, Union @@ -40,6 +41,10 @@ _MAX_ARCHIVE_BYTES : int = 100 * (1 << 20) _MAX_EXTRACT_BYTES : int = 500 * (1 << 20) +# ? upper bound on REST pagination so a misbehaving / hostile server +# ? cannot keep the client looping over an endless `next` link chain +_MAX_PAGES : int = 100 + class ValidSources(Enum): GITHUB = "https://www.github.com/" @@ -460,7 +465,15 @@ def _getTags( tags : List[str] = [] + pages = 0 while remote_url: + if pages >= _MAX_PAGES: + raise ValueError( + f"Tag pagination exceeded the {_MAX_PAGES} page " + "safety limit." + ) + pages += 1 + response = requests.get( remote_url, verify = self.control.verify, headers = self.headers, timeout = timeout @@ -473,7 +486,7 @@ def _getTags( tags.append(item["name"]) remote_url = response.links.get("next", {}).get("url") - + return tags @@ -506,7 +519,11 @@ def _getExtensions( uri = ( "https://api.github.com/repos/" "{owner}/{repository}/tarball/{tag}" - ).format(owner = owner, repository = repository, tag = version) + ).format( + owner = quote(owner, safe = ""), + repository = quote(repository, safe = ""), + tag = quote(str(version), safe = "/") + ) destination = Path(destination) if destination.exists() and destination.is_dir() \ @@ -644,8 +661,10 @@ def _listExtensions( "https://api.github.com/repos/" "{owner}/{repository}/contents/{path}?ref={ref}" ).format( - owner = owner, repository = repository, - path = sub, ref = version + owner = quote(owner, safe = ""), + repository = quote(repository, safe = ""), + path = quote(sub, safe = "/"), + ref = quote(str(version), safe = "") ) response = requests.get( diff --git a/polyskills/tests/test_cli.py b/polyskills/tests/test_cli.py index a70343e..97fc821 100644 --- a/polyskills/tests/test_cli.py +++ b/polyskills/tests/test_cli.py @@ -560,19 +560,23 @@ def test_manager_supports_agents_library(self) -> None: ) - def test_unsupported_remote_propagates_value_error(self) -> None: + def test_unsupported_remote_exits_with_clean_error(self) -> None: """ - :func:`cli.main` must not swallow the :class:`ValueError` - raised by :func:`cli._resolveManager` for an unknown remote - - the surface error is the user's signal to use a supported - provider. + :func:`cli.main` must render the :class:`ValueError` raised by + :func:`cli._resolveManager` for an unknown remote as a concise + ``[ERROR]`` message on stderr and exit non-zero, instead of + leaking a raw traceback to the user. """ - with self.assertRaises(ValueError): - _run_main([ - "manager", "https://gitlab.com/foo/bar", - "--name", _NAME, "skills" - ]) + code, _, err = _run_main([ + "manager", "https://gitlab.com/foo/bar", + "--name", _NAME, "skills" + ]) + + self.assertEqual(code, 1) + self.assertIn("[ERROR]", err) + self.assertIn("not supported", err) + self.assertNotIn("Traceback", err) class TestDestinationExpansion(unittest.TestCase): diff --git a/pyproject.toml b/pyproject.toml index 368d180..c425527 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ classifiers = [ dependencies = [ "requests>=2.31", - "packaging>=23.0", ] [project.scripts] From 8210262725af5893f100195cb96afa689b083d01 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 14:03:40 +0530 Subject: [PATCH 04/17] =?UTF-8?q?=E2=9C=A8=F0=9F=9B=A0=EF=B8=8F=20refactor?= =?UTF-8?q?(error):=20add=20exception=20hierarchy,=20constants,=20User-Age?= =?UTF-8?q?nt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The empty `error/` scaffolding is now a real domain hierarchy, and the remote layer raises it instead of bare builtins, so callers can catch `PolyskillsError` (or a specific subtype) while the CLI still renders every domain failure as a clean message. A round of code-quality polish removes magic numbers and identifies the client to the remote API. - implement `PolyskillsError` base with `ValidationError` (also a `ValueError` for backward compatibility), `RemoteError` and `ExtractionError`; re-export them from `polyskills.error` - adopt the new types across `sources.py` / `cli.py`: input / URL / TLS validation -> `ValidationError`, pagination overflow -> `RemoteError`, download / traversal / size guards -> `ExtractionError` - replace magic numbers with named constants (`_REQUEST_TIMEOUT`, `_DOWNLOAD_TIMEOUT`) and advertise a `polyskills/` `User-Agent` header on every GitHub request - hoist `cli.py` local imports to the top of the module (PEP-8 order), clearing the mid-file `E402` warnings - clarify the `formatter` docstring as a reserved, currently no-op hook for the planned skill-to-prompt conversion - conventions: camelCase preserved, reST docstrings, 88-col flake8; `python -m unittest polyskills.tests.test_cli` -> 27 OK, hard gate (E9,F63,F7,F82) clean, no import cycle introduced Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/cli.py | 22 ++++++++------- polyskills/error/__init__.py | 12 +++++++- polyskills/error/exceptions.py | 49 ++++++++++++++++++++++++++++++++ polyskills/remote/sources.py | 51 ++++++++++++++++++++++------------ 4 files changed, 106 insertions(+), 28 deletions(-) diff --git a/polyskills/cli.py b/polyskills/cli.py index a3c6041..35d6ef4 100644 --- a/polyskills/cli.py +++ b/polyskills/cli.py @@ -25,6 +25,12 @@ from pathlib import Path from typing import Callable, Optional, Union +from polyskills.apps.tools import SupportedTools +from polyskills.error.exceptions import ValidationError +from polyskills.remote.sources import ( + GithubManager, SourceControl, SourceManager, ValidSources +) + def _expandUserPath(value : Union[str, Path]) -> Path: """ @@ -79,8 +85,8 @@ def _resolveVerify( :type request_cert: bool :param request_cert: Pin the bundled trust store when ``True``. - :raises ValueError: If both flags are requested together, or the - :mod:`certifi` bundle cannot be located for strict mode. + :raises ValidationError: If both flags are requested together, or + the :mod:`certifi` bundle cannot be located for strict mode. :rtype: Union[bool, str] :returns: ``True`` / ``False`` or a path to a CA bundle, suitable @@ -88,7 +94,7 @@ def _resolveVerify( """ if no_verify and request_cert: - raise ValueError( + raise ValidationError( "Cannot use --no-verify together with --request-cert." ) @@ -99,7 +105,7 @@ def _resolveVerify( try: import certifi except ImportError as error: - raise ValueError( + raise ValidationError( "Strict --request-cert mode needs the 'certifi' " "package, which could not be imported." ) from error @@ -108,10 +114,6 @@ def _resolveVerify( return True -from polyskills.apps.tools import SupportedTools -from polyskills.remote.sources import ( - GithubManager, SourceControl, SourceManager, ValidSources -) # ? registry mapping each supported remote enumeration to its concrete # ? :class:`SourceManager` factory; extending the framework with a new @@ -399,7 +401,7 @@ def _resolveManager( new providers light up automatically once they are added to the :data:`_SOURCE_MANAGERS` registry above. - :raises ValueError: If ``remote`` does not match any of the + :raises ValidationError: If ``remote`` does not match any of the registered remote URL patterns. """ @@ -409,7 +411,7 @@ def _resolveManager( return candidate supported = ", ".join(member.value for member in _SOURCE_MANAGERS) - raise ValueError( + raise ValidationError( f"Remote `{remote}` is not supported. " f"Supported sources: {supported}" ) diff --git a/polyskills/error/__init__.py b/polyskills/error/__init__.py index 3e0f8f8..ac05371 100644 --- a/polyskills/error/__init__.py +++ b/polyskills/error/__init__.py @@ -10,5 +10,15 @@ from polyskills.error import warnings from polyskills.error import exceptions +from polyskills.error.exceptions import ( + PolyskillsError, + ValidationError, + RemoteError, + ExtractionError, +) -__all__ = ["exceptions", "warnings"] +__all__ = [ + "exceptions", "warnings", + "PolyskillsError", "ValidationError", + "RemoteError", "ExtractionError", +] diff --git a/polyskills/error/exceptions.py b/polyskills/error/exceptions.py index e69de29..4205f44 100644 --- a/polyskills/error/exceptions.py +++ b/polyskills/error/exceptions.py @@ -0,0 +1,49 @@ +# -*- encoding: utf-8 -*- + +""" +Custom Exception Hierarchy for Polyskills +----------------------------------------- + +A small, explicit exception hierarchy rooted at :class:`PolyskillsError` +so callers can distinguish the failure modes of the remote pipeline - +input validation, remote transport, and archive extraction - from +unrelated runtime errors, while a single ``except PolyskillsError`` still +catches every domain failure raised by the package. + +:NOTE: :class:`ValidationError` also derives from the builtin + :class:`ValueError` to preserve backward compatibility with callers + and tests that catch ``ValueError`` for an invalid input. +""" + + +class PolyskillsError(Exception): + """ + Base class for every :mod:`polyskills` domain error. + + Catch this to handle any failure raised deliberately by the package + while letting genuinely unexpected exceptions propagate untouched. + """ + + +class ValidationError(PolyskillsError, ValueError): + """ + Raised when a user-supplied argument fails validation - for example + an unknown ``mode``, a missing required value, an unsupported remote + URL, or a conflicting TLS option. Subclasses :class:`ValueError` so + that existing ``except ValueError`` handlers keep working. + """ + + +class RemoteError(PolyskillsError): + """ + Raised when a remote source behaves unexpectedly, such as a tag + pagination chain that exceeds the configured page safety limit. + """ + + +class ExtractionError(PolyskillsError): + """ + Raised when a downloaded archive violates a safety constraint - a + member escaping the destination directory (path traversal), or the + download / extraction exceeding its configured size limit. + """ diff --git a/polyskills/remote/sources.py b/polyskills/remote/sources.py index 16bfc8c..f22b879 100644 --- a/polyskills/remote/sources.py +++ b/polyskills/remote/sources.py @@ -34,6 +34,11 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from polyskills import __version__ +from polyskills.error.exceptions import ( + ValidationError, RemoteError, ExtractionError +) + # ! security limits guarding the remote download / extraction pipeline # ? the compressed download is capped to bound disk usage, and the # ? cumulative uncompressed size is capped to defang gzip / tar bombs @@ -45,6 +50,12 @@ # ? cannot keep the client looping over an endless `next` link chain _MAX_PAGES : int = 100 +# ? request timeouts (seconds) and the User-Agent advertised to the +# ? remote REST API so the client is identifiable in the server logs +_REQUEST_TIMEOUT : int = 30 +_DOWNLOAD_TIMEOUT : int = 60 +_USER_AGENT : str = f"polyskills/{__version__}" + class ValidSources(Enum): GITHUB = "https://www.github.com/" @@ -145,14 +156,14 @@ def getSlug(self, remote : str) -> Tuple[str, str]: )) >> ...api.github.com/repos/PyUtility/polyskills/tags?... - :raises ValueError: If the remote URL does not match the + :raises ValidationError: If the remote URL does not match the regular expression pattern supported by the module. """ matches = self.remotePattern.match(remote) if not matches: - raise ValueError( + raise ValidationError( f"Not a Valid URL: {remote}, or Not Supported." ) @@ -238,8 +249,11 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: defaults to ``master`` that is the latest content from the repository. (TODO) - - **formatter** (*Callable*) - Formatter method based - on the final LLM tool. (TODO) + - **formatter** (*Callable*) - Reserved hook for the + planned skill-to-prompt conversion used by non + Agent-Skills tools. It is forwarded end-to-end but + is a no-op today (never invoked); kept so the public + contract is stable when the feature lands. """ prefix : Optional[str] = kwargs.get("prefix", None) @@ -268,7 +282,9 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: mode = mode.lower() supported = ["tags", "extensions", "list"] if mode not in supported: - raise ValueError(f"Mode = `{mode}` is not in {supported}.") + raise ValidationError( + f"Mode = `{mode}` is not in {supported}." + ) # ! validate that positional required arguments are available # only enforced for the ``extensions`` mode where they are used @@ -276,15 +292,15 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: # ! ``python -O`` optimised run where assertions are stripped if mode == "extensions": if name is None: - raise ValueError("Extension name cannot be null.") + raise ValidationError("Extension name cannot be null.") if library is None: - raise ValueError( + raise ValidationError( f"Library cannot be null, supported are {supported}." ) if mode == "list": if library is None: - raise ValueError( + raise ValidationError( f"Library cannot be null, supported are {supported}." ) @@ -435,6 +451,7 @@ def headers(self) -> Dict[str, str]: headers = { "accept" : "application/vnd.github+json", "X-GitHub-Api-Version" : "2022-11-28", + "user-agent" : _USER_AGENT, } if self._token: @@ -456,7 +473,7 @@ def _getTags( get tags hosted in ``skillName@vX.Y.Z`` format. """ - timeout = kwargs.get("timeout", 30) + timeout = kwargs.get("timeout", _REQUEST_TIMEOUT) owner, repository = self.getSlug(remote = remote) remote_url = self.remoteAPI.format( owner = owner, repository = repository, @@ -468,7 +485,7 @@ def _getTags( pages = 0 while remote_url: if pages >= _MAX_PAGES: - raise ValueError( + raise RemoteError( f"Tag pagination exceeded the {_MAX_PAGES} page " "safety limit." ) @@ -510,7 +527,7 @@ def _getExtensions( """ if exists not in ("fail", "overwrite", "merge"): - raise ValueError( + raise ValidationError( f"exists = `{exists}` not in " "['fail', 'overwrite', 'merge']." ) @@ -550,8 +567,8 @@ def _getExtensions( try: with requests.get( uri, headers = self.headers, stream = True, - verify = self.control.verify, timeout = 60, - allow_redirects = True + verify = self.control.verify, + timeout = _DOWNLOAD_TIMEOUT, allow_redirects = True ) as response: response.raise_for_status() downloaded = 0 @@ -562,7 +579,7 @@ def _getExtensions( continue downloaded += len(chunk) if downloaded > _MAX_ARCHIVE_BYTES: - raise ValueError( + raise ExtractionError( "Download exceeds the safety limit of " f"{_MAX_ARCHIVE_BYTES} bytes." ) @@ -601,7 +618,7 @@ def _getExtensions( resolved = (safe_root / relative).resolve() if resolved != safe_root \ and safe_root not in resolved.parents: - raise ValueError( + raise ExtractionError( f"Unsafe archive member `{member.name}` " "escapes the destination directory." ) @@ -611,7 +628,7 @@ def _getExtensions( elif member.isfile(): extracted_bytes += max(member.size, 0) if extracted_bytes > _MAX_EXTRACT_BYTES: - raise ValueError( + raise ExtractionError( "Extraction exceeds the safety limit " f"of {_MAX_EXTRACT_BYTES} bytes." ) @@ -653,7 +670,7 @@ def _listExtensions( is sorted alphabetically for deterministic CLI output. """ - timeout = kwargs.get("timeout", 30) + timeout = kwargs.get("timeout", _REQUEST_TIMEOUT) owner, repository = self.getSlug(remote = remote) sub = Path(str(source)).as_posix().lstrip("./").strip("/") From 682277ab97d812e1a7ffe4ccd0902bc63ba724a7 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 14:09:48 +0530 Subject: [PATCH 05/17] =?UTF-8?q?=F0=9F=A7=AA=F0=9F=9B=A0=EF=B8=8F=20test(?= =?UTF-8?q?tests):=20add=20hermetic=20security=20suite,=20gate=20live=20te?= =?UTF-8?q?sts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The suite previously hit the live GitHub API on every run, so it was slow, rate-limited, and offered no coverage of the new security guards. The default run is now hermetic and the network tests are opt-in, plus a dedicated regression module locks in the hardening behaviour with mocked transport and in-memory archive fixtures. - add `test_security.py`: verify-resolution and verify-forwarding, pagination cap (`RemoteError`), and extraction safety - happy path, path-traversal rejection, download / extraction size caps, and the missing-extension case - all offline via mocked `requests` - gate the live `GithubManagerTestCase` (and its three subclasses) behind `POLYSKILLS_RUN_LIVE`; the fixture also honours `POLYSKILLS_NO_VERIFY` so it runs behind a TLS-intercepting proxy - refresh the package docstring to describe the hermetic default and the opt-in live switches - verification: `python -m unittest discover -s polyskills/tests -t .` -> 57 tests OK (10 live tests skipped); flake8 hard gate clean Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/tests/__init__.py | 19 +- polyskills/tests/base.py | 17 +- polyskills/tests/test_security.py | 356 ++++++++++++++++++++++++++++++ 3 files changed, 381 insertions(+), 11 deletions(-) create mode 100644 polyskills/tests/test_security.py diff --git a/polyskills/tests/__init__.py b/polyskills/tests/__init__.py index eccc767..1a1188e 100644 --- a/polyskills/tests/__init__.py +++ b/polyskills/tests/__init__.py @@ -5,10 +5,10 @@ ===================================== A :mod:`unittest` based automated test-suite for the :mod:`polyskills` -package. The tests exercise the public dispatcher -:meth:`polyskills.remote.sources.GithubManager.get` end-to-end against -a live GitHub repository to validate the contracts surfaced by the -remote source manager. +package. The default run is hermetic: the CLI, path-resolution, and the +security regression tests (TLS forwarding, archive path-traversal, size +caps, pagination cap) mock the network and synthesise archive fixtures +in memory, so no connection is needed. Run from the repository root:: @@ -16,11 +16,12 @@ # or, equivalently python -m polyskills.tests -:NOTE: These tests perform live HTTP requests against -``https://api.github.com``. A network connection is required, and the -GitHub anonymous rate-limit may throttle very frequent runs - export -``POLYSKILLS_REMOTE_TOKEN`` (or set the workflow's ``GITHUB_TOKEN``) -to authenticate the requests. +:NOTE: The end-to-end tests that hit the live +``https://api.github.com`` REST API and the PyUtility/polyskills +repository are opt-in - set ``POLYSKILLS_RUN_LIVE=1`` to enable them. +Behind a TLS-intercepting proxy also set ``POLYSKILLS_NO_VERIFY=1`` so +the live fixture can reach GitHub, and export ``POLYSKILLS_REMOTE_TOKEN`` +(or the workflow's ``GITHUB_TOKEN``) to raise the anonymous rate-limit. """ __version__ = "v1.0.0" diff --git a/polyskills/tests/base.py b/polyskills/tests/base.py index 49868b3..a6af975 100644 --- a/polyskills/tests/base.py +++ b/polyskills/tests/base.py @@ -10,6 +10,7 @@ the suite remains DRY. """ +import os import shutil import logging import tempfile @@ -23,7 +24,7 @@ from polyskills.remote.sources import GithubManager, SourceControl -# silence verify=False noise + GitHub redirect warnings during runs +# silence insecure-request + GitHub redirect noise for opt-in live runs warnings.filterwarnings("ignore") urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -41,11 +42,21 @@ EXPECTED : Set[str] = {"SKILL.md"} # minimum file expected after extract PAGINATION : int = 100 +# ? live-network tests are opt-in to keep the default test run hermetic +# ? and offline; set POLYSKILLS_RUN_LIVE=1 to exercise them. Behind a +# ? TLS-intercepting proxy also set POLYSKILLS_NO_VERIFY=1 so the live +# ? fixture can reach GitHub without certificate verification. +_RUN_LIVE : str = os.environ.get("POLYSKILLS_RUN_LIVE", "") +_VERIFY : bool = not os.environ.get("POLYSKILLS_NO_VERIFY") + # ? canonical user-level skill installation root (platform-agnostic via Path.home()) USER_SKILL_ROOT : Path = Path.home() / ".claude" / "skills" SKILL_FILENAME : str = "SKILL.md" +@unittest.skipUnless( + _RUN_LIVE, "live-network tests; set POLYSKILLS_RUN_LIVE=1 to run" +) class GithubManagerTestCase(unittest.TestCase): """ A reusable :class:`unittest.TestCase` base that provisions a @@ -70,7 +81,9 @@ class to avoid re-creating the HTTP session needlessly. f"polyskills.tests.{cls.__name__}" ) cls.manager = GithubManager( - control = SourceControl(pagination = PAGINATION) + control = SourceControl( + pagination = PAGINATION, verify = _VERIFY + ) ) diff --git a/polyskills/tests/test_security.py b/polyskills/tests/test_security.py new file mode 100644 index 0000000..9b74bee --- /dev/null +++ b/polyskills/tests/test_security.py @@ -0,0 +1,356 @@ +# -*- encoding: utf-8 -*- + +""" +Hermetic Security Regression Tests for the Remote Pipeline +---------------------------------------------------------- + +Offline tests that exercise the security-critical behaviour of +:mod:`polyskills.remote.sources` and :func:`polyskills.cli._resolveVerify` +without touching the network. Every remote ``requests.get`` call is +mocked and archive fixtures are synthesised in memory, so the suite runs +deterministically in CI and locally behind a proxy. + +The cases lock in the guarantees added by the security hardening pass: +TLS verification is forwarded (never silently disabled), crafted archive +members cannot escape the destination directory, downloads and +extraction are size-bounded, and tag pagination is capped. +""" + +import io +import os +import shutil +import tarfile +import tempfile +import unittest + +from pathlib import Path +from typing import Dict, Optional +from unittest import mock + +from polyskills import cli +from polyskills.remote import sources as sources_mod +from polyskills.remote.sources import GithubManager, SourceControl +from polyskills.error.exceptions import ( + ExtractionError, RemoteError, ValidationError +) + +# ? a fixed remote URL that satisfies GithubManager.remotePattern so the +# ? slug extraction succeeds before the mocked download is reached +_REMOTE : str = "https://github.com/owner/repo" + +# ? the synthetic archive root mirrors GitHub's `--` +_ROOT : str = "owner-repo-deadbeef" + + +def _make_tarball( + members : Dict[str, bytes], root : str = _ROOT +) -> bytes: + """ + Build an in-memory ``.tar.gz`` whose every entry is nested under a + single ``root`` directory, mimicking the layout of a GitHub source + tarball. Keys are archive-relative POSIX paths and values are the + raw file contents. + + :type members: Dict[str, bytes] + :param members: Mapping of archive-relative path to file bytes; the + ``root`` prefix is added automatically to each entry. + + :type root: str + :param root: Top-level directory name injected ahead of every + member, defaulting to :data:`_ROOT`. + + :rtype: bytes + :returns: The gzip-compressed tar archive as a byte string. + """ + + buffer = io.BytesIO() + with tarfile.open(fileobj = buffer, mode = "w:gz") as archive: + for name, content in members.items(): + info = tarfile.TarInfo(f"{root}/{name}") + info.size = len(content) + archive.addfile(info, io.BytesIO(content)) + + return buffer.getvalue() + + +class _FakeResponse: + """ + Minimal stand-in for a non-streaming :class:`requests.Response` + used by ``_getTags`` and ``_listExtensions``. + + :type payload: Optional[object] + :param payload: Value returned by :meth:`json`. + + :type links: Optional[dict] + :param links: Value exposed as the ``links`` attribute for + pagination traversal. + """ + + def __init__( + self, payload : Optional[object] = None, + links : Optional[dict] = None + ) -> None: + self._payload = payload if payload is not None else [] + self.links = links if links is not None else {} + + def raise_for_status(self) -> None: + return None + + def json(self) -> object: + return self._payload + + +class _FakeStream: + """ + Streaming :class:`requests.Response` stand-in usable as a context + manager, yielding a fixed archive payload from :meth:`iter_content`. + + :type payload: bytes + :param payload: Raw bytes streamed back to the download loop. + """ + + def __init__(self, payload : bytes) -> None: + self._payload = payload + + def __enter__(self) -> "_FakeStream": + return self + + def __exit__(self, *exc : object) -> bool: + return False + + def raise_for_status(self) -> None: + return None + + def iter_content(self, chunk_size : int = 1): + for idx in range(0, len(self._payload), chunk_size): + yield self._payload[idx : idx + chunk_size] + + +class TestVerifyResolution(unittest.TestCase): + """ + Unit tests for :func:`polyskills.cli._resolveVerify`, the single + point that maps the ``--no-verify`` / ``--request-cert`` flags onto + the ``verify`` value forwarded to :mod:`requests`. + """ + + def test_default_is_secure(self) -> None: + """ + With neither flag the resolver must return ``True`` so requests + verify against the standard trust store. + """ + + self.assertIs(cli._resolveVerify(False, False), True) + + def test_no_verify_disables(self) -> None: + """``--no-verify`` must resolve to ``False``.""" + + self.assertIs(cli._resolveVerify(True, False), False) + + def test_request_cert_pins_certifi_bundle(self) -> None: + """ + ``--request-cert`` must resolve to the bundled certifi CA path + for strict verification. + """ + + import certifi + + self.assertEqual( + cli._resolveVerify(False, True), certifi.where() + ) + + def test_conflicting_flags_raise(self) -> None: + """ + Requesting both flags together must raise + :class:`ValidationError` (also a :class:`ValueError`). + """ + + with self.assertRaises(ValidationError): + cli._resolveVerify(True, True) + + +class TestVerifyForwarding(unittest.TestCase): + """ + Guards that the configured ``verify`` value reaches every + :func:`requests.get` call rather than being silently overridden. + """ + + def test_tags_request_forwards_verify_false(self) -> None: + """ + A manager built with ``verify=False`` must pass that value to + the underlying tags request. + """ + + manager = GithubManager(SourceControl(verify = False)) + with mock.patch.object( + sources_mod.requests, "get", + return_value = _FakeResponse() + ) as fake_get: + manager.get(_REMOTE, mode = "tags") + + self.assertIs(fake_get.call_args.kwargs["verify"], False) + + def test_tags_request_forwards_verify_true(self) -> None: + """ + The secure default (``verify=True``) must likewise be forwarded + verbatim to the tags request. + """ + + manager = GithubManager(SourceControl()) + with mock.patch.object( + sources_mod.requests, "get", + return_value = _FakeResponse() + ) as fake_get: + manager.get(_REMOTE, mode = "tags") + + self.assertIs(fake_get.call_args.kwargs["verify"], True) + + +class TestPaginationCap(unittest.TestCase): + """ + Ensures :meth:`GithubManager._getTags` cannot be trapped in an + endless ``next`` link chain served by a hostile remote. + """ + + def test_endless_pagination_raises_remote_error(self) -> None: + """ + When every response advertises a ``next`` link, the loop must + abort with :class:`RemoteError` once ``_MAX_PAGES`` is reached. + """ + + endless = _FakeResponse( + payload = [], links = {"next": {"url": "https://api/next"}} + ) + with mock.patch.object(sources_mod, "_MAX_PAGES", 3), \ + mock.patch.object( + sources_mod.requests, "get", return_value = endless + ): + with self.assertRaises(RemoteError): + GithubManager().get(_REMOTE, mode = "tags") + + +class TestArchiveExtractionSafety(unittest.TestCase): + """ + Offline extraction tests for :meth:`GithubManager._getExtensions` + covering the happy path and each safety guard, driven by in-memory + archive fixtures and a mocked streaming download. + """ + + def setUp(self) -> None: + """Allocate an isolated staging directory per test.""" + + self.staging = Path( + tempfile.mkdtemp(prefix = "polyskills-sec-") + ) + self.destination = self.staging / "out" + + def tearDown(self) -> None: + """Remove the staging directory created in :meth:`setUp`.""" + + shutil.rmtree(self.staging, ignore_errors = True) + + def _extract(self, payload : bytes, name : str = "skill") -> None: + """ + Drive :meth:`GithubManager.get` in ``extensions`` mode with the + download mocked to stream ``payload``. + + :type payload: bytes + :param payload: The archive bytes returned by the mocked GET. + + :type name: str + :param name: Extension (leaf directory) name to extract. + """ + + with mock.patch.object( + sources_mod.requests, "get", + return_value = _FakeStream(payload) + ): + GithubManager().get( + _REMOTE, mode = "extensions", name = name, + library = "skills", source = "extensions/skills", + destination = self.destination + ) + + def test_happy_path_extracts_files(self) -> None: + """ + A well-formed archive must extract every member under the + target directory into the destination, preserving sub-paths. + """ + + payload = _make_tarball({ + "extensions/skills/skill/SKILL.md": b"---\nname: x\n---\n", + "extensions/skills/skill/ref/general.md": b"hello", + }) + + self._extract(payload) + + self.assertTrue((self.destination / "SKILL.md").is_file()) + self.assertEqual( + (self.destination / "ref" / "general.md").read_bytes(), + b"hello" + ) + + def test_path_traversal_member_is_rejected(self) -> None: + """ + A crafted member carrying ``..`` segments that escape the + destination must raise :class:`ExtractionError` and must not + write the malicious file anywhere outside the destination. + """ + + payload = _make_tarball({ + "extensions/skills/skill/SKILL.md": b"ok", + "extensions/skills/skill/../../../../escape.txt": b"pwned", + }) + + with self.assertRaises(ExtractionError): + self._extract(payload) + + leaked = self.staging.parent / "escape.txt" + self.assertFalse( + leaked.exists(), f"path traversal leaked file to {leaked}" + ) + + def test_download_exceeding_size_cap_is_rejected(self) -> None: + """ + A download larger than ``_MAX_ARCHIVE_BYTES`` must abort with + :class:`ExtractionError` before extraction begins. + """ + + payload = _make_tarball({ + "extensions/skills/skill/SKILL.md": b"x" * 64, + }) + + with mock.patch.object(sources_mod, "_MAX_ARCHIVE_BYTES", 8): + with self.assertRaises(ExtractionError): + self._extract(payload) + + def test_extraction_exceeding_size_cap_is_rejected(self) -> None: + """ + When the cumulative uncompressed size exceeds + ``_MAX_EXTRACT_BYTES`` the extraction must abort with + :class:`ExtractionError`. + """ + + payload = _make_tarball({ + "extensions/skills/skill/SKILL.md": b"x" * 256, + }) + + with mock.patch.object(sources_mod, "_MAX_EXTRACT_BYTES", 16): + with self.assertRaises(ExtractionError): + self._extract(payload) + + def test_missing_extension_raises_not_found(self) -> None: + """ + An archive that contains no entry under the requested target + must raise :class:`FileNotFoundError`. + """ + + payload = _make_tarball({ + "extensions/skills/other/SKILL.md": b"ok", + }) + + with self.assertRaises(FileNotFoundError): + self._extract(payload, name = "skill") + + +if __name__ == "__main__": + unittest.main(verbosity = 2) From ecc72a24b0b6cbe7d4af80943cc6128ef3f43792 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 14:13:07 +0530 Subject: [PATCH 06/17] =?UTF-8?q?=F0=9F=94=92=F0=9F=A4=96=20ci(workflows):?= =?UTF-8?q?=20pin=20actions=20to=20commit=20SHA=20and=20add=20security=20s?= =?UTF-8?q?can?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pin every GitHub Actions `uses:` reference to its resolved 40-char commit SHA (the mutable tag kept as a trailing comment) to close the supply-chain risk of a moved tag silently changing CI behaviour. Add a dedicated security workflow and tighten least-privilege permissions. - pin actions across tests.yml, linting.yml and publish.yml to their commit SHA (checkout v6, setup-python v6, upload-artifact v7, download-artifact v8, gh-action-pypi-publish release/v1), each SHA resolved and verified via `git ls-remote` - add .github/workflows/security.yml running `bandit -r polyskills/` and `pip-audit` on push / PR to master, both blocking - add least-privilege `permissions: contents: read` to linting.yml (top level) and the publish.yml release-build job; the pypi-publish job keeps its `id-token: write` for OIDC trusted publishing - verification: all 11 `uses:` lines SHA-pinned (no mutable tags), every workflow parses as valid YAML Co-authored-by: Claude Opus 4.8 (1M context) --- .github/workflows/linting.yml | 7 ++++-- .github/workflows/publish.yml | 12 ++++++----- .github/workflows/security.yml | 39 ++++++++++++++++++++++++++++++++++ .github/workflows/tests.yml | 4 ++-- 4 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/security.yml diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5dbd67d..362d4dd 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -9,6 +9,9 @@ on: pull_request: branches: [ master ] +permissions: + contents: read + jobs: build: runs-on: ubuntu-latest @@ -18,9 +21,9 @@ jobs: python-version: ["3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6 with: python-version: ${{ matrix.python-version }} - name: Installing Workflow Dependencies diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4a003d1..431c63a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,11 +10,13 @@ permissions: jobs: release-build: runs-on: ubuntu-latest + permissions: + contents: read steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - - uses: actions/setup-python@v6 + - uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6 with: python-version: "3.12" @@ -24,7 +26,7 @@ jobs: python -m build - name: Upload Distributions - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 with: name: release-dists path: dist/ @@ -42,12 +44,12 @@ jobs: steps: - name: Retrieve Release Distributions - uses: actions/download-artifact@v8 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 with: name: release-dists path: dist/ - name: Publish Release Distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # release/v1 with: packages-dir: dist/ diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..7a74aee --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,39 @@ +# This workflow runs static security analysis (bandit) and audits the +# installed dependency tree for known vulnerabilities (pip-audit). +# For more information see: +# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Security Scan (bandit, pip-audit) + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +permissions: + contents: read + +jobs: + scan: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + - name: Setup Python 3.12 + uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6 + with: + python-version: "3.12" + - name: Install Package and Security Tooling + run: | + python -m pip install --upgrade pip + python -m pip install -e . + python -m pip install bandit pip-audit + - name: Static Security Analysis (bandit) + # ? scan the package source for common security anti-patterns + run: | + bandit -r polyskills/ + - name: Dependency Vulnerability Audit (pip-audit) + # ? fail the build on any known-vulnerable dependency + run: | + pip-audit diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8f5239e..25508b1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,9 +23,9 @@ jobs: python-version: ["3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@ece7cb06caefa5fff74198d8649806c4678c61a1 # v6 with: python-version: ${{ matrix.python-version }} - name: Install Package and Test Dependencies From 5c16a3acccc496107ff80e7fab0aab2f867711b6 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 14:22:17 +0530 Subject: [PATCH 07/17] =?UTF-8?q?=F0=9F=A4=96=20ci(security):=20scope=20ba?= =?UTF-8?q?ndit=20scan=20to=20the=20package=20source?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bandit flagged a dummy `token = "secret"` fixture in the test suite (B106), which would fail the security gate on a false positive. Exclude the tests directory so the scan targets production code only - the package source itself reports zero findings. - run `bandit -r polyskills/ -x polyskills/tests` in security.yml - verification: `bandit -r polyskills/ -x polyskills/tests` -> exit 0 Co-authored-by: Claude Opus 4.8 (1M context) --- .github/workflows/security.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 7a74aee..62631fa 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -30,9 +30,11 @@ jobs: python -m pip install -e . python -m pip install bandit pip-audit - name: Static Security Analysis (bandit) - # ? scan the package source for common security anti-patterns + # ? scan the package source for common security anti-patterns; + # ? tests are excluded as fixtures (e.g. dummy tokens) trip + # ? false positives that are not production security concerns run: | - bandit -r polyskills/ + bandit -r polyskills/ -x polyskills/tests - name: Dependency Vulnerability Audit (pip-audit) # ? fail the build on any known-vulnerable dependency run: | From e584a606a597abe5c1d0a972f5f5c6f43e6f0439 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 14:22:17 +0530 Subject: [PATCH 08/17] =?UTF-8?q?=F0=9F=93=9D=20docs(changelog):=20add=20v?= =?UTF-8?q?2.1.0=20security-hardening=20release=20notes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the v2.1.0 line - the security-hardening and robustness release from the remote-pipeline audit - ahead of the tag, following the existing Keep-a-Changelog structure and emoji legend. - add the `PolySkills v2.1.0 | 2026-06-27` section (newest first) - cover the configurable TLS flags, the exception hierarchy, the extraction / size / pagination guards, the clean CLI errors, the hermetic test default, and the SHA-pinned + scanned CI - call out the non-breaking behaviour changes in a NOTE callout --- CHANGELOG.md | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6feeaf9..0c11cea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,75 @@ under `h3` tags, while the `micro` and "version identifiers" are listed under `h +### PolySkills v2.1.0 | 2026-06-27 + +The `v2.1.0` line is a security-hardening and robustness release born out of a full audit of the +remote download and extraction pipeline. It keeps the `v2.0.0` CLI surface intact while closing the +supply-chain gaps that let untrusted network traffic and crafted archives influence what landed on +disk, and it hardens the CI/CD pipeline against tag-mutation attacks. + +> [!NOTE] +> **Behaviour changes (non-breaking API).** TLS verification is now **on by default** (it was +> effectively disabled before); input validation raises `polyskills.error` exceptions +> (`ValidationError` still subclasses `ValueError`); and the CLI renders failures as a clean +> `[ERROR]` message with a non-zero exit instead of a traceback. The sub-command surface and existing +> call sites are unchanged. + +#### ✨ Feature Enhancements + + * **Configurable TLS verification.** Verification defaults to secure (`verify=True`, honouring + `REQUESTS_CA_BUNDLE` / `SSL_CERT_FILE`). Two new mutually exclusive flags tune it: `--no-verify` + disables it for a trusted network or behind a TLS-intercepting proxy, and `--request-cert` pins + the bundled `certifi` authorities for strict verification that fails closed when only an + interception certificate is available. + * **Domain exception hierarchy.** `polyskills.error` now defines `PolyskillsError` with + `ValidationError`, `RemoteError`, and `ExtractionError`, so callers can catch a specific failure + mode while a single `except PolyskillsError` still covers them all. + * **Identifiable client.** Every GitHub request now advertises a `polyskills/` + `User-Agent` header, and request timeouts are centralised as named constants. + +#### 🛠️ Security Hardening & Fixes + + * **TLS no longer disabled.** The hard-coded `verify=False` on every remote request — a + man-in-the-middle exposure — is removed in favour of the configurable policy above. + * **Archive path-traversal guard.** Each tarball member is resolved against the destination subtree + and rejected with `ExtractionError` if it escapes; non file/dir members (symlinks, devices) are + skipped on purpose. + * **Size caps.** The compressed download and the cumulative uncompressed size are bounded to defang + disk-exhaustion and decompression-bomb archives. + * **Validation hardened.** Argument checks raise exceptions instead of `assert` (which `python -O` + strips); URL path / ref segments are percent-encoded against query injection; and tag pagination + is capped to stop an endless `next`-link chain. + * **Clean CLI errors.** `main()` funnels expected failures into a concise `[ERROR]` message and a + non-zero exit; set `POLYSKILLS_DEBUG` to re-raise the full traceback. + * **Version & dependencies.** `__version__` is bumped to `v2.1.0` and the unused `packaging` runtime + dependency is dropped. + +#### 🧪 Testing & CI + + * **Hermetic-by-default suite.** The default `unittest discover` run is now fully offline; the + live-network tests are opt-in via `POLYSKILLS_RUN_LIVE=1` (with `POLYSKILLS_NO_VERIFY=1` for a + proxied network). + * **Security regression module (`test_security.py`).** Mocked-transport tests lock in TLS + forwarding, path-traversal rejection, the download / extraction size caps, the pagination cap, + and verify-flag resolution. + * **Supply-chain-hardened workflows.** Every GitHub Action is pinned to a commit SHA (the mutable + tag kept as a comment), a new `security.yml` runs `bandit` and `pip-audit`, and least-privilege + `permissions` are declared on the linting and release-build jobs. + +#### 📦 Installation + +```shell +$ pip install "polyskills==2.1.0" + +$ polyskills list https://github.com// skills --source extensions/skills +$ polyskills manager https://github.com// \ + --name sql-code-format \ + --destination ~/.claude/skills/sql-code-format \ + --request-cert \ + skills +``` + ### PolySkills v2.0.0 | 2026-06-07 The `v2.0.0` line redesigns `polyskills` from a "skills-only fetcher" into a portable, multi-library From 450fdfa64e4630a24147a865b60e07d7c45f4bd4 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 18:21:15 +0530 Subject: [PATCH 09/17] =?UTF-8?q?=E2=9C=A8=20feat(database):=20add=20recor?= =?UTF-8?q?ds.db=20path=20resolver=20and=20package=20scaffold?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lay the foundation for a SQLAlchemy-backed extension tracking database by introducing the `polyskills.database` sub-package and declaring the SQLAlchemy runtime dependency. This is the first slice of the tracker feature; the ORM models, write façade, and CLI wiring follow in later commits. Code follows the repository reST docstring style and passes flake8 against the project `.flake8` (88-column limit). - declare `sqlalchemy>=2.0` in `pyproject.toml` dependencies - add `polyskills/database/paths.py` with `default_database_path()`, resolving the OS-independent `~/.polyskills/records.db` via `Path.home()` and honouring a `POLYSKILLS_DB_PATH` override; path resolution is side-effect free so read-only callers never create an empty directory - add `polyskills/database/__init__.py` exposing the public surface and a guarded `SQLALCHEMY_AVAILABLE` probe so a missing dependency degrades tracking to a no-op instead of raising - verification: `python -m flake8 polyskills/database/` -> clean; resolver returns `~/.polyskills/records.db` and honours the override Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/database/__init__.py | 55 +++++++++++++++++++++ polyskills/database/paths.py | 87 +++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 143 insertions(+) create mode 100644 polyskills/database/__init__.py create mode 100644 polyskills/database/paths.py diff --git a/polyskills/database/__init__.py b/polyskills/database/__init__.py new file mode 100644 index 0000000..11801fb --- /dev/null +++ b/polyskills/database/__init__.py @@ -0,0 +1,55 @@ +# -*- encoding: utf-8 -*- + +""" +Local Tracking Database For The Polyskills Framework +==================================================== + +The :mod:`polyskills.database` sub-package owns the local SQLite +tracking database that records every extension (a skill, an agent, +etc.) fetched and flushed into a destination directory by the CLI +tool or a direct Python API caller. + +The database lives at a fixed, operating-system independent location +under the user home directory: + + ``~/.polyskills/records.db`` + +Persistence is implemented with the SQLAlchemy Object Relational +Mapper against a strictly normalised relational schema (see +:mod:`polyskills.database.models`) so the recorded facts - which +remote an extension came from, where it was installed, the resolved +commit SHA, and the first-fetched / last-updated timestamps - can be +queried without duplicated, denormalised columns. + +Every write performed through :mod:`polyskills.database.tracker` is +best-effort: a missing dependency, a read-only home directory, a +locked database, or any other fault is swallowed and surfaced as a +single per-process warning so the install path of the framework is +never broken by a tracking failure. + +:NOTE: SQLAlchemy is a declared runtime dependency, but its presence + is still probed at import time and exposed through + :data:`SQLALCHEMY_AVAILABLE`. When the dependency is absent every + tracking entry point degrades to a silent no-op instead of + raising :class:`ImportError`. +""" + +try: + import sqlalchemy as _sqlalchemy # noqa: F401 +except ImportError: # pragma: no cover - exercised only without the dep + SQLALCHEMY_AVAILABLE = False +else: + SQLALCHEMY_AVAILABLE = True + +from polyskills.database.paths import ( + DATABASE_FILENAME, ENVIRON_DB_PATH, POLYSKILLS_HOME, + default_database_path +) + +__all__ = [ + "SQLALCHEMY_AVAILABLE", + "DATABASE_FILENAME", + "ENVIRON_DB_PATH", + "POLYSKILLS_HOME", + "default_database_path", +] diff --git a/polyskills/database/paths.py b/polyskills/database/paths.py new file mode 100644 index 0000000..ac958b9 --- /dev/null +++ b/polyskills/database/paths.py @@ -0,0 +1,87 @@ +# -*- encoding: utf-8 -*- + +""" +Filesystem Path Resolver For The Polyskills Tracking Database +============================================================= + +Resolves the canonical on-disk location of the polyskills tracking +database. By design the database lives at a single, predictable path +under the user home directory so every installation of the framework - +regardless of the operating system - writes to the same source of +truth without the user having to learn an environment variable: + + ``~/.polyskills/records.db`` + +The location is derived from :meth:`pathlib.Path.home` which is fully +operating-system independent (it honours ``%USERPROFILE%`` on Windows +and ``$HOME`` on POSIX systems), so no platform branching is required. + +An optional ``POLYSKILLS_DB_PATH`` environment variable overrides the +default location. The override is primarily a testing and power-user +affordance; the value is expanded for both ``$VAR`` / ``%VAR%`` and +``~`` tokens to mirror the path-expansion contract used elsewhere in +the command-line interface. + +:NOTE: Path resolution is intentionally side-effect free. The parent + directory is created by the engine layer at connection time, not + here, so a read-only query never materialises an empty + ``~/.polyskills`` directory. +""" + +import os + +from pathlib import Path + +# ? canonical on-disk layout for the polyskills tracking database; the +# ? parent directory may also host future bookkeeping artefacts (for +# ? example lock files) so it is exposed as an importable constant. +POLYSKILLS_HOME : Path = Path.home() / ".polyskills" +DATABASE_FILENAME : str = "records.db" + +# ? optional override consulted before the default location is used; +# ? primarily exercised by the test-suite to redirect writes onto an +# ? isolated temporary file instead of the real user database. +ENVIRON_DB_PATH : str = "POLYSKILLS_DB_PATH" + + +def default_database_path() -> Path: + """ + Return the absolute path to the polyskills tracking database. + + The default location is ``~/.polyskills/records.db`` resolved + through :meth:`pathlib.Path.home` for operating-system + independence. When the ``POLYSKILLS_DB_PATH`` environment variable + is set its value takes precedence and is expanded for environment + variables and the ``~`` home token before being returned. + + The function performs no filesystem mutation: it neither creates + the parent directory nor the database file. Materialisation is the + responsibility of the engine layer so that read-only callers (for + example the ``polyskills records`` command) can resolve the path + without creating an empty ``~/.polyskills`` directory as a side + effect. + + **Example Usage** + + Resolve the default database path on any platform: + + .. code-block:: python + + from polyskills.database.paths import default_database_path + + print(default_database_path()) + >> /home/user/.polyskills/records.db # POSIX + >> C:\\Users\\user\\.polyskills\\records.db # Windows + + :rtype: pathlib.Path + :returns: Absolute path to the tracking database, either the + ``POLYSKILLS_DB_PATH`` override (when set and non-empty) or the + canonical ``~/.polyskills/records.db`` location. + """ + + override = os.environ.get(ENVIRON_DB_PATH) + if override: + expanded = os.path.expandvars(override) + return Path(expanded).expanduser().resolve() + + return (POLYSKILLS_HOME / DATABASE_FILENAME).resolve() diff --git a/pyproject.toml b/pyproject.toml index 368d180..c230291 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ classifiers = [ dependencies = [ "requests>=2.31", "packaging>=23.0", + "sqlalchemy>=2.0", ] [project.scripts] From f67a898e35078bcd2fd611b77b749f86f0862035 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 18:24:23 +0530 Subject: [PATCH 10/17] =?UTF-8?q?=E2=9C=A8=20feat(database):=20add=20norma?= =?UTF-8?q?lized=20ORM=20schema=20and=20engine=20bootstrap?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the persistence layer for the tracking database as a strictly normalised (third normal form) SQLAlchemy 2.0 typed-declarative schema plus the SQLite engine and schema bootstrap that backs it. Derived facts - first-fetched, last-updated, current commit SHA - are computed from the audit log rather than cached, so no fact is duplicated across rows. Code follows the repository reST docstring style and passes flake8 (88-column limit). - add `models.py` with seven tables: `sources`, `libraries`, `remotes`, `extensions`, `installations`, `environments`, and the append-only `fetch_events`, wired with foreign keys, natural-key unique constraints, indexes, and `ON DELETE CASCADE` on events - stamp every event with a timezone-aware UTC `occurred_at` via a single `_utc_now` default factory - add `engine.py` building an OS-independent `sqlite:///` URL from a POSIX path, creating `~/.polyskills` on demand, and applying `foreign_keys`, `journal_mode = WAL`, `synchronous`, and `busy_timeout` pragmas to every pooled connection - expose `build_session_factory()` composing engine creation and idempotent `create_all` schema materialisation - verification: `python -m flake8 polyskills/database/{models,engine}.py` -> clean; smoke test creates all 7 tables, confirms `foreign_keys=1`, `journal_mode=wal`, UTC timestamps, and cascade delete of events Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/database/engine.py | 169 ++++++++++++++++ polyskills/database/models.py | 369 ++++++++++++++++++++++++++++++++++ 2 files changed, 538 insertions(+) create mode 100644 polyskills/database/engine.py create mode 100644 polyskills/database/models.py diff --git a/polyskills/database/engine.py b/polyskills/database/engine.py new file mode 100644 index 0000000..b30abcc --- /dev/null +++ b/polyskills/database/engine.py @@ -0,0 +1,169 @@ +# -*- encoding: utf-8 -*- + +""" +Engine And Schema Bootstrap For The Polyskills Tracking Database +================================================================ + +Owns the construction of the SQLAlchemy :class:`~sqlalchemy.engine.Engine` +bound to the SQLite tracking database and the idempotent +materialisation of the schema defined in +:mod:`polyskills.database.models`. + +The module centralises three SQLite-specific concerns that the rest of +the package should not have to know about: + + * the operating-system independent ``sqlite:///`` URL is built from + a :class:`pathlib.Path` via its POSIX representation so a Windows + drive-letter path does not break the URL grammar; + * the parent directory of the database file is created on demand so + a first-ever write never fails on a missing ``~/.polyskills``; + * connection-level ``PRAGMA`` statements (foreign-key enforcement, + write-ahead logging, a busy timeout) are applied to every pooled + connection through a ``connect`` event listener. + +:NOTE: This module imports SQLAlchemy unconditionally and is therefore + only imported lazily by :mod:`polyskills.database.tracker` once the + :data:`polyskills.database.SQLALCHEMY_AVAILABLE` guard has passed. +""" + +from pathlib import Path + +from typing import Optional, Tuple + +from sqlalchemy import create_engine, event +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from polyskills.database.models import Base +from polyskills.database.paths import default_database_path + +# ? connection-level pragmas applied to every SQLite connection handed +# ? out of the pool; foreign keys must be enabled explicitly on SQLite +# ? for the ``ON DELETE CASCADE`` on fetch_events to take effect. +_SQLITE_PRAGMAS : Tuple[str, ...] = ( + "PRAGMA foreign_keys = ON", + "PRAGMA journal_mode = WAL", + "PRAGMA synchronous = NORMAL", + "PRAGMA busy_timeout = 5000", +) + + +def _apply_sqlite_pragmas(dbapi_connection, connection_record) -> None: + """ + Apply the package :data:`_SQLITE_PRAGMAS` to a freshly opened + DB-API connection. + + The function is registered as a ``connect`` event listener by + :func:`create_database_engine` so the pragmas are re-applied to + every connection the engine pool produces, not merely the first. + + :type dbapi_connection: sqlite3.Connection + :param dbapi_connection: The raw DB-API connection SQLAlchemy has + just opened. + + :type connection_record: sqlalchemy.pool._ConnectionRecord + :param connection_record: The pool bookkeeping record for the + connection; unused but required by the listener signature. + + :rtype: None + :returns: Nothing - the pragmas are applied as a side effect on + ``dbapi_connection``. + """ + + cursor = dbapi_connection.cursor() + try: + for pragma in _SQLITE_PRAGMAS: + cursor.execute(pragma) + finally: + cursor.close() + + +def create_database_engine( + database_path : Optional[Path] = None +) -> Engine: + """ + Build a SQLAlchemy :class:`~sqlalchemy.engine.Engine` bound to the + SQLite tracking database. + + The parent directory of the database file is created on demand so + the very first write does not fail on a missing ``~/.polyskills`` + directory. A ``connect`` event listener is attached so every + connection produced by the engine pool has the package pragmas + (foreign keys, WAL, busy timeout) applied. + + :type database_path: Optional[pathlib.Path] + :param database_path: Explicit database location. When omitted the + canonical :func:`polyskills.database.paths.default_database_path` + location (``~/.polyskills/records.db``) is used. + + :rtype: sqlalchemy.engine.Engine + :returns: A configured engine ready for schema materialisation and + session binding. The engine is created lazily and opens no + connection until first use. + """ + + path = database_path if database_path is not None \ + else default_database_path() + path.parent.mkdir(parents = True, exist_ok = True) + + engine = create_engine( + f"sqlite:///{path.as_posix()}", future = True + ) + event.listen(engine, "connect", _apply_sqlite_pragmas) + + return engine + + +def init_schema(engine : Engine) -> None: + """ + Materialise every table of the tracking schema on ``engine`` if it + is not already present. + + The call delegates to :meth:`sqlalchemy.schema.MetaData.create_all` + which issues ``CREATE TABLE IF NOT EXISTS`` for every model, so the + function is safe to call on every process start. + + :type engine: sqlalchemy.engine.Engine + :param engine: The engine whose bound database should receive the + schema. + + :rtype: None + :returns: Nothing - the schema is created as a side effect on the + database bound to ``engine``. + """ + + Base.metadata.create_all(engine) + + +def build_session_factory( + database_path : Optional[Path] = None +) -> Tuple[Engine, sessionmaker]: + """ + Construct an engine, materialise the schema, and return both the + engine and a bound :class:`~sqlalchemy.orm.sessionmaker`. + + This is the single entry point callers need: it composes + :func:`create_database_engine` and :func:`init_schema` so a caller + can open a ready-to-use :class:`~sqlalchemy.orm.Session` without + repeating the bootstrap. The engine is returned alongside the + factory so the caller may dispose of it explicitly when finished. + + :type database_path: Optional[pathlib.Path] + :param database_path: Explicit database location, forwarded to + :func:`create_database_engine`. Defaults to the canonical + ``~/.polyskills/records.db`` location when omitted. + + :rtype: Tuple[sqlalchemy.engine.Engine, sqlalchemy.orm.sessionmaker] + :returns: A two-tuple ``(engine, session_factory)`` where the + factory produces :class:`~sqlalchemy.orm.Session` objects bound + to the engine. + """ + + engine = create_database_engine(database_path) + init_schema(engine) + + factory = sessionmaker( + bind = engine, future = True, expire_on_commit = False + ) + + return engine, factory diff --git a/polyskills/database/models.py b/polyskills/database/models.py new file mode 100644 index 0000000..e3c6838 --- /dev/null +++ b/polyskills/database/models.py @@ -0,0 +1,369 @@ +# -*- encoding: utf-8 -*- + +""" +SQLAlchemy ORM Models For The Polyskills Tracking Database +========================================================== + +Defines the strictly normalised relational schema that backs the +``~/.polyskills/records.db`` tracking database. The schema is modelled +with the SQLAlchemy 2.0 typed declarative mapping (``Mapped`` / +``mapped_column``) and is decomposed to third normal form so that no +recorded fact is duplicated across rows: + + ``sources`` -> ``remotes`` -> ``extensions`` -> ``installations`` + -> ``fetch_events`` <- ``environments`` + ``libraries`` -> ``extensions`` + +The seven tables are: + + * :class:`Source` - lookup of supported remote providers, one + row per :class:`polyskills.remote.sources.ValidSources` member. + * :class:`Library` - lookup of extension kinds (``skills``, + ``agents``, ``commands``, ``hooks``). + * :class:`Remote` - one row per ``(provider, owner, repo)``. + * :class:`Extension` - one row per logical extension on a remote, + keyed by ``(remote, library, name, source_dir)``. + * :class:`Installation`- one row per physical install location on + this machine, keyed by the absolute destination path. + * :class:`Environment` - lookup of the host environment a fetch ran + in, keyed by ``(hostname, platform, python, polyskills)``. + * :class:`FetchEvent` - append-only audit row, one per fetch + attempt (success or failure), carrying the resolved commit SHA, + timestamp, and on-disk size of that attempt. + +Derived facts are never stored: the "first fetched" and "last +updated" timestamps and the "current commit SHA" of an installation +are computed from :class:`FetchEvent` rows via aggregation, keeping +the schema in third normal form. + +:NOTE: This module imports SQLAlchemy unconditionally. It is only + imported lazily by :mod:`polyskills.database.tracker` after the + :data:`polyskills.database.SQLALCHEMY_AVAILABLE` guard has + confirmed the dependency is importable. +""" + +from datetime import datetime, timezone + +from typing import List, Optional + +from sqlalchemy import ( + DateTime, ForeignKey, Integer, String, Text, UniqueConstraint +) +from sqlalchemy.orm import ( + DeclarativeBase, Mapped, mapped_column, relationship +) + + +def _utc_now() -> datetime: + """ + Return the current time as a timezone-aware UTC + :class:`datetime.datetime`. + + The helper is used as the default factory for + :attr:`FetchEvent.occurred_at` so every audit row is stamped in a + single, unambiguous time zone regardless of the host locale. + + :rtype: datetime.datetime + :returns: The current instant in the UTC time zone. + """ + + return datetime.now(timezone.utc) + + +class Base(DeclarativeBase): + """ + Declarative base shared by every model in the tracking schema. + + A dedicated subclass of :class:`sqlalchemy.orm.DeclarativeBase` + is used (rather than the legacy ``declarative_base()`` factory) + so the schema participates in SQLAlchemy 2.0 typed mappings and + its :attr:`metadata` can be materialised in one call by + :func:`polyskills.database.engine.init_schema`. + """ + + +class Source(Base): + """ + Lookup table of supported remote providers. + + Each row corresponds to one + :class:`polyskills.remote.sources.ValidSources` member (for + example ``GITHUB``) and exists so that :class:`Remote` rows + reference a provider by foreign key rather than repeating the + provider name and base URL on every remote. + """ + + __tablename__ = "sources" + + id : Mapped[int] = mapped_column(primary_key = True) + name : Mapped[str] = mapped_column( + String(64), unique = True, nullable = False + ) + base_url : Mapped[str] = mapped_column(String(255), nullable = False) + + remotes : Mapped[List["Remote"]] = relationship( + back_populates = "source" + ) + + +class Library(Base): + """ + Lookup table of extension kinds managed by the framework. + + The supported values mirror the CLI library selector - + ``skills``, ``agents``, ``commands``, ``hooks`` - and are + referenced by :class:`Extension` rows through a foreign key so the + kind is never repeated as a free-text column. + """ + + __tablename__ = "libraries" + + id : Mapped[int] = mapped_column(primary_key = True) + name : Mapped[str] = mapped_column( + String(32), unique = True, nullable = False + ) + + extensions : Mapped[List["Extension"]] = relationship( + back_populates = "library" + ) + + +class Remote(Base): + """ + A single remote repository identified by its provider, owner, and + repository slug. + + The natural key ``(source_id, owner, repository)`` is enforced by + a unique constraint so re-fetching from the same repository reuses + the existing row instead of inserting a duplicate. + + :type source_id: int + :param source_id: Foreign key into :class:`Source`. + + :type owner: str + :param owner: Repository owner slug, for example ``PyUtility``. + + :type repository: str + :param repository: Repository name slug, for example + ``polyskills``. + + :type url: str + :param url: The canonical remote URL exactly as supplied on the + command line. + """ + + __tablename__ = "remotes" + __table_args__ = ( + UniqueConstraint( + "source_id", "owner", "repository", + name = "uq_remote_provider_slug" + ), + ) + + id : Mapped[int] = mapped_column(primary_key = True) + source_id : Mapped[int] = mapped_column( + ForeignKey("sources.id"), nullable = False, index = True + ) + owner : Mapped[str] = mapped_column(String(255), nullable = False) + repository : Mapped[str] = mapped_column(String(255), nullable = False) + url : Mapped[str] = mapped_column(Text, nullable = False) + + source : Mapped["Source"] = relationship(back_populates = "remotes") + extensions : Mapped[List["Extension"]] = relationship( + back_populates = "remote" + ) + + +class Extension(Base): + """ + A logical extension hosted on a remote repository. + + The natural key ``(remote_id, library_id, name, source_dir)`` + uniquely identifies an extension and is enforced by a unique + constraint. The same extension installed into several destinations + is represented by one :class:`Extension` row and many + :class:`Installation` rows. + + :type remote_id: int + :param remote_id: Foreign key into :class:`Remote`. + + :type library_id: int + :param library_id: Foreign key into :class:`Library`. + + :type name: str + :param name: Leaf directory name of the extension on the remote. + + :type source_dir: str + :param source_dir: POSIX-style source directory on the remote the + extension was fetched from. + """ + + __tablename__ = "extensions" + __table_args__ = ( + UniqueConstraint( + "remote_id", "library_id", "name", "source_dir", + name = "uq_extension_natural_key" + ), + ) + + id : Mapped[int] = mapped_column(primary_key = True) + remote_id : Mapped[int] = mapped_column( + ForeignKey("remotes.id"), nullable = False, index = True + ) + library_id : Mapped[int] = mapped_column( + ForeignKey("libraries.id"), nullable = False, index = True + ) + name : Mapped[str] = mapped_column( + String(255), nullable = False, index = True + ) + source_dir : Mapped[str] = mapped_column(String(512), nullable = False) + + remote : Mapped["Remote"] = relationship( + back_populates = "extensions" + ) + library : Mapped["Library"] = relationship( + back_populates = "extensions" + ) + installations : Mapped[List["Installation"]] = relationship( + back_populates = "extension" + ) + + +class Installation(Base): + """ + A physical installation of an extension at one absolute path on + the local filesystem. + + The absolute destination path is the natural key and is enforced + unique so repeated fetches into the same directory append a new + :class:`FetchEvent` to the existing installation rather than + creating a duplicate location. + + :type extension_id: int + :param extension_id: Foreign key into :class:`Extension`. + + :type install_path: str + :param install_path: Absolute, resolved destination directory on + the local filesystem. + """ + + __tablename__ = "installations" + + id : Mapped[int] = mapped_column(primary_key = True) + extension_id : Mapped[int] = mapped_column( + ForeignKey("extensions.id"), nullable = False, index = True + ) + install_path : Mapped[str] = mapped_column( + Text, nullable = False, unique = True + ) + + extension : Mapped["Extension"] = relationship( + back_populates = "installations" + ) + events : Mapped[List["FetchEvent"]] = relationship( + back_populates = "installation", + cascade = "all, delete-orphan" + ) + + +class Environment(Base): + """ + Lookup table of host environments a fetch was executed in. + + Recording the environment in its own table - keyed by + ``(hostname, platform, python_version, polyskills_version)`` - + keeps these repeating strings out of the high-cardinality + :class:`FetchEvent` table while still allowing every event to be + attributed to the exact environment that produced it. + """ + + __tablename__ = "environments" + __table_args__ = ( + UniqueConstraint( + "hostname", "platform", "python_version", + "polyskills_version", name = "uq_environment_identity" + ), + ) + + id : Mapped[int] = mapped_column(primary_key = True) + hostname : Mapped[Optional[str]] = mapped_column(String(255)) + platform : Mapped[Optional[str]] = mapped_column(String(255)) + python_version : Mapped[Optional[str]] = mapped_column(String(64)) + polyskills_version : Mapped[Optional[str]] = mapped_column(String(64)) + + events : Mapped[List["FetchEvent"]] = relationship( + back_populates = "environment" + ) + + +class FetchEvent(Base): + """ + An append-only audit record of a single extension fetch attempt. + + One row is written per attempt - whether it succeeds or fails - so + the full history of an installation is preserved. The first-fetched + timestamp, the last-updated timestamp, and the current commit SHA + of an installation are all derived from these rows by aggregation + rather than being cached on :class:`Installation`. + + :type installation_id: int + :param installation_id: Foreign key into :class:`Installation`. + + :type environment_id: Optional[int] + :param environment_id: Foreign key into :class:`Environment`, or + ``None`` when the environment could not be captured. + + :type action: str + :param action: Concrete action taken - ``fetch``, ``overwrite``, + or ``merge`` - derived from the ``--exists`` policy. + + :type requested_version: str + :param requested_version: Tag, branch, or SHA requested by the + caller, for example ``master``. + + :type resolved_commit_sha: Optional[str] + :param resolved_commit_sha: The concrete commit SHA the requested + version resolved to, when it could be determined. + + :type status: str + :param status: Either ``success`` or ``failed``. + """ + + __tablename__ = "fetch_events" + + id : Mapped[int] = mapped_column(primary_key = True) + installation_id : Mapped[int] = mapped_column( + ForeignKey("installations.id", ondelete = "CASCADE"), + nullable = False, index = True + ) + environment_id : Mapped[Optional[int]] = mapped_column( + ForeignKey("environments.id"), nullable = True, index = True + ) + + action : Mapped[str] = mapped_column( + String(32), nullable = False + ) + requested_version : Mapped[str] = mapped_column( + String(255), nullable = False + ) + resolved_commit_sha : Mapped[Optional[str]] = mapped_column(String(64)) + status : Mapped[str] = mapped_column( + String(16), nullable = False, index = True + ) + error : Mapped[Optional[str]] = mapped_column(Text) + file_count : Mapped[Optional[int]] = mapped_column(Integer) + total_bytes : Mapped[Optional[int]] = mapped_column(Integer) + duration_ms : Mapped[Optional[int]] = mapped_column(Integer) + invoked_via : Mapped[str] = mapped_column( + String(16), nullable = False, default = "api" + ) + occurred_at : Mapped[datetime] = mapped_column( + DateTime, nullable = False, default = _utc_now, index = True + ) + + installation : Mapped["Installation"] = relationship( + back_populates = "events" + ) + environment : Mapped[Optional["Environment"]] = relationship( + back_populates = "events" + ) From 2952b8d573af243c62c343914dd4b4661f579972 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 18:31:06 +0530 Subject: [PATCH 11/17] =?UTF-8?q?=E2=9C=A8=20feat(database):=20add=20best-?= =?UTF-8?q?effort=20tracking=20write=20and=20query=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the public tracking façade that the framework uses to record extension fetches and read them back. Writing upserts the normalised provider, remote, extension, installation, and environment rows and appends one append-only event; reading reconstructs the derived first-fetched, last-updated, and current-commit facts from the audit log. Every entry point is best-effort - a missing dependency, a locked database, or any fault is swallowed and warned once so the install path is never broken. Heavy imports are deferred so importing the module never requires SQLAlchemy. Passes flake8 against the project `.flake8`. - add `tracker.py` with keyword-only `record_fetch()`, a cached per-path session factory, a generic `_get_or_create` honouring the natural keys, host-`Environment` resolution, and `list_installations` / `get_installation` read helpers that fold events into summaries - carry a process-wide invocation context (`cli` vs `api`) and a warn-once latch so a broken database does not flood stderr - export the tracking surface from `polyskills/database/__init__.py` - verification: CI hard gate `flake8 --select=E9,F63,F7,F82` -> 0; offline round-trip records two fetches of one extension into one installation (2 events, 1 source, 1 remote) plus a failure row, and derives the latest SHA, first-fetched, and last-updated correctly Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/database/__init__.py | 10 + polyskills/database/tracker.py | 586 ++++++++++++++++++++++++++++++++ 2 files changed, 596 insertions(+) create mode 100644 polyskills/database/tracker.py diff --git a/polyskills/database/__init__.py b/polyskills/database/__init__.py index 11801fb..c5a123b 100644 --- a/polyskills/database/__init__.py +++ b/polyskills/database/__init__.py @@ -45,6 +45,10 @@ DATABASE_FILENAME, ENVIRON_DB_PATH, POLYSKILLS_HOME, default_database_path ) +from polyskills.database.tracker import ( + get_installation, get_invocation_context, list_installations, + record_fetch, reset_cache_for_tests, set_invocation_context +) __all__ = [ "SQLALCHEMY_AVAILABLE", @@ -52,4 +56,10 @@ "ENVIRON_DB_PATH", "POLYSKILLS_HOME", "default_database_path", + "get_installation", + "get_invocation_context", + "list_installations", + "record_fetch", + "reset_cache_for_tests", + "set_invocation_context", ] diff --git a/polyskills/database/tracker.py b/polyskills/database/tracker.py new file mode 100644 index 0000000..7570626 --- /dev/null +++ b/polyskills/database/tracker.py @@ -0,0 +1,586 @@ +# -*- encoding: utf-8 -*- + +""" +Best-Effort Tracking Façade For The Polyskills Database +======================================================= + +Provides the high-level, best-effort API the rest of the framework +uses to record extension fetches into and read them back from the +SQLite tracking database. The module is the only public entry point +for tracking; callers never touch :mod:`polyskills.database.models` +or :mod:`polyskills.database.engine` directly. + +Two responsibilities live here: + + * **writing** - :func:`record_fetch` upserts the normalised + ``source`` / ``remote`` / ``extension`` / ``installation`` / + ``environment`` rows for one fetch attempt and appends a single + :class:`~polyskills.database.models.FetchEvent`. + * **reading** - :func:`list_installations` and + :func:`get_installation` reconstruct the derived facts + (first-fetched, last-updated, current commit SHA) from the audit + log so the ``polyskills records`` command can present them. + +Every public function is engineered to be a no-op on failure: a +missing SQLAlchemy dependency, a read-only home directory, a locked +database, or any other fault is swallowed and surfaced as a single +per-process :class:`RuntimeWarning`. The install path of the framework +is therefore never broken by a tracking fault. + +:NOTE: SQLAlchemy and the sibling :mod:`~polyskills.database.engine` + and :mod:`~polyskills.database.models` modules are imported lazily + inside the functions below so that merely importing this module + (for example from :mod:`polyskills.cli`) never requires the + dependency to be installed. +""" + +import platform as _platform +import socket +import warnings + +from pathlib import Path + +from typing import Any, Dict, List, Optional, Tuple + +from polyskills.database import SQLALCHEMY_AVAILABLE +from polyskills.database.paths import default_database_path + +# ? process-wide cache of (engine, session-factory) keyed by the +# ? resolved database path so repeated calls in one process reuse a +# ? single connection pool instead of rebuilding the engine each time. +_FACTORY_CACHE : Dict[str, Tuple[Any, Any]] = {} + +# ? single per-process latch so a broken database warns exactly once +# ? rather than flooding stderr on every fetch in a batch run. +_WARNED : bool = False + +# ? invocation context persisted into ``fetch_events.invoked_via`` so +# ? downstream analytics can split CLI usage from direct API usage. +_INVOCATION_CONTEXT : str = "api" +_VALID_CONTEXTS : Tuple[str, ...] = ("cli", "api") + + +def set_invocation_context(context : str) -> None: + """ + Record the invocation context stamped onto subsequent fetch + events. + + :type context: str + :param context: One of ``"cli"`` or ``"api"``. Any other value is + silently coerced to ``"api"`` so a caller typo never breaks + the tracker. + + :rtype: None + :returns: Nothing - the context is stored in a module-level + variable consulted by :func:`record_fetch`. + """ + + global _INVOCATION_CONTEXT + _INVOCATION_CONTEXT = ( + context if context in _VALID_CONTEXTS else "api" + ) + + +def get_invocation_context() -> str: + """ + Return the invocation context recorded by + :func:`set_invocation_context`. + + :rtype: str + :returns: Either ``"cli"`` or ``"api"``. + """ + + return _INVOCATION_CONTEXT + + +def _warn_once(message : str) -> None: + """ + Emit a single :func:`warnings.warn` per process. + + :type message: str + :param message: The human-readable warning text. + + :rtype: None + :returns: Nothing - the warning is emitted as a side effect at + most once per process lifetime. + """ + + global _WARNED + if _WARNED: + return None + _WARNED = True + warnings.warn(message, RuntimeWarning, stacklevel = 3) + + +def _session_factory(database_path : Optional[Path]) -> Optional[Any]: + """ + Return a cached :class:`~sqlalchemy.orm.sessionmaker` for + ``database_path``, building and bootstrapping the engine on first + use. + + The function is the single guarded gateway to the persistence + layer: it short-circuits when SQLAlchemy is unavailable and + swallows any engine-construction error, returning ``None`` so + every caller degrades to a no-op. + + :type database_path: Optional[pathlib.Path] + :param database_path: Explicit database location, or ``None`` for + the canonical ``~/.polyskills/records.db``. + + :rtype: Optional[sqlalchemy.orm.sessionmaker] + :returns: A bound session factory, or ``None`` when tracking is + unavailable or the engine could not be built. + """ + + if not SQLALCHEMY_AVAILABLE: + _warn_once( + "polyskills tracking disabled: SQLAlchemy is not installed" + ) + return None + + resolved = database_path or default_database_path() + key = str(resolved) + + cached = _FACTORY_CACHE.get(key) + if cached is not None: + return cached[1] + + try: + from polyskills.database.engine import build_session_factory + engine, factory = build_session_factory(resolved) + except Exception as exc: # pragma: no cover - defensive guard + _warn_once(f"polyskills tracking disabled (init failed): {exc!r}") + return None + + _FACTORY_CACHE[key] = (engine, factory) + return factory + + +def _get_or_create( + session : Any, model : Any, + defaults : Optional[Dict[str, Any]] = None, **keys : Any +) -> Any: + """ + Fetch the row of ``model`` matching ``keys`` or create it. + + The lookup uses only the natural-key columns in ``keys``; any + ``defaults`` are applied solely on creation so a re-fetch never + rewrites immutable columns. The new row is flushed so its primary + key is available to the caller immediately. + + :type session: sqlalchemy.orm.Session + :param session: The active session the row is queried and added + through. + + :type model: type + :param model: The mapped model class to query or instantiate. + + :type defaults: Optional[Dict[str, Any]] + :param defaults: Extra column values applied only when a new row + is created. + + :rtype: object + :returns: The existing or newly created mapped instance. + """ + + instance = session.query(model).filter_by(**keys).one_or_none() + if instance is not None: + return instance + + params = dict(keys) + if defaults: + params.update(defaults) + + instance = model(**params) + session.add(instance) + session.flush() + return instance + + +def _resolve_environment(session : Any) -> Any: + """ + Return the :class:`~polyskills.database.models.Environment` row + describing the current host, creating it on first encounter. + + The environment identity is the tuple of hostname, platform + descriptor, Python version, and polyskills version, mirroring the + unique constraint on the table. + + :type session: sqlalchemy.orm.Session + :param session: The active session used for the get-or-create. + + :rtype: polyskills.database.models.Environment + :returns: The environment row matching the current host. + """ + + from polyskills.database.models import Environment + + try: + from polyskills import __version__ as polyskills_version + except Exception: # pragma: no cover - defensive guard + polyskills_version = None + + return _get_or_create( + session, Environment, + hostname = socket.gethostname(), + platform = _platform.platform(), + python_version = _platform.python_version(), + polyskills_version = polyskills_version, + ) + + +def record_fetch( + *, + source_name : str, + source_base_url : str, + owner : str, + repository : str, + remote_url : str, + library : str, + name : str, + source_dir : str, + install_path : str, + requested_version : str, + action : str, + status : str, + resolved_commit_sha : Optional[str] = None, + file_count : Optional[int] = None, + total_bytes : Optional[int] = None, + duration_ms : Optional[int] = None, + error : Optional[str] = None, + invoked_via : Optional[str] = None, + database_path : Optional[Path] = None, +) -> Optional[int]: + """ + Record one extension fetch attempt - success or failure - into the + tracking database. + + The call upserts the normalised provider, remote, extension, + installation, and environment rows and appends one + :class:`~polyskills.database.models.FetchEvent`. Every argument is + keyword-only so the long parameter list cannot be supplied + positionally by mistake. The whole operation is best-effort: any + failure is swallowed, warned once, and reported as a ``None`` + return so the caller's install path is never disrupted. + + :type source_name: str + :param source_name: Provider enumeration name, for example + ``"GITHUB"``. + + :type source_base_url: str + :param source_base_url: Provider base URL stored on first sight of + the provider. + + :type owner: str + :param owner: Repository owner slug. + + :type repository: str + :param repository: Repository name slug. + + :type remote_url: str + :param remote_url: Canonical remote URL as supplied by the caller. + + :type library: str + :param library: Extension kind - ``"skills"``, ``"agents"``, etc. + + :type name: str + :param name: Extension leaf name. + + :type source_dir: str + :param source_dir: POSIX source directory on the remote. + + :type install_path: str + :param install_path: Absolute, resolved destination directory. + + :type requested_version: str + :param requested_version: Requested tag, branch, or SHA. + + :type action: str + :param action: Concrete action - ``"fetch"``, ``"overwrite"``, or + ``"merge"``. + + :type status: str + :param status: ``"success"`` or ``"failed"``. + + **Keyword Arguments** + + The remaining keyword-only arguments enrich the event row and are + all optional, defaulting to ``None`` when the caller cannot + determine them. + + * **resolved_commit_sha** (*Optional[str]*): Concrete commit + SHA the requested version resolved to. + + * **file_count** (*Optional[int]*): Files written to the + destination on a successful fetch. + + * **total_bytes** (*Optional[int]*): Total on-disk size of the + written files. + + * **duration_ms** (*Optional[int]*): Wall-clock duration of + the attempt in milliseconds. + + * **error** (*Optional[str]*): Error description recorded on a + failed attempt. + + * **invoked_via** (*Optional[str]*): Override for the recorded + invocation context; defaults to the process context. + + * **database_path** (*Optional[pathlib.Path]*): Explicit + database location, primarily for tests. + + :rtype: Optional[int] + :returns: The primary key of the inserted ``fetch_events`` row, or + ``None`` when tracking was unavailable or the write failed. + """ + + factory = _session_factory(database_path) + if factory is None: + return None + + from polyskills.database.models import ( + Extension, FetchEvent, Installation, Library, Remote, Source + ) + + context = invoked_via if invoked_via in _VALID_CONTEXTS \ + else get_invocation_context() + + try: + with factory() as session: + source = _get_or_create( + session, Source, + defaults = {"base_url": source_base_url}, + name = source_name, + ) + lib = _get_or_create(session, Library, name = library) + remote = _get_or_create( + session, Remote, defaults = {"url": remote_url}, + source_id = source.id, owner = owner, + repository = repository, + ) + extension = _get_or_create( + session, Extension, + remote_id = remote.id, library_id = lib.id, + name = name, source_dir = source_dir, + ) + installation = _get_or_create( + session, Installation, + defaults = {"extension_id": extension.id}, + install_path = install_path, + ) + if installation.extension_id != extension.id: + installation.extension_id = extension.id + + environment = _resolve_environment(session) + + event = FetchEvent( + installation_id = installation.id, + environment_id = environment.id, + action = action, + requested_version = requested_version, + resolved_commit_sha = resolved_commit_sha, + status = status, + error = error, + file_count = file_count, + total_bytes = total_bytes, + duration_ms = duration_ms, + invoked_via = context, + ) + session.add(event) + session.commit() + return int(event.id) + except Exception as exc: + _warn_once(f"polyskills tracker record_fetch failed: {exc!r}") + return None + + +def _summarise_events(events : List[Any]) -> Dict[str, Any]: + """ + Reduce an installation's :class:`FetchEvent` rows to the derived + facts presented by the read API. + + :type events: List[polyskills.database.models.FetchEvent] + :param events: All events belonging to one installation, in any + order. + + :rtype: Dict[str, Any] + :returns: A mapping with the first-fetched and last-updated + timestamps (successful events only), the latest commit SHA and + requested version, the latest status, and the total attempt + count. + """ + + ordered = sorted(events, key = lambda event : event.occurred_at) + successes = [ + event for event in ordered if event.status == "success" + ] + latest = ordered[-1] if ordered else None + latest_success = successes[-1] if successes else None + + return { + "first_fetched_at": successes[0].occurred_at if successes else None, + "last_updated_at": latest_success.occurred_at + if latest_success else None, + "resolved_commit_sha": latest_success.resolved_commit_sha + if latest_success else None, + "requested_version": latest.requested_version if latest else None, + "last_status": latest.status if latest else None, + "fetch_count": len(ordered), + } + + +def list_installations( + database_path : Optional[Path] = None +) -> List[Dict[str, Any]]: + """ + Return one summary row per tracked installation. + + Each summary joins the installation to its extension, remote, and + library and folds the audit log into the derived first-fetched, + last-updated, and current-commit facts. The function is read-only + and best-effort: when tracking is unavailable, the database does + not yet exist, or a query fails, an empty list is returned. + + :type database_path: Optional[pathlib.Path] + :param database_path: Explicit database location, or ``None`` for + the canonical location. + + :rtype: List[Dict[str, Any]] + :returns: A list of summary mappings sorted by extension name then + install path, each describing one installation. + """ + + factory = _session_factory(database_path) + if factory is None: + return [] + + from polyskills.database.models import Installation + + try: + with factory() as session: + installations = session.query(Installation).all() + rows : List[Dict[str, Any]] = [] + for installation in installations: + extension = installation.extension + remote = extension.remote + summary = _summarise_events(installation.events) + summary.update({ + "name": extension.name, + "library": extension.library.name, + "owner": remote.owner, + "repository": remote.repository, + "remote_url": remote.url, + "source_dir": extension.source_dir, + "install_path": installation.install_path, + }) + rows.append(summary) + except Exception as exc: + _warn_once(f"polyskills tracker list_installations failed: {exc!r}") + return [] + + return sorted( + rows, key = lambda row : (row["name"], row["install_path"]) + ) + + +def get_installation( + name : str, database_path : Optional[Path] = None +) -> List[Dict[str, Any]]: + """ + Return the detailed records for every installation of the + extension called ``name``. + + The detail extends the :func:`list_installations` summary with the + full chronological event history of each installation so the + ``polyskills records --name`` view can show every fetch attempt. + + :type name: str + :param name: Extension leaf name to look up. + + :type database_path: Optional[pathlib.Path] + :param database_path: Explicit database location, or ``None`` for + the canonical location. + + :rtype: List[Dict[str, Any]] + :returns: A list of detail mappings (one per matching + installation), each carrying an ``events`` list of per-attempt + mappings. Empty when no match is found or tracking is + unavailable. + """ + + factory = _session_factory(database_path) + if factory is None: + return [] + + from polyskills.database.models import Extension, Installation + + try: + with factory() as session: + installations = ( + session.query(Installation) + .join(Extension) + .filter(Extension.name == name) + .all() + ) + details : List[Dict[str, Any]] = [] + for installation in installations: + extension = installation.extension + remote = extension.remote + summary = _summarise_events(installation.events) + summary.update({ + "name": extension.name, + "library": extension.library.name, + "owner": remote.owner, + "repository": remote.repository, + "remote_url": remote.url, + "source_dir": extension.source_dir, + "install_path": installation.install_path, + "events": [ + { + "occurred_at": event.occurred_at, + "action": event.action, + "status": event.status, + "requested_version": event.requested_version, + "resolved_commit_sha": + event.resolved_commit_sha, + "file_count": event.file_count, + "total_bytes": event.total_bytes, + "duration_ms": event.duration_ms, + "invoked_via": event.invoked_via, + "error": event.error, + } + for event in sorted( + installation.events, + key = lambda event : event.occurred_at + ) + ], + }) + details.append(summary) + except Exception as exc: + _warn_once(f"polyskills tracker get_installation failed: {exc!r}") + return [] + + return details + + +def reset_cache_for_tests() -> None: + """ + Dispose every cached engine and clear the factory cache. + + Used exclusively by the test-suite between cases that point the + tracker at different temporary databases so a stale engine is + never reused across an isolated fixture. + + :rtype: None + :returns: Nothing - the module-level cache is emptied as a side + effect. + """ + + global _WARNED + for engine, _factory in _FACTORY_CACHE.values(): + try: + engine.dispose() + except Exception: # pragma: no cover - defensive guard + pass + _FACTORY_CACHE.clear() + _WARNED = False From b663f028f8c67054a09b97372057c4879aeedf79 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 18:31:28 +0530 Subject: [PATCH 12/17] =?UTF-8?q?=F0=9F=9B=A0=EF=B8=8F=20feat(remote):=20a?= =?UTF-8?q?dd=20resolveCommit=20ref-to-SHA=20resolver=20for=20tracking?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Give the source managers a way to resolve a tag, branch, or SHA to the concrete commit SHA it points at so the tracking database can record the exact commit a fetch landed on. The base manager returns `None` (resolution is never required for a fetch), and the GitHub manager overrides it using the commits REST endpoint with the `application/vnd.github.sha` media type for a cheap plain-text reply. Both follow the file's existing camelCase method style and reST docstrings. - add a concrete `SourceManager.resolveCommit()` default returning `None` so every provider degrades gracefully - override `GithubManager.resolveCommit()` calling `GET /repos/{owner}/{repo}/commits/{ref}`; any transport or HTTP error yields `None` so a tracking lookup never raises into the fetch - verification: CI hard gate `flake8 --select=E9,F63,F7,F82` -> 0; `import polyskills.remote.sources` ok and `list` dispatch mode intact Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/remote/sources.py | 83 +++++++++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/polyskills/remote/sources.py b/polyskills/remote/sources.py index b3493a2..8c51682 100644 --- a/polyskills/remote/sources.py +++ b/polyskills/remote/sources.py @@ -271,6 +271,34 @@ def get(self, remote : str, mode : str = "tags", **kwargs) -> Any: return methods[mode]() # return the underlying assets + def resolveCommit( + self, remote : str, version : str = "master", **kwargs + ) -> Optional[str]: + """ + Resolve ``version`` (a tag, branch, or SHA) to the concrete + commit SHA it points at on the remote. + + The base implementation returns ``None``; providers that can + cheaply resolve a reference to a commit SHA override this so + the tracking database can record the exact commit a fetch + landed on. Returning ``None`` is always acceptable and simply + leaves the recorded SHA empty - resolution is never required + for a fetch to succeed. + + :type remote: str + :param remote: Remote URL forwarded to the provider. + + :type version: str + :param version: The tag, branch, or SHA to resolve. + + :rtype: Optional[str] + :returns: The resolved commit SHA, or ``None`` when it cannot + be determined. + """ + + return None + + @abc.abstractmethod def _getTags( self, remote : str, prefix : Optional[str] = None @@ -446,10 +474,63 @@ def _getTags( tags.append(item["name"]) remote_url = response.links.get("next", {}).get("url") - + return tags + def resolveCommit( + self, remote : str, version : str = "master", **kwargs + ) -> Optional[str]: + """ + Resolve ``version`` to a commit SHA using the GitHub commits + REST endpoint ``GET /repos/{owner}/{repository}/commits/{ref}``. + + The request sets the ``Accept: application/vnd.github.sha`` + media type so the endpoint returns the 40-character SHA as a + plain-text body instead of the full commit payload, keeping + the lookup cheap. The call is best-effort: any transport or + HTTP error yields ``None`` so a tracking lookup never raises + into the fetch path. + + :type remote: str + :param remote: GitHub remote URL. + + :type version: str + :param version: Tag, branch, or SHA to resolve, defaults to + ``master``. + + :rtype: Optional[str] + :returns: The resolved commit SHA, or ``None`` on any failure. + """ + + timeout = kwargs.get("timeout", 30) + + try: + owner, repository = self.getSlug(remote = remote) + except ValueError: + return None + + uri = ( + "https://api.github.com/repos/" + "{owner}/{repository}/commits/{ref}" + ).format(owner = owner, repository = repository, ref = version) + + headers = dict(self.headers) + headers["accept"] = "application/vnd.github.sha" + + try: + response = requests.get( + uri, verify = False, + headers = headers, timeout = timeout + ) + response.raise_for_status() + except requests.RequestException: + return None + + sha = response.text.strip() + return sha or None + + def _getExtensions( self, remote : str, name : str, source : Path, destination : Path, version : str = "master", From 2c39666ad3bec871e0a16921c6efb98635eb3dd8 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 19:23:52 +0530 Subject: [PATCH 13/17] =?UTF-8?q?=E2=9C=A8=20feat(cli):=20track=20fetches,?= =?UTF-8?q?=20add=20--no-tracking=20flag=20and=20records=20command?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the tracking database into the command-line interface so every `polyskills manager` fetch is recorded by default and can be inspected later. Tracking is wrapped end-to-end so a database or network fault never alters the fetch outcome or the process exit status, honouring the framework's best-effort contract. Follows the file's existing camelCase helper style and reST docstrings; the CI lint gate (`flake8 --select=E9,F63,F7,F82`) stays at zero. - add a global-style `--no-tracking` flag on the `manager` subcommand that skips the database write for that invocation - wrap the `manager` dispatch to time the fetch and record a success or failure event via `_recordFetch`, resolving the commit SHA with `manager.resolveCommit` and the on-disk footprint via `_summariseDestination` - add a read-only `records` subcommand (`--name`, `--db`) that lists tracked installations or shows one extension's full fetch history, and never creates the database when it is absent - set the `cli` invocation context before dispatch so events are attributed to CLI usage - verification: full offline functional run (network mocked) confirms tracked install records the SHA, `--no-tracking` skips the write, and read on an absent DB does not create it; existing `polyskills.tests.test_cli` -> 27 tests OK with the real user database left untouched Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/cli.py | 338 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 331 insertions(+), 7 deletions(-) diff --git a/polyskills/cli.py b/polyskills/cli.py index 3bc38cb..2f5b0cf 100644 --- a/polyskills/cli.py +++ b/polyskills/cli.py @@ -20,10 +20,11 @@ import os import sys +import time import argparse from pathlib import Path -from typing import Callable, Optional, Union +from typing import Any, Callable, Dict, Optional, Tuple, Union def _expandUserPath(value : Union[str, Path]) -> Path: @@ -59,6 +60,10 @@ def _expandUserPath(value : Union[str, Path]) -> Path: from polyskills.remote.sources import ( GithubManager, SourceControl, SourceManager, ValidSources ) +from polyskills.database import ( + default_database_path, get_installation, list_installations, + record_fetch, set_invocation_context +) # ? registry mapping each supported remote enumeration to its concrete # ? :class:`SourceManager` factory; extending the framework with a new @@ -68,6 +73,42 @@ def _expandUserPath(value : Union[str, Path]) -> Path: } +def _summariseDestination(destination : Path) -> Tuple[int, int]: + """ + Walk ``destination`` and return ``(file_count, total_bytes)`` so a + successful fetch can be recorded with its on-disk footprint. + + The helper is best-effort: any :class:`OSError` raised while + iterating the tree is swallowed and the partial counts gathered so + far are returned, preserving the "tracking is never fatal" + contract of the command-line interface. + + :type destination: pathlib.Path + :param destination: Directory whose files are counted and sized. + + :rtype: Tuple[int, int] + :returns: A two-tuple ``(file_count, total_bytes)`` of the regular + files found beneath ``destination``. + """ + + file_count = 0 + total_bytes = 0 + + try: + for path in Path(destination).rglob("*"): + if not path.is_file(): + continue + file_count += 1 + try: + total_bytes += path.stat().st_size + except OSError: + continue + except OSError: + pass + + return file_count, total_bytes + + def buildParser() -> argparse.ArgumentParser: """ Constructs the :func:`argparse.parser` for the :mod:`polyskills` @@ -194,6 +235,36 @@ def buildRemoteControls() -> argparse.ArgumentParser: ]) ) + # ? Creating Subparsers:: RECORDS - Inspect the Tracking Database + records = subparser.add_parser( + "records", help = ( + "Inspect the local tracking database " + "(~/.polyskills/records.db) that records every extension " + "fetched on this system - where it is installed, the " + "resolved commit SHA, and when it was first fetched and " + "last updated. Without '--name' every tracked installation " + "is listed; with '--name' the full fetch history of the " + "matching extension is shown. This command is read-only " + "and never creates the database." + ) + ) + + records.add_argument( + "-n", "--name", type = str, default = None, metavar = "", help = ( + "Show the detailed fetch history for the extension with " + "this leaf name (e.g., 'sql-code-format') instead of the " + "summary listing of every tracked installation." + ) + ) + + records.add_argument( + "--db", type = str, default = None, metavar = "", help = ( + "Path to the tracking database to read, defaults to " + "'~/.polyskills/records.db'. Primarily an advanced or " + "testing affordance." + ) + ) + # shared remote arguments across sub-parsers remoteControls = buildRemoteControls() @@ -281,6 +352,18 @@ def buildRemoteControls() -> argparse.ArgumentParser: ) ) + manager.add_argument( + "--no-tracking", action = "store_true", default = False, + help = ( + "Disable recording this installation in the local tracking " + "database (~/.polyskills/records.db). By default every " + "fetch - successful or failed - is tracked so the install " + "location, resolved commit SHA, and first-fetched / " + "last-updated timestamps are retained; pass this flag to " + "skip the database write entirely for this invocation." + ) + ) + # ? Create a Sub-Parser for the ``manager`` Parser for Libraries # ? {``skills``, ``agents``, ...} for different actions libraries = manager.add_subparsers( @@ -394,6 +477,218 @@ def listExtensions( ) +def _recordFetch( + args : argparse.Namespace, control : SourceControl, + status : str, error : Optional[str], duration_ms : int +) -> None: + """ + Record one ``manager`` fetch attempt in the tracking database. + + The helper resolves the remote slug, the provider metadata, the + resolved commit SHA, and the on-disk footprint, then delegates to + :func:`polyskills.database.record_fetch`. It is strictly + best-effort: every step is wrapped so a tracking failure - a + network error while resolving the SHA, an unreadable destination, + or a database fault - never propagates into the CLI exit path. + + :type args: argparse.Namespace + :param args: Parsed ``manager`` namespace carrying the remote, + name, library, source, destination, version, and exists policy. + + :type control: SourceControl + :param control: Source control built from ``--pagination`` and + ``--token``, reused to resolve the provider and commit SHA. + + :type status: str + :param status: Either ``"success"`` or ``"failed"``. + + :type error: Optional[str] + :param error: Error description recorded on a failed attempt. + + :type duration_ms: int + :param duration_ms: Wall-clock duration of the attempt in + milliseconds. + + :rtype: None + :returns: Nothing - the row is written as a best-effort side + effect; failures are swallowed. + """ + + try: + manager = _resolveManager(remote = args.remote, control = control) + owner, repository = manager.getSlug(remote = args.remote) + + commit_sha : Optional[str] = None + file_count : Optional[int] = None + total_bytes : Optional[int] = None + + if status == "success": + commit_sha = manager.resolveCommit(args.remote, args.version) + file_count, total_bytes = _summariseDestination( + args.destination + ) + + action = "fetch" if args.exists == "fail" else args.exists + + record_fetch( + source_name = manager.source.name, + source_base_url = manager.source.value, + owner = owner, repository = repository, + remote_url = args.remote, + library = args.library, name = args.name, + source_dir = Path(str(args.source)).as_posix(), + install_path = str(Path(args.destination).resolve()), + requested_version = args.version, + action = action, status = status, + resolved_commit_sha = commit_sha, + file_count = file_count, total_bytes = total_bytes, + duration_ms = duration_ms, error = error, + ) + except Exception: + # ! tracking is strictly best-effort; a failure here must never + # ! break the install path or alter the CLI exit status. + pass + + +def _formatTimestamp(value : Any) -> str: + """ + Render a stored timestamp for display, or ``n/a`` when absent. + + :type value: Any + :param value: A :class:`datetime.datetime` (or ``None``) read back + from the tracking database. + + :rtype: str + :returns: A ``YYYY-MM-DD HH:MM:SS`` string, ``n/a`` for ``None``, + or the raw string form for any unexpected value. + """ + + if value is None: + return "n/a" + try: + return value.strftime("%Y-%m-%d %H:%M:%S") + except (AttributeError, ValueError): + return str(value) + + +def _formatRecordSummary(index : int, row : Dict[str, Any]) -> str: + """ + Render one installation summary block for the ``records`` listing. + + :type index: int + :param index: Zero-based position used to build the display number. + + :type row: Dict[str, Any] + :param row: One summary mapping from + :func:`polyskills.database.list_installations`. + + :rtype: str + :returns: A multi-line, indented summary block for the + installation. + """ + + number = str(index + 1).zfill(2) + sha = row.get("resolved_commit_sha") or "n/a" + + return "\n".join([ + f" >> {number}. {row['name']} [{row['library']}]", + f" remote : {row['remote_url']} ({row['source_dir']})", + f" path : {row['install_path']}", + f" commit : {sha} (requested {row['requested_version']})", + f" history : first {_formatTimestamp(row['first_fetched_at'])}" + f" | last {_formatTimestamp(row['last_updated_at'])}" + f" | {row['fetch_count']} fetch(es), last {row['last_status']}", + ]) + + +def _formatRecordDetail(detail : Dict[str, Any]) -> str: + """ + Render a detailed installation block with its full event history. + + :type detail: Dict[str, Any] + :param detail: One detail mapping from + :func:`polyskills.database.get_installation`, including its + chronological ``events`` list. + + :rtype: str + :returns: A multi-line block describing the installation and every + recorded fetch attempt against it. + """ + + lines = [ + f">> {detail['name']} [{detail['library']}]", + f" remote : {detail['remote_url']} ({detail['source_dir']})", + f" path : {detail['install_path']}", + f" commit : {detail.get('resolved_commit_sha') or 'n/a'}", + f" first : {_formatTimestamp(detail['first_fetched_at'])}", + f" last : {_formatTimestamp(detail['last_updated_at'])}", + f" events ({len(detail['events'])}):", + ] + + for event in detail["events"]: + sha = event.get("resolved_commit_sha") or "n/a" + lines.append( + f" - {_formatTimestamp(event['occurred_at'])}" + f" {event['status']:7} {event['action']:9}" + f" {event['requested_version']} -> {sha}" + f" ({event['invoked_via']})" + ) + if event.get("error"): + lines.append(f" error: {event['error']}") + + return "\n".join(lines) + + +def _showRecords(name : Optional[str], db : Optional[str]) -> None: + """ + Render the contents of the tracking database to the terminal. + + Without ``name`` a per-installation summary is printed; with + ``name`` the full chronological fetch history of every matching + installation is shown. The command is read-only: when the database + file does not exist it reports the fact and returns without + creating it. + + :type name: Optional[str] + :param name: Extension leaf name to detail, or ``None`` to list + every tracked installation. + + :type db: Optional[str] + :param db: Explicit database path, or ``None`` for the canonical + ``~/.polyskills/records.db``. + + :rtype: None + :returns: Nothing - the records are printed as a side effect. + """ + + database_path = _expandUserPath(db) if db else default_database_path() + + if not database_path.exists(): + print(f"No tracking database found at `{database_path}`.") + print("Fetch an extension with 'polyskills manager ...' first.") + return None + + if name: + details = get_installation(name, database_path = database_path) + if not details: + print(f"No tracked installation found for `{name}`.") + return None + + for detail in details: + print(_formatRecordDetail(detail)) + return None + + rows = list_installations(database_path = database_path) + if not rows: + print(f"No tracked installations in `{database_path}`.") + return None + + print(f"Tracked installations ({len(rows)}):") + for idx, row in enumerate(rows): + print(_formatRecordSummary(idx, row)) + return None + + def main() -> None: """ Entry point for CLI tool for :mod:`polyskills` that parses the @@ -439,6 +734,12 @@ def main() -> None: print(f"\t>> {str(idx + 1).zfill(2)}. {name}") return None + # ? dispatch the 'records' subcommand: read-only inspection of the + # ? local tracking database; never touches a remote source. + if args.command == "records": + _showRecords(name = args.name, db = args.db) + return None + # ? set default source, destination directory based on library # ? expanding ``~`` and environment variables here lets users # ? supply familiar shell-style paths like @@ -456,12 +757,35 @@ def main() -> None: pagination = args.pagination, token = args.token ) - get( - remote = args.remote, name = args.name, - source = Path(args.source), destination = args.destination, - version = args.version, exists = args.exists, - control = control, - ) + # ? record every fetch in the local tracking database unless the + # ? caller opted out with ``--no-tracking``; the recording is + # ? wrapped so a database fault never alters the fetch outcome or + # ? the process exit status. + tracking = not getattr(args, "no_tracking", False) + if tracking: + set_invocation_context("cli") + + started = time.monotonic() + try: + get( + remote = args.remote, name = args.name, + source = Path(args.source), destination = args.destination, + version = args.version, exists = args.exists, + control = control, + ) + except Exception as exc: + if tracking: + _recordFetch( + args, control, status = "failed", error = repr(exc), + duration_ms = int((time.monotonic() - started) * 1000), + ) + raise + else: + if tracking: + _recordFetch( + args, control, status = "success", error = None, + duration_ms = int((time.monotonic() - started) * 1000), + ) return None From 29f3788c72f5b354031c85f055d97bb46d595426 Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 19:25:08 +0530 Subject: [PATCH 14/17] =?UTF-8?q?=F0=9F=92=A3=20refactor(db):=20drop=20sup?= =?UTF-8?q?erseded=20raw-sqlite=20tracker=20package?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the original stdlib-`sqlite3` tracking package now that the SQLAlchemy-backed `polyskills.database` package supersedes it. The old package wrote a denormalised `~/.polyskills/polyskills.db` with no ORM, which the new strictly-normalised `records.db` schema replaces in full. The development history of the original implementation is retained: its commit (9fc6469) remains a reachable ancestor through the earlier merge, so nothing is lost from `git log`. - delete `polyskills/db/{__init__,paths,schema,tracker}.py` - delete `polyskills/tests/test_tracker.py` which tested that package - no production module referenced `polyskills.db`; the only references were internal to the package and its own test - verification: `import polyskills`, `polyskills.cli`, `polyskills.remote.sources`, and `polyskills.database` all import cleanly after removal Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/db/__init__.py | 56 ---- polyskills/db/paths.py | 59 ---- polyskills/db/schema.py | 143 --------- polyskills/db/tracker.py | 489 ------------------------------- polyskills/tests/test_tracker.py | 394 ------------------------- 5 files changed, 1141 deletions(-) delete mode 100644 polyskills/db/__init__.py delete mode 100644 polyskills/db/paths.py delete mode 100644 polyskills/db/schema.py delete mode 100644 polyskills/db/tracker.py delete mode 100644 polyskills/tests/test_tracker.py diff --git a/polyskills/db/__init__.py b/polyskills/db/__init__.py deleted file mode 100644 index 7a92230..0000000 --- a/polyskills/db/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# -*- encoding: utf-8 -*- - -""" -Local Tracking Database for the Polyskills Framework -===================================================== - -The :mod:`polyskills.db` sub-package owns the local SQLite tracking -database used by the framework to record every extension that is -fetched, downloaded, and flushed into a destination directory by -either the CLI tool or a direct Python API caller. - -The database lives at a fixed location under the user home directory: - - ``~/.polyskills/polyskills.db`` - -The database file is created lazily on the very first import of the -:mod:`polyskills` package and is then re-used by every subsequent -import - the schema bootstrap is idempotent and uses -``CREATE TABLE IF NOT EXISTS`` for every table. - -Public Surface --------------- - -The sub-package exposes a small, deliberately narrow API so the rest -of the framework only ever has to learn three names: - - * :func:`get_tracker` - returns a process-wide :class:`Tracker` - singleton; the first call also runs :func:`apply_schema`. - * :func:`set_invocation_context` - records whether the calling - frame is the CLI ``main()`` or a direct library API caller; the - value is persisted into ``events.invoked_via`` so downstream - analytics can split CLI from API usage. - * :class:`Tracker` - the concrete object returned by - :func:`get_tracker`; offers ``record_start``, ``record_success``, - and ``record_failure``. - -:NOTE: Every public method of :class:`Tracker` is engineered to be a -best-effort no-op on failure - a broken database, a read-only home -directory, or a permission error never raises into the caller. The -install path is therefore guaranteed to succeed (or fail for its own -reasons) regardless of the database health. -""" - -from polyskills.db.paths import resolve_db_path -from polyskills.db.schema import apply_schema -from polyskills.db.tracker import ( - Tracker, get_tracker, set_invocation_context -) - -__all__ = [ - "Tracker", - "apply_schema", - "get_tracker", - "resolve_db_path", - "set_invocation_context", -] diff --git a/polyskills/db/paths.py b/polyskills/db/paths.py deleted file mode 100644 index bcd30cd..0000000 --- a/polyskills/db/paths.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- encoding: utf-8 -*- - -""" -Filesystem Path Resolver for the Polyskills Tracking Database - -Resolves the canonical on-disk location of the polyskills tracking -SQLite database. The location is fixed by design at -``~/.polyskills/polyskills.db`` so every install of the framework -writes to the same single source of truth and the user never needs -to learn an environment variable to opt in. - -Tests redirect the resolver by monkey-patching -:func:`resolve_db_path` to a temporary file so the live user-level -database is never touched by the test suite. -""" - -from pathlib import Path - -# ? canonical on-disk layout for the polyskills tracking database; -# ? the parent directory is also used by future bookkeeping artefacts -# ? (for example, lock files) so it is exposed as a constant. -POLYSKILLS_HOME : Path = Path.home() / ".polyskills" -DATABASE_FILE : str = "polyskills.db" - - -def resolve_db_path() -> Path: - """ - Return the absolute path to the polyskills tracking database and - create its parent directory if it does not yet exist. - - The function is intentionally side-effecting on the parent - directory so callers can immediately ``sqlite3.connect`` to the - returned path without an extra ``mkdir`` step. The database file - itself is created lazily by :mod:`sqlite3` on first connection. - - **Example Usage** - - The resolver is used by :func:`polyskills.db.tracker.get_tracker` - to open the connection on first use. - - .. code-block:: python - - from polyskills.db.paths import resolve_db_path - - db_path = resolve_db_path() - print(db_path) - >> /home/user/.polyskills/polyskills.db - - :raises OSError: If the parent directory cannot be created. The - caller (typically :class:`Tracker`) is expected to swallow - the error and degrade to a no-op so the install path is - never broken by a read-only home directory. - - :rtype: pathlib.Path - :returns: Absolute path to ``~/.polyskills/polyskills.db``. - """ - - POLYSKILLS_HOME.mkdir(parents = True, exist_ok = True) - return POLYSKILLS_HOME / DATABASE_FILE diff --git a/polyskills/db/schema.py b/polyskills/db/schema.py deleted file mode 100644 index ea4f20f..0000000 --- a/polyskills/db/schema.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- encoding: utf-8 -*- - -""" -Schema Definition for the Polyskills Tracking Database - -Defines the v1 schema for ``~/.polyskills/polyskills.db`` and the -idempotent :func:`apply_schema` helper that materialises it on a live -:class:`sqlite3.Connection`. The schema is composed of three tables: - - * ``meta`` - one row per bookkeeping key; seeded with the schema - version and the polyskills package version on first creation. - * ``extensions`` - latest known state per natural key - ``(remote_url, library, name, destination_dir)`` so callers can - answer "what do I currently have installed where?" in one query. - * ``events`` - append-only audit log; one row per fetch attempt, - success or failure, so the full history is preserved. - -All ``CREATE`` statements use ``IF NOT EXISTS`` so :func:`apply_schema` -can be called on every process start without raising. -""" - -import sqlite3 - -from typing import List - -# ? schema version is recorded in ``PRAGMA user_version`` so a future -# ? migration step can detect "v1" databases and upgrade them in -# ? place without inspecting any table contents. -SCHEMA_VERSION : int = 1 - -_PRAGMAS : List[str] = [ - "PRAGMA journal_mode = WAL", - "PRAGMA foreign_keys = ON", - "PRAGMA synchronous = NORMAL", -] - -_DDL_META : str = """ -CREATE TABLE IF NOT EXISTS meta ( - key TEXT PRIMARY KEY, - value TEXT NOT NULL -) -""" - -_DDL_EXTENSIONS : str = """ -CREATE TABLE IF NOT EXISTS extensions ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_kind TEXT NOT NULL, - remote_url TEXT NOT NULL, - owner TEXT, - repository TEXT, - library TEXT NOT NULL, - name TEXT NOT NULL, - requested_version TEXT NOT NULL, - resolved_version TEXT, - source_dir TEXT NOT NULL, - destination_dir TEXT NOT NULL, - exists_policy TEXT NOT NULL, - file_count INTEGER, - byte_count INTEGER, - last_status TEXT NOT NULL, - last_error TEXT, - invoked_via TEXT NOT NULL, - first_installed_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP), - last_installed_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP), - UNIQUE (remote_url, library, name, destination_dir) -) -""" - -_DDL_EVENTS : str = """ -CREATE TABLE IF NOT EXISTS events ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - extension_id INTEGER REFERENCES extensions(id) ON DELETE SET NULL, - action TEXT NOT NULL, - requested_version TEXT, - resolved_version TEXT, - file_count INTEGER, - byte_count INTEGER, - duration_ms INTEGER, - status TEXT NOT NULL, - error TEXT, - invoked_via TEXT NOT NULL, - occurred_at TEXT NOT NULL DEFAULT (CURRENT_TIMESTAMP) -) -""" - -_DDL_INDEXES : List[str] = [ - "CREATE INDEX IF NOT EXISTS ix_extensions_name " - "ON extensions(name)", - "CREATE INDEX IF NOT EXISTS ix_extensions_remote " - "ON extensions(remote_url)", - "CREATE INDEX IF NOT EXISTS ix_extensions_status " - "ON extensions(last_status)", - "CREATE INDEX IF NOT EXISTS ix_events_extension " - "ON events(extension_id)", - "CREATE INDEX IF NOT EXISTS ix_events_time " - "ON events(occurred_at)", -] - - -def apply_schema(connection : sqlite3.Connection) -> None: - """ - Materialise the v1 schema on ``connection`` if it is not already - present and bump ``PRAGMA user_version`` to :data:`SCHEMA_VERSION`. - - The function is safe to call on every process start - every DDL - uses ``IF NOT EXISTS`` and the seed rows in ``meta`` use - ``INSERT OR IGNORE``. - - :type connection: sqlite3.Connection - :param connection: An open SQLite connection. The function - commits its own transaction so the caller does not need to - wrap the call in a context manager. - - :rtype: None - :returns: This function returns nothing - the schema is applied - as a side effect on ``connection``. - """ - - cursor = connection.cursor() - - for pragma in _PRAGMAS: - cursor.execute(pragma) - - cursor.execute(_DDL_META) - cursor.execute(_DDL_EXTENSIONS) - cursor.execute(_DDL_EVENTS) - - for ddl in _DDL_INDEXES: - cursor.execute(ddl) - - cursor.execute(f"PRAGMA user_version = {SCHEMA_VERSION}") - - cursor.execute( - "INSERT OR IGNORE INTO meta (key, value) VALUES (?, ?)", - ("schema_version", str(SCHEMA_VERSION)) - ) - cursor.execute( - "INSERT OR IGNORE INTO meta (key, value) VALUES " - "(?, strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))", - ("created_at",) - ) - - connection.commit() diff --git a/polyskills/db/tracker.py b/polyskills/db/tracker.py deleted file mode 100644 index 65609f4..0000000 --- a/polyskills/db/tracker.py +++ /dev/null @@ -1,489 +0,0 @@ -# -*- encoding: utf-8 -*- - -""" -Tracking Façade for the Polyskills SQLite Database - -Provides a thin, best-effort façade around the SQLite tracking -database materialised by :mod:`polyskills.db.schema`. Every public -method of :class:`Tracker` is engineered to be a no-op on failure - -a corrupt database, a read-only home directory, or a transient lock -must never break the install path of the calling code. - -The module also owns a tiny, process-wide invocation-context flag -that downstream remote managers consult to record whether a fetch -was triggered from the CLI ``main()`` or from a direct library API -call. The flag is intentionally process-wide because the CLI is -single-threaded and library callers can build their own -:class:`Tracker` instances when finer granularity is required. -""" - -import sqlite3 -import warnings - -from pathlib import Path -from typing import Optional, Tuple - -from polyskills.db import paths as _paths -from polyskills.db.schema import apply_schema - -# ? process-wide flag toggled by polyskills.cli.main() before the -# ? remote dispatcher is invoked; defaults to "api" so any direct -# ? library caller is recorded correctly without extra wiring. -_INVOCATION_CONTEXT : str = "api" - -_VALID_CONTEXTS : Tuple[str, ...] = ("cli", "api") - - -def set_invocation_context(context : str) -> None: - """ - Record the invocation context for subsequent tracker writes. - - The value is persisted into the ``events.invoked_via`` column on - every recorded fetch so downstream analytics can split CLI usage - from direct library API usage. - - :type context: str - :param context: One of ``"cli"`` or ``"api"``. Any other value is - silently coerced to ``"api"`` so a typo in a caller never - breaks the tracker. - - :rtype: None - :returns: This function returns nothing - the context is stored - in a module-level variable consulted by :class:`Tracker`. - """ - - global _INVOCATION_CONTEXT - _INVOCATION_CONTEXT = ( - context if context in _VALID_CONTEXTS else "api" - ) - - -def get_invocation_context() -> str: - """ - Return the current invocation context recorded by - :func:`set_invocation_context`. - - :rtype: str - :returns: Either ``"cli"`` or ``"api"``. - """ - - return _INVOCATION_CONTEXT - - -class Tracker: - """ - Best-effort SQLite tracking façade for polyskills extension - fetches. The class is deliberately small: it owns one connection, - serialises writes through ``BEGIN IMMEDIATE`` transactions, and - swallows every exception arising from the database layer so the - install path of the framework is never broken by a tracker fault. - - :type database_path: Optional[pathlib.Path] - :param database_path: Optional explicit database path. When - omitted the tracker calls :func:`resolve_db_path` so tests can - monkey-patch the resolver to a temporary file without having - to touch the :class:`Tracker` constructor. - """ - - def __init__( - self, database_path : Optional[Path] = None - ) -> None: - self._path : Optional[Path] = None - self._conn : Optional[sqlite3.Connection] = None - self._healthy : bool = True - self._warned : bool = False - - try: - self._path = ( - database_path - if database_path is not None - else _paths.resolve_db_path() - ) - self._conn = sqlite3.connect( - str(self._path), - timeout = 5.0, - isolation_level = None, - check_same_thread = False - ) - apply_schema(self._conn) - except Exception as exc: - self._healthy = False - self._warn_once( - f"polyskills tracker disabled (init failed): {exc!r}" - ) - - - def _warn_once(self, message : str) -> None: - """ - Emit a single :func:`warnings.warn` per process so a broken - database does not flood stderr on every fetch. - - :type message: str - :param message: The human-readable warning text. - - :rtype: None - :returns: Nothing - the warning is emitted as a side effect. - """ - - if self._warned: - return None - self._warned = True - warnings.warn(message, RuntimeWarning, stacklevel = 3) - - - def is_healthy(self) -> bool: - """ - Return whether the tracker is currently able to write rows. - - :rtype: bool - :returns: ``True`` when the underlying connection is open and - the schema is in place, ``False`` otherwise. - """ - - return self._healthy and self._conn is not None - - - def record_start( - self, - source_kind : str, - remote_url : str, - owner : Optional[str], - repository : Optional[str], - library : str, - name : str, - requested_version : str, - source_dir : str, - destination_dir : str, - exists_policy : str - ) -> Optional[int]: - """ - Upsert a row into ``extensions`` with ``last_status = - 'in_progress'`` and return the newly assigned (or pre-existing) - primary key. The id is later passed to :meth:`record_success` - or :meth:`record_failure` so the matching ``events`` row is - attributed to the correct extension. - - :type source_kind: str - :param source_kind: The :class:`ValidSources` member name, for - example ``"GITHUB"``. - - :type remote_url: str - :param remote_url: Canonical remote URL, used as part of the - natural key in the ``UNIQUE`` constraint on ``extensions``. - - :type owner: Optional[str] - :param owner: Slug owner component, when available. Pure - informational. - - :type repository: Optional[str] - :param repository: Slug repository component, when available. - - :type library: str - :param library: Library kind, ``"skills"`` or ``"agents"``. - - :type name: str - :param name: Extension leaf name, used as part of the - natural key on ``extensions``. - - :type requested_version: str - :param requested_version: Requested tag, branch, or sha. - - :type source_dir: str - :param source_dir: POSIX-style source directory path on the - remote. - - :type destination_dir: str - :param destination_dir: Absolute, normalised destination - directory path on the local filesystem. - - :type exists_policy: str - :param exists_policy: One of ``"fail"``, ``"overwrite"``, - ``"merge"``. - - :rtype: Optional[int] - :returns: The ``extensions.id`` of the upserted row, or - ``None`` if the tracker is unhealthy or the write failed. - """ - - if not self.is_healthy(): - return None - - invoked_via = get_invocation_context() - - try: - assert self._conn is not None - cursor = self._conn.cursor() - cursor.execute("BEGIN IMMEDIATE") - cursor.execute( - """ - INSERT INTO extensions ( - source_kind, remote_url, owner, repository, - library, name, requested_version, source_dir, - destination_dir, exists_policy, last_status, - invoked_via - ) VALUES ( - ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, - 'in_progress', ? - ) - ON CONFLICT (remote_url, library, name, destination_dir) - DO UPDATE SET - source_kind = excluded.source_kind, - owner = excluded.owner, - repository = excluded.repository, - requested_version = excluded.requested_version, - source_dir = excluded.source_dir, - exists_policy = excluded.exists_policy, - last_status = 'in_progress', - last_error = NULL, - invoked_via = excluded.invoked_via, - last_installed_at = CURRENT_TIMESTAMP - RETURNING id - """, - ( - source_kind, remote_url, owner, repository, - library, name, requested_version, source_dir, - destination_dir, exists_policy, invoked_via - ) - ) - row = cursor.fetchone() - cursor.execute("COMMIT") - return int(row[0]) if row else None - except Exception as exc: - self._safe_rollback() - self._warn_once( - f"polyskills tracker record_start failed: {exc!r}" - ) - return None - - - def record_success( - self, - extension_id : Optional[int], - action : str, - file_count : int, - byte_count : int, - duration_ms : int, - resolved_version : Optional[str] = None - ) -> None: - """ - Mark the previously started extension row as successful and - append a corresponding ``events`` row. - - :type extension_id: Optional[int] - :param extension_id: The id returned by :meth:`record_start`. - ``None`` results in a no-op so callers do not need to - branch on the start outcome. - - :type action: str - :param action: Concrete action taken, for example - ``"fetch"``, ``"overwrite"``, or ``"merge"``. - - :type file_count: int - :param file_count: Number of files that landed in the - destination directory. - - :type byte_count: int - :param byte_count: Total byte count of the files in the - destination directory. - - :type duration_ms: int - :param duration_ms: Wall clock duration of the fetch in - milliseconds. - - :type resolved_version: Optional[str] - :param resolved_version: Resolved tag or commit sha, when - different from the requested version. Pure informational. - - :rtype: None - :returns: Nothing - the rows are written as a side effect. - """ - - if not self.is_healthy() or extension_id is None: - return None - - invoked_via = get_invocation_context() - - try: - assert self._conn is not None - cursor = self._conn.cursor() - cursor.execute("BEGIN IMMEDIATE") - cursor.execute( - """ - UPDATE extensions - SET resolved_version = ?, - file_count = ?, - byte_count = ?, - last_status = 'success', - last_error = NULL, - last_installed_at = CURRENT_TIMESTAMP - WHERE id = ? - """, - (resolved_version, file_count, byte_count, extension_id) - ) - cursor.execute( - """ - INSERT INTO events ( - extension_id, action, resolved_version, - file_count, byte_count, duration_ms, - status, error, invoked_via - ) VALUES (?, ?, ?, ?, ?, ?, 'success', NULL, ?) - """, - ( - extension_id, action, resolved_version, - file_count, byte_count, duration_ms, invoked_via - ) - ) - cursor.execute("COMMIT") - except Exception as exc: - self._safe_rollback() - self._warn_once( - f"polyskills tracker record_success failed: {exc!r}" - ) - - - def record_failure( - self, - extension_id : Optional[int], - action : str, - error : str, - duration_ms : int - ) -> None: - """ - Mark the previously started extension row as failed and - append a corresponding ``events`` row carrying the error - message. - - :type extension_id: Optional[int] - :param extension_id: The id returned by :meth:`record_start`. - ``None`` results in a no-op. - - :type action: str - :param action: Concrete action attempted, mirroring - :meth:`record_success`. - - :type error: str - :param error: Human-readable error description, typically the - ``repr`` of the original exception. - - :type duration_ms: int - :param duration_ms: Wall clock duration of the failed attempt. - - :rtype: None - :returns: Nothing - the rows are written as a side effect. - """ - - if not self.is_healthy() or extension_id is None: - return None - - invoked_via = get_invocation_context() - - try: - assert self._conn is not None - cursor = self._conn.cursor() - cursor.execute("BEGIN IMMEDIATE") - cursor.execute( - """ - UPDATE extensions - SET last_status = 'failed', - last_error = ?, - last_installed_at = CURRENT_TIMESTAMP - WHERE id = ? - """, - (error, extension_id) - ) - cursor.execute( - """ - INSERT INTO events ( - extension_id, action, duration_ms, - status, error, invoked_via - ) VALUES (?, ?, ?, 'failed', ?, ?) - """, - ( - extension_id, action, duration_ms, - error, invoked_via - ) - ) - cursor.execute("COMMIT") - except Exception as exc: - self._safe_rollback() - self._warn_once( - f"polyskills tracker record_failure failed: {exc!r}" - ) - - - def _safe_rollback(self) -> None: - """ - Best-effort rollback used by every write helper when an - exception is raised mid-transaction. - - :rtype: None - :returns: Nothing - the rollback is emitted on a best-effort - basis and any further error is swallowed. - """ - - if self._conn is None: - return None - try: - self._conn.execute("ROLLBACK") - except Exception: - pass - - - def close(self) -> None: - """ - Close the underlying SQLite connection. Safe to call multiple - times and on an already-broken tracker. - - :rtype: None - :returns: Nothing - the connection is closed as a side effect. - """ - - if self._conn is None: - return None - try: - self._conn.close() - except Exception: - pass - finally: - self._conn = None - self._healthy = False - - -_TRACKER : Optional[Tracker] = None - - -def get_tracker() -> Tracker: - """ - Return the process-wide :class:`Tracker` singleton, creating it - on the first call. Subsequent calls return the same instance so - the SQLite connection is opened exactly once per process. - - :rtype: Tracker - :returns: The shared :class:`Tracker` instance. The instance may - report :meth:`Tracker.is_healthy` as ``False`` when the - database cannot be opened - all public methods of an - unhealthy tracker are silent no-ops. - """ - - global _TRACKER - if _TRACKER is None: - _TRACKER = Tracker() - return _TRACKER - - -def reset_tracker_for_tests() -> None: - """ - Drop the cached singleton so the next :func:`get_tracker` call - re-runs the bootstrap. Used exclusively by the test suite after a - monkey-patch of :func:`polyskills.db.paths.resolve_db_path` to - pick up the new database location. - - :rtype: None - :returns: Nothing - the cached singleton is cleared as a side - effect. - """ - - global _TRACKER - if _TRACKER is not None: - _TRACKER.close() - _TRACKER = None diff --git a/polyskills/tests/test_tracker.py b/polyskills/tests/test_tracker.py deleted file mode 100644 index 3e58813..0000000 --- a/polyskills/tests/test_tracker.py +++ /dev/null @@ -1,394 +0,0 @@ -# -*- encoding: utf-8 -*- - -""" -Hermetic Tests for the Polyskills Tracking Database ---------------------------------------------------- - -Validates the public surface of the :mod:`polyskills.db` package -without performing any HTTP requests. The cases exercise: - - * Schema bootstrap: a connection passed to :func:`apply_schema` - ends up with the v1 tables, indexes, and ``user_version``. - * Idempotency: applying the schema twice does not raise and does - not duplicate rows in the ``meta`` table. - * :class:`Tracker` happy path: ``record_start`` followed by - ``record_success`` produces exactly one row in ``extensions`` - and one in ``events``. - * :class:`Tracker` failure path: ``record_failure`` writes the - error column without raising. - * Upsert semantics: re-installing the same extension into the - same destination keeps a single ``extensions`` row but appends - additional ``events`` rows. - * Invocation context: ``set_invocation_context('cli')`` is - persisted into the ``invoked_via`` column. - * Robustness: a corrupt database file does not raise into the - caller; the tracker degrades to a silent no-op instead. -""" - -import sqlite3 -import tempfile -import unittest -import warnings - -from pathlib import Path -from typing import Tuple - -from polyskills.db import paths as _db_paths -from polyskills.db import tracker as _db_tracker -from polyskills.db.schema import SCHEMA_VERSION, apply_schema -from polyskills.db.tracker import ( - Tracker, get_invocation_context, set_invocation_context -) - - -def _row_counts(database_path : Path) -> Tuple[int, int]: - """ - Return ``(extensions_count, events_count)`` from ``database_path``. - - The helper opens its own short-lived connection so the assertion - code stays free of SQLite boilerplate. - - :type database_path: pathlib.Path - :param database_path: Database file under inspection. - - :rtype: Tuple[int, int] - :returns: Two-tuple of row counts. - """ - - connection = sqlite3.connect(str(database_path)) - try: - cursor = connection.cursor() - ext_count = cursor.execute( - "SELECT COUNT(*) FROM extensions" - ).fetchone()[0] - evt_count = cursor.execute( - "SELECT COUNT(*) FROM events" - ).fetchone()[0] - return (int(ext_count), int(evt_count)) - finally: - connection.close() - - -class TestSchemaBootstrap(unittest.TestCase): - """ - Validates :func:`apply_schema` materialises the v1 tables, indexes - and ``user_version`` on a fresh connection without raising. - """ - - def setUp(self) -> None: - self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-schema-")) - self.db = self.tmpdir / "polyskills.db" - - - def tearDown(self) -> None: - import shutil - shutil.rmtree(self.tmpdir, ignore_errors = True) - - - def test_schema_creates_expected_tables(self) -> None: - """ - After :func:`apply_schema` the database must contain the - ``meta``, ``extensions`` and ``events`` tables. - """ - - connection = sqlite3.connect(str(self.db)) - try: - apply_schema(connection) - tables = { - row[0] for row in connection.execute( - "SELECT name FROM sqlite_master " - "WHERE type = 'table'" - ) - } - finally: - connection.close() - - self.assertIn("meta", tables) - self.assertIn("extensions", tables) - self.assertIn("events", tables) - - - def test_schema_sets_user_version(self) -> None: - """ - ``PRAGMA user_version`` must equal :data:`SCHEMA_VERSION` - after bootstrap so future migrations can detect the version. - """ - - connection = sqlite3.connect(str(self.db)) - try: - apply_schema(connection) - version = connection.execute( - "PRAGMA user_version" - ).fetchone()[0] - finally: - connection.close() - - self.assertEqual(int(version), SCHEMA_VERSION) - - - def test_schema_is_idempotent(self) -> None: - """ - Applying the schema twice must not raise and must not insert - duplicate ``meta`` rows. - """ - - connection = sqlite3.connect(str(self.db)) - try: - apply_schema(connection) - apply_schema(connection) - count = connection.execute( - "SELECT COUNT(*) FROM meta WHERE key = 'schema_version'" - ).fetchone()[0] - finally: - connection.close() - - self.assertEqual(int(count), 1) - - -class TestTrackerLifecycle(unittest.TestCase): - """ - Validates :class:`Tracker` ``record_start``, ``record_success`` - and ``record_failure`` against an isolated temp database. - """ - - def setUp(self) -> None: - self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-track-")) - self.db = self.tmpdir / "polyskills.db" - set_invocation_context("api") - self.tracker = Tracker(database_path = self.db) - - - def tearDown(self) -> None: - import shutil - self.tracker.close() - shutil.rmtree(self.tmpdir, ignore_errors = True) - - - def _start(self) -> int: - """ - Helper that performs a canonical ``record_start`` call and - returns the new ``extensions.id``. - """ - - ext_id = self.tracker.record_start( - source_kind = "GITHUB", - remote_url = "https://github.com/PyUtility/polyskills", - owner = "PyUtility", - repository = "polyskills", - library = "skills", - name = "sql-code-format", - requested_version = "master", - source_dir = "extensions/skills", - destination_dir = str(self.tmpdir / "dest"), - exists_policy = "fail" - ) - self.assertIsNotNone(ext_id) - return int(ext_id) - - - def test_tracker_is_healthy_after_init(self) -> None: - """ - A freshly constructed tracker must report itself as healthy - and have created the database file on disk. - """ - - self.assertTrue(self.tracker.is_healthy()) - self.assertTrue(self.db.exists()) - - - def test_record_success_writes_one_row_per_table(self) -> None: - """ - A start + success pair must produce one ``extensions`` row - and one ``events`` row carrying ``status='success'``. - """ - - ext_id = self._start() - self.tracker.record_success( - extension_id = ext_id, action = "fetch", - file_count = 3, byte_count = 1024, duration_ms = 12, - resolved_version = "v2.0.0" - ) - - ext_count, evt_count = _row_counts(self.db) - self.assertEqual(ext_count, 1) - self.assertEqual(evt_count, 1) - - connection = sqlite3.connect(str(self.db)) - try: - row = connection.execute( - "SELECT last_status, file_count, byte_count, " - "resolved_version FROM extensions WHERE id = ?", - (ext_id,) - ).fetchone() - finally: - connection.close() - - self.assertEqual(row[0], "success") - self.assertEqual(row[1], 3) - self.assertEqual(row[2], 1024) - self.assertEqual(row[3], "v2.0.0") - - - def test_record_failure_marks_extension_failed(self) -> None: - """ - ``record_failure`` must mark the row as ``failed`` and append - an ``events`` row with the error message preserved. - """ - - ext_id = self._start() - self.tracker.record_failure( - extension_id = ext_id, action = "fetch", - error = "FileNotFoundError('boom')", duration_ms = 9 - ) - - connection = sqlite3.connect(str(self.db)) - try: - ext_row = connection.execute( - "SELECT last_status, last_error FROM extensions " - "WHERE id = ?", (ext_id,) - ).fetchone() - evt_row = connection.execute( - "SELECT status, error FROM events " - "WHERE extension_id = ?", (ext_id,) - ).fetchone() - finally: - connection.close() - - self.assertEqual(ext_row[0], "failed") - self.assertIn("boom", ext_row[1]) - self.assertEqual(evt_row[0], "failed") - self.assertIn("boom", evt_row[1]) - - - def test_reinstall_upserts_extension_and_appends_events(self) -> None: - """ - Two consecutive installs against the same natural key must - keep one ``extensions`` row but accumulate ``events`` rows. - """ - - ext_id_1 = self._start() - self.tracker.record_success( - extension_id = ext_id_1, action = "fetch", - file_count = 1, byte_count = 100, duration_ms = 5 - ) - ext_id_2 = self._start() - self.tracker.record_success( - extension_id = ext_id_2, action = "overwrite", - file_count = 2, byte_count = 200, duration_ms = 5 - ) - - ext_count, evt_count = _row_counts(self.db) - self.assertEqual(ext_id_1, ext_id_2) - self.assertEqual(ext_count, 1) - self.assertEqual(evt_count, 2) - - - def test_invocation_context_is_persisted(self) -> None: - """ - Switching the invocation context to ``"cli"`` must surface in - the ``invoked_via`` column of new event rows. - """ - - set_invocation_context("cli") - self.assertEqual(get_invocation_context(), "cli") - - ext_id = self._start() - self.tracker.record_success( - extension_id = ext_id, action = "fetch", - file_count = 0, byte_count = 0, duration_ms = 1 - ) - - connection = sqlite3.connect(str(self.db)) - try: - invoked = connection.execute( - "SELECT invoked_via FROM events " - "WHERE extension_id = ?", (ext_id,) - ).fetchone()[0] - finally: - connection.close() - - self.assertEqual(invoked, "cli") - set_invocation_context("api") - - -class TestCorruptDatabaseDegradesSafely(unittest.TestCase): - """ - Asserts that a corrupt or unwritable database does not propagate - its failure into the install path - the tracker must degrade to - a silent no-op and emit a single warning. - """ - - def setUp(self) -> None: - self.tmpdir = Path(tempfile.mkdtemp(prefix = "polyskills-corrupt-")) - self.db = self.tmpdir / "polyskills.db" - # ! intentionally write garbage so apply_schema fails - self.db.write_bytes(b"this is not a sqlite database") - - - def tearDown(self) -> None: - import shutil - shutil.rmtree(self.tmpdir, ignore_errors = True) - - - def test_corrupt_database_is_handled_silently(self) -> None: - """ - Constructing a tracker on a corrupt file must not raise and - every public method must subsequently no-op safely. - """ - - with warnings.catch_warnings(record = True): - warnings.simplefilter("always") - tracker = Tracker(database_path = self.db) - - self.assertFalse(tracker.is_healthy()) - self.assertIsNone( - tracker.record_start( - source_kind = "GITHUB", - remote_url = "https://github.com/x/y", - owner = "x", repository = "y", - library = "skills", name = "z", - requested_version = "master", - source_dir = "skills", - destination_dir = "/tmp/z", - exists_policy = "fail" - ) - ) - # both downstream calls must also no-op without raising - tracker.record_success( - extension_id = None, action = "fetch", - file_count = 0, byte_count = 0, duration_ms = 0 - ) - tracker.record_failure( - extension_id = None, action = "fetch", - error = "irrelevant", duration_ms = 0 - ) - tracker.close() - - -class TestResolveDbPathCreatesParent(unittest.TestCase): - """ - Validates the canonical path resolver creates the polyskills home - directory on demand so callers can immediately open the database. - The test isolates the assertion from the suite-wide monkey-patch - by importing the path constants directly. - """ - - def test_resolve_db_path_returns_polyskills_db_under_home(self) -> None: - """ - The canonical path must be ``~/.polyskills/polyskills.db`` - and the parent directory must exist on the filesystem. - """ - - from polyskills.db.paths import ( - DATABASE_FILE, POLYSKILLS_HOME - ) - - canonical = POLYSKILLS_HOME / DATABASE_FILE - - self.assertEqual(canonical.name, "polyskills.db") - self.assertEqual(canonical.parent.name, ".polyskills") - self.assertEqual(canonical.parent, Path.home() / ".polyskills") - - -if __name__ == "__main__": - unittest.main() From 2d02be043e99710675d5b44ffcfd9f9e3b6b31ac Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 19:29:05 +0530 Subject: [PATCH 15/17] =?UTF-8?q?=F0=9F=A7=AA=20test(database):=20add=20of?= =?UTF-8?q?fline=20tracking=20tests=20and=20suite=20DB=20redirect?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cover the new `polyskills.database` package with a fully offline, hermetic test module and protect every test run from touching the real user-level database. Each tracker case writes to its own temporary SQLite file passed explicitly, so correctness never depends on the global redirect. Follows the suite's unittest + temp-dir fixture style and passes the CI lint gate. - add `tests/test_database.py` (17 cases): the OS-independent path resolver and its override, schema materialisation, SQLite pragmas and cascade delete, event recording, strict-normalisation of the lookup rows, derived first / last / SHA facts, failure capture, graceful no-op without SQLAlchemy, and argparse construction of `--no-tracking` and the `records` subcommand - redirect `POLYSKILLS_DB_PATH` to a temporary file in `tests/__init__.py` as a safety net so no test reads or writes the real `~/.polyskills/records.db`; `setdefault` keeps a CI override - verification: `python -m unittest polyskills.tests.test_database` -> 17 OK; combined offline subset (database + cli + path) -> 52 OK with the real user database confirmed absent afterwards Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/tests/__init__.py | 16 ++ polyskills/tests/test_database.py | 427 ++++++++++++++++++++++++++++++ 2 files changed, 443 insertions(+) create mode 100644 polyskills/tests/test_database.py diff --git a/polyskills/tests/__init__.py b/polyskills/tests/__init__.py index eccc767..e575184 100644 --- a/polyskills/tests/__init__.py +++ b/polyskills/tests/__init__.py @@ -23,6 +23,22 @@ to authenticate the requests. """ +import os +import tempfile + __version__ = "v1.0.0" +# ? Redirect the tracking database onto an isolated temporary file for +# ? the whole test run so no test ever reads from or writes to the real +# ? user-level ``~/.polyskills/records.db``. ``default_database_path`` +# ? consults this variable at call time, so setting it here - before +# ? any test triggers a tracking write - is sufficient. The value is +# ? only a safety net; the dedicated database tests still pass an +# ? explicit path for strict per-test isolation. ``setdefault`` keeps +# ? an externally supplied override (for example in CI) intact. +os.environ.setdefault( + "POLYSKILLS_DB_PATH", + os.path.join(tempfile.gettempdir(), "polyskills-test-records.db") +) + # init-time options registrations, use base.py for shared fixtures diff --git a/polyskills/tests/test_database.py b/polyskills/tests/test_database.py new file mode 100644 index 0000000..dbd80ca --- /dev/null +++ b/polyskills/tests/test_database.py @@ -0,0 +1,427 @@ +# -*- encoding: utf-8 -*- + +""" +Offline Tests For The Polyskills Tracking Database +================================================== + +Hermetic :mod:`unittest` cases for the :mod:`polyskills.database` +sub-package. Every case is fully offline - no network is touched - and +each tracker case writes to its own temporary SQLite file passed +explicitly as ``database_path`` so the real user-level database is +never read or written. + +The cases cover four concerns: + + * :class:`TestDatabasePaths` - the OS-independent path resolver and + its ``POLYSKILLS_DB_PATH`` override. + * :class:`TestSchemaAndEngine` - schema materialisation, the SQLite + pragmas, and the ``ON DELETE CASCADE`` behaviour. + * :class:`TestTracker` - the write and read façade: event recording, + strict-normalisation of the lookup rows, derived first / last / + SHA facts, failure capture, and graceful degradation. + * :class:`TestTrackingCliArguments` - argparse construction of the + new ``--no-tracking`` flag and the ``records`` subcommand. +""" + +import os +import shutil +import tempfile +import unittest + +from pathlib import Path +from unittest import mock + +from polyskills import cli +from polyskills import database as db +from polyskills.database import engine as db_engine +from polyskills.database import tracker as db_tracker +from polyskills.database.paths import default_database_path + +# ? a representative, fully-specified fetch used by the tracker cases; +# ? individual tests override single keys through ``_record`` below. +_FETCH = dict( + source_name = "GITHUB", + source_base_url = "https://www.github.com/", + owner = "PyUtility", + repository = "polyskills", + remote_url = "https://github.com/PyUtility/polyskills", + library = "skills", + name = "sql-code-format", + source_dir = "extensions/skills", + install_path = "/home/user/.claude/skills/sql-code-format", + requested_version = "master", + action = "fetch", + status = "success", +) + +_EXPECTED_TABLES = { + "sources", "libraries", "remotes", "extensions", + "installations", "environments", "fetch_events", +} + + +class TestDatabasePaths(unittest.TestCase): + """ + Path-resolution tests for :func:`default_database_path`. + + These cases make no network request and write nothing to disk; they + only assert the resolved path string and the override contract. + """ + + def test_default_is_records_db_under_polyskills_home(self) -> None: + """ + With no override, the path must end in ``.polyskills/records.db`` + and be absolute on every platform. + """ + + with mock.patch.dict(os.environ, {}, clear = False): + os.environ.pop("POLYSKILLS_DB_PATH", None) + path = default_database_path() + + self.assertTrue(path.is_absolute()) + self.assertEqual(path.name, "records.db") + self.assertEqual(path.parent.name, ".polyskills") + + + def test_environment_override_is_honoured(self) -> None: + """ + ``POLYSKILLS_DB_PATH`` must take precedence and be expanded for + the ``~`` home token. + """ + + with mock.patch.dict(os.environ, {}, clear = False): + os.environ["POLYSKILLS_DB_PATH"] = os.path.join("~", "x.db") + path = default_database_path() + + self.assertEqual(path.name, "x.db") + self.assertNotIn("~", path.as_posix()) + + + def test_resolution_is_side_effect_free(self) -> None: + """ + Resolving the default path must not create the parent + directory; materialisation belongs to the engine layer. + """ + + staging = Path(tempfile.mkdtemp()) + target = staging / "nested" / "records.db" + try: + with mock.patch.dict(os.environ, {}, clear = False): + os.environ["POLYSKILLS_DB_PATH"] = str(target) + resolved = default_database_path() + self.assertFalse(resolved.parent.exists()) + finally: + shutil.rmtree(staging, ignore_errors = True) + + +class TestSchemaAndEngine(unittest.TestCase): + """ + Engine and schema tests for :mod:`polyskills.database.engine`. + """ + + def setUp(self) -> None: + """Allocate an isolated temporary database file per test.""" + + self.staging = Path(tempfile.mkdtemp(prefix = "polyskills-db-")) + self.db = self.staging / "records.db" + + + def tearDown(self) -> None: + """Dispose cached engines and remove the staging directory.""" + + db_tracker.reset_cache_for_tests() + shutil.rmtree(self.staging, ignore_errors = True) + + + def test_build_session_factory_creates_every_table(self) -> None: + """ + :func:`build_session_factory` must materialise all seven + normalised tables. + """ + + from sqlalchemy import inspect + + engine, _factory = db_engine.build_session_factory(self.db) + names = set(inspect(engine).get_table_names()) + engine.dispose() + + self.assertEqual(names, _EXPECTED_TABLES) + + + def test_sqlite_pragmas_are_applied(self) -> None: + """ + Every connection must enable foreign keys and use the WAL + journal so the schema's cascade behaves correctly. + """ + + engine, _factory = db_engine.build_session_factory(self.db) + try: + with engine.connect() as connection: + fk = connection.exec_driver_sql( + "PRAGMA foreign_keys" + ).scalar() + journal = connection.exec_driver_sql( + "PRAGMA journal_mode" + ).scalar() + finally: + engine.dispose() + + self.assertEqual(fk, 1) + self.assertEqual(str(journal).lower(), "wal") + + + def test_deleting_installation_cascades_to_events(self) -> None: + """ + Removing an installation must cascade-delete its fetch events + so no orphan audit rows remain. + """ + + from polyskills.database.models import ( + Extension, FetchEvent, Installation, Library, Remote, Source + ) + + engine, factory = db_engine.build_session_factory(self.db) + try: + with factory() as session: + source = Source( + name = "GITHUB", base_url = "https://www.github.com/" + ) + library = Library(name = "skills") + session.add_all([source, library]) + session.flush() + remote = Remote( + source_id = source.id, owner = "PyUtility", + repository = "polyskills", + url = "https://github.com/PyUtility/polyskills" + ) + session.add(remote) + session.flush() + extension = Extension( + remote_id = remote.id, library_id = library.id, + name = "x", source_dir = "extensions/skills" + ) + session.add(extension) + session.flush() + installation = Installation( + extension_id = extension.id, install_path = "/tmp/x" + ) + session.add(installation) + session.flush() + session.add(FetchEvent( + installation_id = installation.id, action = "fetch", + requested_version = "master", status = "success" + )) + session.commit() + + session.delete(installation) + session.commit() + self.assertEqual(session.query(FetchEvent).count(), 0) + finally: + engine.dispose() + + +class TestTracker(unittest.TestCase): + """ + Write and read tests for :mod:`polyskills.database.tracker`. + + Each case targets a private temporary database via the explicit + ``database_path`` argument, so the suite-level redirect is never + relied upon for correctness. + """ + + def setUp(self) -> None: + """Allocate a temp database and clear the engine cache.""" + + self.staging = Path(tempfile.mkdtemp(prefix = "polyskills-trk-")) + self.db = self.staging / "records.db" + db_tracker.reset_cache_for_tests() + db_tracker.set_invocation_context("api") + + + def tearDown(self) -> None: + """Dispose cached engines and remove the staging directory.""" + + db_tracker.reset_cache_for_tests() + shutil.rmtree(self.staging, ignore_errors = True) + + + def _record(self, **overrides) -> object: + """Record a fetch built from ``_FETCH`` plus ``overrides``.""" + + payload = dict(_FETCH) + payload.update(overrides) + return db.record_fetch(database_path = self.db, **payload) + + + def test_record_fetch_persists_and_returns_event_id(self) -> None: + """A successful record must return the new event primary key.""" + + event_id = self._record( + resolved_commit_sha = "abc123", file_count = 3, + total_bytes = 4096, duration_ms = 120 + ) + self.assertIsInstance(event_id, int) + + + def test_repeated_fetch_reuses_rows_and_normalises(self) -> None: + """ + Two fetches of the same extension into the same path must yield + one installation and two events while the lookup tables stay + deduplicated - the core normalisation guarantee. + """ + + from polyskills.database.models import ( + Extension, FetchEvent, Installation, Remote, Source + ) + + self._record(resolved_commit_sha = "aaa") + self._record(resolved_commit_sha = "bbb") + + _engine, factory = db_engine.build_session_factory(self.db) + try: + with factory() as session: + self.assertEqual(session.query(Source).count(), 1) + self.assertEqual(session.query(Remote).count(), 1) + self.assertEqual(session.query(Extension).count(), 1) + self.assertEqual(session.query(Installation).count(), 1) + self.assertEqual(session.query(FetchEvent).count(), 2) + finally: + _engine.dispose() + + + def test_derived_first_last_and_latest_sha(self) -> None: + """ + The read summary must derive first-fetched, last-updated, and + the latest commit SHA from the success events. + """ + + self._record(resolved_commit_sha = "aaa") + self._record(resolved_commit_sha = "bbb") + + rows = db.list_installations(database_path = self.db) + self.assertEqual(len(rows), 1) + row = rows[0] + self.assertEqual(row["fetch_count"], 2) + self.assertEqual(row["resolved_commit_sha"], "bbb") + self.assertIsNotNone(row["first_fetched_at"]) + self.assertIsNotNone(row["last_updated_at"]) + self.assertEqual(row["last_status"], "success") + + + def test_failure_event_recorded_without_derived_timestamps(self) -> None: + """ + A failed-only installation must record the error yet expose no + first-fetched or last-updated timestamp. + """ + + self._record( + name = "broken", install_path = "/tmp/broken", + status = "failed", error = "HTTP 404" + ) + + rows = db.list_installations(database_path = self.db) + match = [r for r in rows if r["name"] == "broken"] + self.assertEqual(len(match), 1) + self.assertEqual(match[0]["last_status"], "failed") + self.assertIsNone(match[0]["first_fetched_at"]) + self.assertIsNone(match[0]["resolved_commit_sha"]) + + + def test_get_installation_includes_event_history(self) -> None: + """ + The detailed view must return one entry per matching + installation and embed its chronological event list. + """ + + self._record(resolved_commit_sha = "aaa") + self._record(resolved_commit_sha = "bbb") + + details = db.get_installation( + "sql-code-format", database_path = self.db + ) + self.assertEqual(len(details), 1) + self.assertEqual(len(details[0]["events"]), 2) + self.assertEqual( + details[0]["events"][0]["resolved_commit_sha"], "aaa" + ) + + + def test_invocation_context_persisted_on_event(self) -> None: + """ + The active invocation context must be stamped onto the event. + """ + + db_tracker.set_invocation_context("cli") + self._record() + + details = db.get_installation( + "sql-code-format", database_path = self.db + ) + self.assertEqual(details[0]["events"][0]["invoked_via"], "cli") + + + def test_record_fetch_is_noop_without_sqlalchemy(self) -> None: + """ + With SQLAlchemy reported absent, recording must degrade to a + silent no-op returning ``None`` instead of raising. + """ + + with mock.patch.object( + db_tracker, "SQLALCHEMY_AVAILABLE", False + ): + result = self._record() + self.assertIsNone(result) + + +class TestTrackingCliArguments(unittest.TestCase): + """ + Hermetic argparse-construction tests for the tracking-related CLI + surface. These never invoke :func:`cli.main` and touch no database. + """ + + def test_manager_exposes_no_tracking_flag(self) -> None: + """``--no-tracking`` must parse to ``True`` on the manager.""" + + parser = cli.buildParser() + args = parser.parse_args([ + "manager", "https://github.com/PyUtility/polyskills", + "--name", "sql-code-format", "--no-tracking", "skills" + ]) + self.assertTrue(args.no_tracking) + + + def test_no_tracking_defaults_to_false(self) -> None: + """Omitting the flag must leave tracking enabled.""" + + parser = cli.buildParser() + args = parser.parse_args([ + "manager", "https://github.com/PyUtility/polyskills", + "--name", "sql-code-format", "skills" + ]) + self.assertFalse(args.no_tracking) + + + def test_records_subcommand_parses_name_and_db(self) -> None: + """The ``records`` subcommand must accept ``--name`` / ``--db``.""" + + parser = cli.buildParser() + args = parser.parse_args([ + "records", "--name", "sql-code-format", "--db", "/tmp/x.db" + ]) + self.assertEqual(args.command, "records") + self.assertEqual(args.name, "sql-code-format") + self.assertEqual(args.db, "/tmp/x.db") + + + def test_records_subcommand_defaults(self) -> None: + """Bare ``records`` must default ``--name`` and ``--db`` to None.""" + + parser = cli.buildParser() + args = parser.parse_args(["records"]) + self.assertEqual(args.command, "records") + self.assertIsNone(args.name) + self.assertIsNone(args.db) + + +if __name__ == "__main__": + unittest.main(verbosity = 2) From 42f72e9c8fb76ae0305fa462f9d281c15930ef9f Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 19:31:47 +0530 Subject: [PATCH 16/17] =?UTF-8?q?=F0=9F=93=9D=20docs:=20document=20trackin?= =?UTF-8?q?g=20database,=20--no-tracking=20and=20records?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the new local tracking database across the README and the CHANGELOG so users discover where installs are recorded, how to read them back, and how to opt out. Both files follow their existing in-repo conventions - the README's emoji `###` subsections and the CHANGELOG's `### PolySkills vX.Y.Z` + emoji `####` layout - rather than a generic template. - README: add a `records` row to the CLI command table, bump the count to five sub-commands, and add a "Local Tracking Database" section covering the `~/.polyskills/records.db` location, the best-effort guarantee, `--no-tracking`, the `records` listing and detail views, and the Python API - CHANGELOG: add a `v2.1.0 | 2026-06-27` section describing the tracking database, the `records` command, `--no-tracking`, `resolveCommit`, the offline test suite, and the SQLAlchemy dependency Co-authored-by: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 41 +++++++++++++++++++++++++++++++++- 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6feeaf9..9c74992 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,69 @@ under `h3` tags, while the `micro` and "version identifiers" are listed under `h +### PolySkills v2.1.0 | 2026-06-27 + +The `v2.1.0` line adds an opt-out **local tracking database** so `polyskills` can answer +"what did I install, from where, into which directory, and at which commit?" across every +project and machine. Each `polyskills manager` fetch is recorded by default into a strictly +normalised SQLite database at `~/.polyskills/records.db`, persisted through the SQLAlchemy ORM. +Tracking is best-effort and never breaks a fetch; pass `--no-tracking` to skip it. + +#### 🎉 Major Features + + * **Local tracking database.** A new `polyskills.database` sub-package records every fetch + into `~/.polyskills/records.db` (resolved OS-independently via `Path.home()`). The schema + is third-normal-form across seven tables (`sources`, `libraries`, `remotes`, `extensions`, + `installations`, `environments`, `fetch_events`) mapped with the SQLAlchemy 2.0 ORM. + Derived facts - first-fetched, last-updated, current commit SHA - are computed from the + append-only event log rather than cached on any row. + * **`records` sub-command.** A read-only `polyskills records [--name ] [--db ]` + command lists every tracked installation or shows one extension's full fetch history - + install location, resolved commit SHA, and first-fetched / last-updated timestamps. It + never creates the database when it is absent. + +#### ✨ Feature Enhancements + + * **`--no-tracking` flag.** `polyskills manager ... --no-tracking` skips the database write + for a single invocation; tracking is otherwise on by default. + * **`resolveCommit` on source managers.** `SourceManager.resolveCommit(remote, version)` + resolves a tag, branch, or SHA to its concrete commit SHA; `GithubManager` implements it via + the commits REST endpoint so the exact installed commit is recorded. + * **Best-effort by design.** Every tracking write is wrapped so a missing dependency, a + read-only home directory, or a locked database degrades to a single warning and never + alters the fetch outcome or the process exit status. + +#### 🧪 Testing & CI + + * **Offline database suite (`test_database.py`).** 17 hermetic cases cover path resolution, + schema and pragmas, cascade delete, event recording, strict normalisation, derived facts, + failure capture, graceful degradation, and the new CLI arguments. + * **Suite database redirect.** `tests/__init__.py` redirects `POLYSKILLS_DB_PATH` onto a + temporary file so no test ever reads or writes the real user database. + +#### 💣 Code Refactoring & Internals + + * Added `sqlalchemy>=2.0` to the runtime dependencies. + * Superseded and removed the earlier stdlib-`sqlite3` tracker prototype; its development + history is retained as a reachable git ancestor through the merge into this line. + +#### 📦 Installation + +```shell +$ pip install "polyskills==2.1.0" + +$ polyskills manager https://github.com// \ + --name sql-code-format \ + --destination ~/.claude/skills/sql-code-format \ + skills # tracked by default + +$ polyskills manager https://github.com// \ + --name sql-code-format --no-tracking skills # skip tracking + +$ polyskills records # list tracked installs +$ polyskills records --name sql-code-format # full history of one +``` + ### PolySkills v2.0.0 | 2026-06-07 The `v2.0.0` line redesigns `polyskills` from a "skills-only fetcher" into a portable, multi-library diff --git a/README.md b/README.md index bf8e60e..de63430 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ default) of Python language and AI tools. ### 🧰 CLI Overview Once installed, the `polyskills` command is exposed on the system `PATH`. Use the `--help` flag at any level to discover -documentation for all sub-commands and flags. The CLI is organized around four sub-commands: +documentation for all sub-commands and flags. The CLI is organized around five sub-commands: | 🔖 Command | 🎯 Purpose | | :--- | :--- | @@ -87,6 +87,7 @@ documentation for all sub-commands and flags. The CLI is organized around four s | `sources` | List the supported remote source providers (e.g., GitHub) and exit. | | `list` | Enumerate available extensions under a remote `` directory, no download. | | `manager` | Fetch a single extension (`skills` / `agents`) from the remote into a local directory. | +| `records` | Inspect the local tracking database (install location, commit SHA, timestamps). | ```shell $ polyskills --help # top level documentation and sub-commands @@ -174,6 +175,44 @@ The `--pagination` flag (defaults to `100`, the GitHub maximum) tunes how many e during enumeration. The `--version` flag (defaults to `master`) pins the fetch to an exact tag or commit SHA so the extension content is reproducible across systems. +### 🗃️ Local Tracking Database + +Every `polyskills manager` fetch is recorded by default into a local SQLite database at `~/.polyskills/records.db` +(resolved OS-independently via `Path.home()`) so you can answer "what did I install, from where, into which directory, +and at which commit?" across projects and machines. The schema is strictly normalised and managed through the +*SQLAlchemy* ORM, and every write is **best-effort**: a missing dependency, a read-only home directory, or a locked +database degrades to a single warning and never breaks the fetch. Pass `--no-tracking` to skip the write for a single +invocation. + +```shell +$ polyskills manager https://github.com/PyUtility/polyskills \ + --name sql-code-format \ + --destination ~/.claude/skills/sql-code-format \ + skills # recorded in ~/.polyskills/records.db + +$ polyskills manager https://github.com/PyUtility/polyskills \ + --name sql-code-format --no-tracking skills # not recorded +``` + +Use the read-only `records` sub-command to inspect what has been tracked. Without `--name` it lists every tracked +installation; with `--name` it shows one extension's full fetch history (install location, resolved commit SHA, and +first-fetched / last-updated timestamps). The command never creates the database when it is absent. + +```shell +$ polyskills records +>> Tracked installations (1): +>> >> 01. sql-code-format [skills] +>> remote : https://github.com/PyUtility/polyskills (extensions/skills) +>> path : /home/user/.claude/skills/sql-code-format +>> commit : 5b23f55de9a1... (requested master) +>> history : first 2026-06-27 12:53:52 | last 2026-06-27 12:53:52 | 1 fetch(es), last success + +$ polyskills records --name sql-code-format # full per-fetch history of one extension +``` + +The same data is available through the Python API - `record_fetch`, `list_installations`, and `get_installation` +in `polyskills.database` - so tracking can be embedded in automation without the CLI. + ### 🐍 Programmatic Usage In addition to the CLI, every primitive is exposed as a Python API so the same workflow can be embedded inside automation From 5a517a0078ddb076eda0061c2a3f284851b4f07d Mon Sep 17 00:00:00 2001 From: ZenithClown Date: Sat, 27 Jun 2026 19:44:57 +0530 Subject: [PATCH 17/17] =?UTF-8?q?=F0=9F=90=9B=F0=9F=9B=A0=EF=B8=8F=20fix(d?= =?UTF-8?q?atabase):=20harden=20audit=20findings=20(path=20key,=20UTC,=20r?= =?UTF-8?q?aces)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve the real defects an adversarial correctness review surfaced in the tracking layer. The headline fix removes a data-integrity bug where two different extensions installed into the same directory could be silently conflated under one identity; the rest harden timestamp handling, concurrency, and lookup hygiene. The security review found no high or medium issues. Verified with the offline suite (53 cases) and a new regression test. - key `Installation` on `(extension_id, install_path)` instead of `install_path` alone, and drop the branch that silently rewrote an installation's `extension_id`, so distinct extensions sharing a path keep independent histories (regression test added) - store `occurred_at` as naive UTC (`_utc_now` strips tzinfo) so a freshly recorded event and one reloaded from SQLite stay directly comparable and sortable, removing a latent aware/naive mismatch - recover `_get_or_create` from a concurrent-writer `IntegrityError` via a SAVEPOINT (`begin_nested`) plus re-query, so parallel processes record their fetch instead of silently dropping the row - default a missing `polyskills` version to `"unknown"` so the `environments` lookup is not bloated by NULL-distinct rows - verification: `python -m unittest` offline subset -> 53 OK; CI gate `flake8 --select=E9,F63,F7,F82` -> 0; naive-UTC round-trip and distinct-extension-same-path both confirmed Co-authored-by: Claude Opus 4.8 (1M context) --- polyskills/database/models.py | 33 ++++++++++++++++++++----------- polyskills/database/tracker.py | 22 ++++++++++++++------- polyskills/tests/test_database.py | 25 +++++++++++++++++++++++ 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/polyskills/database/models.py b/polyskills/database/models.py index e3c6838..21f0e16 100644 --- a/polyskills/database/models.py +++ b/polyskills/database/models.py @@ -56,18 +56,20 @@ def _utc_now() -> datetime: """ - Return the current time as a timezone-aware UTC - :class:`datetime.datetime`. + Return the current UTC time as a naive :class:`datetime.datetime`. - The helper is used as the default factory for - :attr:`FetchEvent.occurred_at` so every audit row is stamped in a - single, unambiguous time zone regardless of the host locale. + The value is the current instant in UTC with its ``tzinfo`` removed. + SQLite has no timezone-aware storage, so a stored timestamp always + reads back naive; producing a naive UTC value here keeps a freshly + recorded event and a reloaded event directly comparable and + sortable. Every audit timestamp in the database is therefore naive + UTC by convention. :rtype: datetime.datetime - :returns: The current instant in the UTC time zone. + :returns: The current UTC instant as a naive ``datetime``. """ - return datetime.now(timezone.utc) + return datetime.now(timezone.utc).replace(tzinfo = None) class Base(DeclarativeBase): @@ -234,10 +236,11 @@ class Installation(Base): A physical installation of an extension at one absolute path on the local filesystem. - The absolute destination path is the natural key and is enforced - unique so repeated fetches into the same directory append a new - :class:`FetchEvent` to the existing installation rather than - creating a duplicate location. + The natural key is the pair ``(extension_id, install_path)``: the + same extension re-fetched into the same directory reuses its + installation row (appending a new :class:`FetchEvent`), while a + different extension fetched into the same path gets its own + installation so the two histories are never conflated. :type extension_id: int :param extension_id: Foreign key into :class:`Extension`. @@ -248,13 +251,19 @@ class Installation(Base): """ __tablename__ = "installations" + __table_args__ = ( + UniqueConstraint( + "extension_id", "install_path", + name = "uq_installation_extension_path" + ), + ) id : Mapped[int] = mapped_column(primary_key = True) extension_id : Mapped[int] = mapped_column( ForeignKey("extensions.id"), nullable = False, index = True ) install_path : Mapped[str] = mapped_column( - Text, nullable = False, unique = True + Text, nullable = False, index = True ) extension : Mapped["Extension"] = relationship( diff --git a/polyskills/database/tracker.py b/polyskills/database/tracker.py index 7570626..fcb5183 100644 --- a/polyskills/database/tracker.py +++ b/polyskills/database/tracker.py @@ -183,6 +183,8 @@ def _get_or_create( :returns: The existing or newly created mapped instance. """ + from sqlalchemy.exc import IntegrityError + instance = session.query(model).filter_by(**keys).one_or_none() if instance is not None: return instance @@ -192,8 +194,17 @@ def _get_or_create( params.update(defaults) instance = model(**params) - session.add(instance) - session.flush() + try: + with session.begin_nested(): + session.add(instance) + session.flush() + except IntegrityError: + # ? a concurrent writer inserted the same natural key between the + # ? lookup and the flush; the SAVEPOINT rolls back only this + # ? insert, leaving the surrounding transaction intact, then the + # ? now-committed row is re-fetched. + instance = session.query(model).filter_by(**keys).one() + return instance @@ -218,7 +229,7 @@ def _resolve_environment(session : Any) -> Any: try: from polyskills import __version__ as polyskills_version except Exception: # pragma: no cover - defensive guard - polyskills_version = None + polyskills_version = "unknown" return _get_or_create( session, Environment, @@ -365,11 +376,8 @@ def record_fetch( ) installation = _get_or_create( session, Installation, - defaults = {"extension_id": extension.id}, - install_path = install_path, + extension_id = extension.id, install_path = install_path, ) - if installation.extension_id != extension.id: - installation.extension_id = extension.id environment = _resolve_environment(session) diff --git a/polyskills/tests/test_database.py b/polyskills/tests/test_database.py index dbd80ca..12386d9 100644 --- a/polyskills/tests/test_database.py +++ b/polyskills/tests/test_database.py @@ -289,6 +289,31 @@ def test_repeated_fetch_reuses_rows_and_normalises(self) -> None: _engine.dispose() + def test_distinct_extensions_same_path_not_conflated(self) -> None: + """ + Two different extensions fetched into the same install path must + yield two installations with independent histories, never one + installation silently relabelled to the second extension. + """ + + self._record( + name = "skill-a", install_path = "/shared/path", + resolved_commit_sha = "aaa" + ) + self._record( + name = "skill-b", install_path = "/shared/path", + resolved_commit_sha = "bbb" + ) + + rows = db.list_installations(database_path = self.db) + shared = [r for r in rows if r["install_path"] == "/shared/path"] + self.assertEqual( + sorted(r["name"] for r in shared), ["skill-a", "skill-b"] + ) + for row in shared: + self.assertEqual(row["fetch_count"], 1) + + def test_derived_first_last_and_latest_sha(self) -> None: """ The read summary must derive first-fetched, last-updated, and