diff --git a/.claude/hooks/session-start.sh b/.claude/hooks/session-start.sh
new file mode 100755
index 0000000..e095ebb
--- /dev/null
+++ b/.claude/hooks/session-start.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Install deps + wire prek git hooks so cloud commits run the same checks as local.
+# Scoped to remote (web/cloud) sessions; remove the guard to run locally too.
+set -euo pipefail
+[ "${CLAUDE_CODE_REMOTE:-}" != "true" ] && exit 0
+cd "${CLAUDE_PROJECT_DIR:-.}"
+
+export PATH="$HOME/.local/bin:$PATH"
+# Record every PATH entry this script adds in CLAUDE_ENV_FILE (once each) so
+# later commands in the session, including the prek hook that runs bun, find them.
+if [ -n "${CLAUDE_ENV_FILE:-}" ]; then
+  for line in \
+    'export PATH="$HOME/.local/bin:$PATH"' \
+    'export PATH="$HOME/.bun/bin:$PATH"'; do
+    grep -qF -- "$line" "$CLAUDE_ENV_FILE" 2>/dev/null || echo "$line" >> "$CLAUDE_ENV_FILE"
+  done
+fi
+
+# --- Install deps for the TypeScript stack ---
+command -v bun >/dev/null || curl -fsSL https://bun.sh/install | bash
+export PATH="$HOME/.bun/bin:$PATH"
+bun install
+
+# Install prek (Rust binary, language-agnostic), then wire the git hooks.
+command -v prek >/dev/null 2>&1 || curl -LsSf https://prek.j178.dev/install.sh | sh
+prek install
+exit 0
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 0000000..ea39e04
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "matcher": "startup|resume",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/session-start.sh"
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 6418b62..52f3dda 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,6 +15,15 @@ permissions:
   id-token: write
 
 jobs:
+  ai-writing:
+    name: AI Writing Check
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v7
+      - uses: oven-sh/setup-bun@v2
+      - run: bun scripts/check-ai-writing.ts
+
   quality:
     runs-on: ubuntu-latest
     timeout-minutes: 10
diff --git a/.github/workflows/folder-size.yaml b/.github/workflows/folder-size.yaml
new file mode 100644
index 0000000..be1ca13
--- /dev/null
+++ b/.github/workflows/folder-size.yaml
@@ -0,0 +1,27 @@
+name: Folder Size Check
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - '**.ts'
+      - '**.tsx'
+      - '**.js'
+      - '**.jsx'
+      - '**.mjs'
+      - '**.cjs'
+jobs:
+  check-folder-sizes:
+    name: Folder File Count Limit
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+        with: { fetch-depth: 0 }
+      - name: Check for oversized folders
+        run: |
+          if [ -n "${{ github.event.pull_request.base.sha }}" ]; then
+            mapfile -t files < <(git diff --name-only --diff-filter=d "${{ github.event.pull_request.base.sha }}...HEAD")
+            [ "${#files[@]}" -eq 0 ] && { echo "No files changed."; exit 0; }
+            scripts/check_folder_sizes.sh "${files[@]}"  # self-filters non-source via SOURCE_EXTS
+          else
+            scripts/check_folder_sizes.sh --all
+          fi
diff --git a/.github/workflows/large-files.yaml b/.github/workflows/large-files.yaml
new file mode 100644
index 0000000..f6237bf
--- /dev/null
+++ b/.github/workflows/large-files.yaml
@@ -0,0 +1,27 @@
+name: Large File Check
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - '**.ts'
+      - '**.tsx'
+      - '**.js'
+      - '**.jsx'
+      - '**.mjs'
+      - '**.cjs'
+jobs:
+  check-file-sizes:
+    name: Source File Line Limit
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v7
+        with: { fetch-depth: 0 }
+      - name: Check for large source files
+        run: |
+          if [ -n "${{ github.event.pull_request.base.sha }}" ]; then
+            mapfile -t files < <(git diff --name-only --diff-filter=d "${{ github.event.pull_request.base.sha }}...HEAD")
+            [ "${#files[@]}" -eq 0 ] && { echo "No files changed."; exit 0; }
+            scripts/check_large_files.sh "${files[@]}"  # self-filters non-source via SOURCE_EXTS
+          else
+            scripts/check_large_files.sh --all
+          fi
diff --git a/.gitignore b/.gitignore
index 7bc3978..c31dd12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,6 @@ test-results/
 playwright-report/
 *.tsbuildinfo
 
+# bun is only a runtime for scripts/check-ai-writing.ts; npm manages deps
+bun.lock
+
diff --git a/prek.toml b/prek.toml
new file mode 100644
index 0000000..091fb33
--- /dev/null
+++ b/prek.toml
@@ -0,0 +1,30 @@
+[[repos]]
+repo = "https://github.com/pre-commit/pre-commit-hooks"
+rev = "v4.6.0"
+hooks = [
+  { id = "check-added-large-files" },
+]
+
+[[repos]]
+repo = "local"
+hooks = [
+  { id = "ai-writing-check", name = "AI writing check", entry = "bun scripts/check-ai-writing.ts", language = "system", pass_filenames = false, always_run = true },
+]
+
+# ── Source-size guardrails (mirror GitHub Actions) ────────────────
+[[repos]]
+repo = "local"
+
+[[repos.hooks]]
+id = "check-large-files"
+name = "fail if any source file exceeds the line-count error threshold"
+language = "system"
+entry = "scripts/check_large_files.sh"
+files = "\\.(ts|tsx|js|jsx|mjs|cjs)$"
+
+[[repos.hooks]]
+id = "check-folder-sizes"
+name = "fail if any source folder exceeds the file-count error threshold"
+language = "system"
+entry = "scripts/check_folder_sizes.sh"
+files = "\\.(ts|tsx|js|jsx|mjs|cjs)$"
diff --git a/scripts/check-ai-writing.ts b/scripts/check-ai-writing.ts
new file mode 100644
index 0000000..c5f7d0d
--- /dev/null
+++ b/scripts/check-ai-writing.ts
@@ -0,0 +1,103 @@
+#!/usr/bin/env bun
+import { readdirSync, readFileSync } from 'node:fs'
+import { join, relative, extname, resolve, sep } from 'node:path'
+import { fileURLToPath } from 'node:url'
+
+const REPO_ROOT = resolve(fileURLToPath(import.meta.url), '../..')
+const SELF = resolve(fileURLToPath(import.meta.url))
+const EM_DASH = '\u2014'
+const ROOT_SKIP = new Set([
+  '.git',
+  '.venv',
+  'node_modules',
+  'dist',
+  'build',
+  'target',
+  '.next',
+  'coverage',
+  '.cache'
+])
+const REC_SKIP = new Set(['__pycache__', 'node_modules', 'dist', 'target', '.next'])
+const SKIP_EXT = new Set([
+  '.png',
+  '.jpg',
+  '.jpeg',
+  '.gif',
+  '.webp',
+  '.ico',
+  '.svg',
+  '.mp4',
+  '.mov',
+  '.mp3',
+  '.woff',
+  '.woff2',
+  '.ttf',
+  '.otf',
+  '.pdf',
+  '.zip',
+  '.gz',
+  '.bin',
+  '.lock'
+])
+const CONTRASTIVE: RegExp[] = [
+  /\bnot (?:just|only|merely|simply)\b[^.?!\n]{0,60}?\bbut\b/i,
+  /\b(?:it'?s|that'?s|this is)\s+not\b[^.?!\n]{0,60}?,?\s*(?:it'?s|that'?s|they'?re)\b/i,
+  /\b(?:isn'?t|aren'?t|wasn'?t|weren'?t)\s+(?:just|only|merely|simply)\b/i,
+  /\b(?:isn'?t|aren'?t)\s+(?:just\s+)?about\b[^.?!\n]{0,60}?\bit'?s about\b/i,
+  /\bmore than just\b/i,
+  /\bless about\b[^.?!\n]{0,60}?\bmore about\b/i,
+  /\bnot\b[^.?!\n]{0,40}?\bso much as\b/i,
+  /\bgoes? beyond\b/i // noisiest; drop if it over-flags
+]
+
+/**
+ * Yield every file under `dir` that should be scanned. Skips anything whose
+ * top-level path segment is in ROOT_SKIP, directories named in REC_SKIP, this
+ * script itself, and files with an extension in SKIP_EXT.
+ */
+function* walk(dir: string): Generator<string> {
+  // Dirent types describe the entry itself, so a dangling symlink cannot throw
+  // here and a symlinked directory is never descended into.
+  for (const entry of readdirSync(dir, { withFileTypes: true })) {
+    const full = join(dir, entry.name)
+    const top = relative(REPO_ROOT, full).split(sep)[0]
+    if (ROOT_SKIP.has(top)) continue
+    if (entry.isDirectory()) {
+      if (REC_SKIP.has(entry.name)) continue
+      yield* walk(full)
+    } else {
+      if (resolve(full) === SELF || SKIP_EXT.has(extname(full).toLowerCase())) continue
+      yield full
+    }
+  }
+}
+
+const em: string[] = [],
+  contra: string[] = []
+for (const file of walk(REPO_ROOT)) {
+  let text: string
+  try {
+    text = readFileSync(file, 'utf8')
+  } catch {
+    // Unreadable entry, e.g. a dangling symlink or a symlink to a directory.
+    continue
+  }
+  const rel = relative(REPO_ROOT, file)
+  text.split('\n').forEach((line, i) => {
+    if (line.includes(EM_DASH)) em.push(`${rel}:${i + 1}: ${line.trim()}`)
+    if (CONTRASTIVE.some((re) => re.test(line))) contra.push(`${rel}:${i + 1}: ${line.trim()}`)
+  })
+}
+if (em.length || contra.length) {
+  if (em.length) {
+    console.log('AI writing check failed: em dash (U+2014) detected')
+    console.log(em.join('\n'))
+  }
+  if (contra.length) {
+    console.log("AI writing check failed: contrastive parallelism ('not just X, but Y') detected")
+    console.log(contra.join('\n'))
+  }
+  console.log('Remove the flagged construction or explain why it is acceptable.')
+  process.exit(1)
+}
+console.log('AI writing check passed.')
diff --git a/scripts/check_folder_sizes.sh b/scripts/check_folder_sizes.sh
new file mode 100755
index 0000000..62a063c
--- /dev/null
+++ b/scripts/check_folder_sizes.sh
@@ -0,0 +1,100 @@
+#!/usr/bin/env bash
+#
+# Enforce a file-count limit on source folders (non-recursive). Shared by
+# .github/workflows/folder-size.yaml and prek.toml.
+#   check_folder_sizes.sh [file ...]   # check folders containing the given files
+#   check_folder_sizes.sh --all        # scan every folder
+# Thresholds via FOLDER_WARN_THRESHOLD / FOLDER_ERROR_THRESHOLD.
+# Exit 1 on errors, 0 on warnings-only or clean.
+set -euo pipefail
+
+WARN_THRESHOLD="${FOLDER_WARN_THRESHOLD:-20}"
+ERROR_THRESHOLD="${FOLDER_ERROR_THRESHOLD:-35}"
+
+# --- Set to YOUR stack (pick one): TS=(ts tsx js jsx mjs cjs) Py=(py) Rust=(rs) ---
+SOURCE_EXTS=(ts tsx js jsx mjs cjs)
+
+GRANDFATHERED=() # folders allowed to exceed (warn instead of error)
+
+EXCLUDE_PATH_RE='(^|/)(node_modules|vendor|dist|build|\.next|coverage|storybook-static|visual-tests|e2e|tests|test|__tests__|\.git)(/|$)'
+GENERATED_RE='(^|/)(alembic[^/]*/versions|migrations)(/|$)'
+
+# Succeeds when folder $1 is listed in GRANDFATHERED.
+is_grandfathered() {
+  # Expanding an empty array under `set -u` aborts on bash older than 4.4.
+  [ "${#GRANDFATHERED[@]}" -eq 0 ] && return 1
+  local g
+  for g in "${GRANDFATHERED[@]}"; do [ "$1" = "$g" ] && return 0; done
+  return 1
+}
+
+# Succeeds when folder $1 must be ignored: empty, the repo root ("."), or a
+# path matching EXCLUDE_PATH_RE / GENERATED_RE.
+should_skip() {
+  local f="$1"
+  [ -z "$f" ] && return 0
+  [ "$f" = "." ] && return 0
+  [[ "$f" =~ $EXCLUDE_PATH_RE ]] && return 0
+  [[ "$f" =~ $GENERATED_RE ]] && return 0
+  return 1
+}
+
+# Print the number of source files directly inside folder $1 (tests and type
+# declarations excluded).
+count_folder() {
+  local fa=() first=1 e
+  for e in "${SOURCE_EXTS[@]}"; do
+    if [ "$first" = 1 ]; then fa+=( -name "*.$e" ); first=0; else fa+=( -o -name "*.$e" ); fi
+  done
+  # tr strips the padding BSD wc puts around the number.
+  find "$1" -mindepth 1 -maxdepth 1 -type f \( "${fa[@]}" \) \
+    -not -name 'test_*' -not -name '*_test.*' -not -name '*.test.*' \
+    -not -name '*.spec.*' -not -name '*.d.ts' -not -name 'conftest.py' | wc -l | tr -d '[:space:]'
+}
+
+# Print every directory under the current one, one per line, without "./".
+# Vendored and build output trees are pruned so find never walks into them.
+collect_all_folders() {
+  find . \( -name .git -o -name node_modules -o -name dist -o -name build \) -prune \
+    -o -type d -print | sed 's|^\./||'
+}
+
+folder_list=$(mktemp); trap 'rm -f "$folder_list"' EXIT
+if [ "${1:-}" = "--all" ]; then collect_all_folders > "$folder_list"
+else for f in "$@"; do [ -z "$f" ] && continue; dirname -- "$f"; done | sort -u > "$folder_list"; fi
+
+warnings=0; errors=0; warn_list=""; error_list=""
+while IFS= read -r folder; do
+  folder="${folder#./}"
+  should_skip "$folder" && continue
+  [ ! -d "$folder" ] && continue
+  count=$(count_folder "$folder")
+  if [ "$count" -gt "$ERROR_THRESHOLD" ]; then
+    if is_grandfathered "$folder"; then
+      warnings=$((warnings + 1)); warn_list="${warn_list}| \`${folder}/\` | ${count} | :warning: exceeds ${ERROR_THRESHOLD} (grandfathered) |\n"
+    else
+      errors=$((errors + 1)); error_list="${error_list}| \`${folder}/\` | ${count} | :x: exceeds ${ERROR_THRESHOLD} |\n"
+    fi
+  elif [ "$count" -gt "$WARN_THRESHOLD" ]; then
+    warnings=$((warnings + 1)); warn_list="${warn_list}| \`${folder}/\` | ${count} | :warning: exceeds ${WARN_THRESHOLD} |\n"
+  fi
+done < "$folder_list"
+
+if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && { [ "$errors" -gt 0 ] || [ "$warnings" -gt 0 ]; }; then
+  {
+    echo "## Folder Size Report"; echo ""
+    echo "| Folder | Files | Status |"; echo "|--------|-------|--------|"
+    [ "$errors" -gt 0 ] && printf '%b' "$error_list"
+    [ "$warnings" -gt 0 ] && printf '%b' "$warn_list"
+    echo ""; echo "**Thresholds:** warn at ${WARN_THRESHOLD} files, error at ${ERROR_THRESHOLD} files. Immediate source children only - subfolders are the fix."
+  } >> "$GITHUB_STEP_SUMMARY"
+fi
+
+format_list() { if command -v column >/dev/null 2>&1; then printf '%b' "$1" | column -t -s '|'; else printf '%b' "$1"; fi; }
+
+if [ "$errors" -gt 0 ]; then echo "::error::${errors} folder(s) exceed the ${ERROR_THRESHOLD}-file error threshold" >&2; format_list "$error_list" >&2; fi
+if [ "$warnings" -gt 0 ]; then echo "::warning::${warnings} folder(s) exceed the ${WARN_THRESHOLD}-file warning threshold" >&2; format_list "$warn_list" >&2; fi
+if [ "$errors" -eq 0 ] && [ "$warnings" -eq 0 ]; then echo "All folders are within the ${WARN_THRESHOLD}-file limit."; fi
+
+[ "$errors" -gt 0 ] && exit 1
+exit 0
diff --git a/scripts/check_large_files.sh b/scripts/check_large_files.sh
new file mode 100755
index 0000000..b52dcad
--- /dev/null
+++ b/scripts/check_large_files.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+#
+# Enforce a line-count limit on source files. Shared by
+# .github/workflows/large-files.yaml and prek.toml.
+#   check_large_files.sh [file ...]   # check the given files
+#   check_large_files.sh --all        # scan the whole tree
+# Thresholds via LARGE_FILE_WARN_THRESHOLD / LARGE_FILE_ERROR_THRESHOLD.
+# Exit 1 on errors, 0 on warnings-only or clean.
+set -euo pipefail
+
+WARN_THRESHOLD="${LARGE_FILE_WARN_THRESHOLD:-500}"
+ERROR_THRESHOLD="${LARGE_FILE_ERROR_THRESHOLD:-800}"
+
+# --- Set to YOUR stack (pick one): TS=(ts tsx js jsx mjs cjs) Py=(py) Rust=(rs) ---
+SOURCE_EXTS=(ts tsx js jsx mjs cjs)
+
+# Trim to your stack: node_modules|dist|.next (TS), __pycache__|.venv (Py), target (Rust)
+EXCLUDE_PATH_RE='(^|/)(node_modules|vendor|dist|build|\.next|coverage|storybook-static|visual-tests|e2e|tests|test|__tests__|\.git)(/|$)'
+GENERATED_RE='(^|/)(alembic[^/]*/versions|migrations)(/|$)'
+EXCLUDE_NAME_RE='(.+\.test\..+|.+\.spec\..+|.+\.d\.ts)$'
+
+# Succeeds when the extension of $1 is one of SOURCE_EXTS. A plain loop keeps
+# the script usable on bash 3.2, which has no associative arrays.
+is_source_file() {
+  local ext="${1##*.}" e
+  for e in "${SOURCE_EXTS[@]}"; do [ "$ext" = "$e" ] && return 0; done
+  return 1
+}
+
+# Succeeds when $1 sits under an excluded or generated path, or its basename
+# is a test, spec or .d.ts file.
+is_excluded() {
+  local f="$1"
+  [[ "$f" =~ $EXCLUDE_PATH_RE ]] && return 0
+  [[ "$f" =~ $GENERATED_RE ]] && return 0
+  [[ "${f##*/}" =~ $EXCLUDE_NAME_RE ]] && return 0
+  return 1
+}
+
+# Print every source file under the current directory, without "./".
+collect_all() {
+  local fa=() first=1 e
+  for e in "${SOURCE_EXTS[@]}"; do
+    if [ "$first" = 1 ]; then fa+=( -name "*.$e" ); first=0; else fa+=( -o -name "*.$e" ); fi
+  done
+  find . \( -name .git -o -name node_modules -o -name dist -o -name build \) -prune \
+    -o -type f \( "${fa[@]}" \) -print | sed 's|^\./||'
+}
+
+files=()
+if [ "${1:-}" = "--all" ]; then
+  # while-read in place of mapfile, which needs bash 4.
+  while IFS= read -r file; do files+=("$file"); done < <(collect_all)
+else
+  files=("$@")
+fi
+
+warnings=0; errors=0; warn_list=""; error_list=""
+# The ${arr[@]+...} form keeps an empty list from tripping `set -u` on bash older than 4.4.
+for file in ${files[@]+"${files[@]}"}; do
+  [ -z "$file" ] && continue
+  [ ! -f "$file" ] && continue
+  is_source_file "$file" || continue
+  is_excluded "$file" && continue
+  lines=$(wc -l < "$file" | tr -d '[:space:]')
+  if [ "$lines" -gt "$ERROR_THRESHOLD" ]; then
+    errors=$((errors + 1))
+    error_list="${error_list}| \`${file}\` | ${lines} | :x: exceeds ${ERROR_THRESHOLD} |\n"
+  elif [ "$lines" -gt "$WARN_THRESHOLD" ]; then
+    warnings=$((warnings + 1))
+    warn_list="${warn_list}| \`${file}\` | ${lines} | :warning: exceeds ${WARN_THRESHOLD} |\n"
+  fi
+done
+
+if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && { [ "$errors" -gt 0 ] || [ "$warnings" -gt 0 ]; }; then
+  {
+    echo "## Large File Report"; echo ""
+    echo "| File | Lines | Status |"; echo "|------|-------|--------|"
+    [ "$errors" -gt 0 ] && printf '%b' "$error_list"
+    [ "$warnings" -gt 0 ] && printf '%b' "$warn_list"
+    echo ""; echo "**Thresholds:** warn at ${WARN_THRESHOLD} lines, error at ${ERROR_THRESHOLD} lines"
+  } >> "$GITHUB_STEP_SUMMARY"
+fi
+
+format_list() { if command -v column >/dev/null 2>&1; then printf '%b' "$1" | column -t -s '|'; else printf '%b' "$1"; fi; }
+
+if [ "$errors" -gt 0 ]; then echo "::error::${errors} file(s) exceed the ${ERROR_THRESHOLD}-line error threshold" >&2; format_list "$error_list" >&2; fi
+if [ "$warnings" -gt 0 ]; then echo "::warning::${warnings} file(s) exceed the ${WARN_THRESHOLD}-line warning threshold" >&2; format_list "$warn_list" >&2; fi
+if [ "$errors" -eq 0 ] && [ "$warnings" -eq 0 ]; then echo "All source files are within the ${WARN_THRESHOLD}-line limit."; fi
+
+[ "$errors" -gt 0 ] && exit 1
+exit 0