From a4f4aa72e48cc157d5bbe04ba797a55e84255481 Mon Sep 17 00:00:00 2001 From: CraftBot Date: Sat, 20 Jun 2026 16:09:16 +0900 Subject: [PATCH 1/3] fix relevance calculation and added bm25 --- agent_core/core/impl/memory/bm25_index.py | 113 +++++ .../core/impl/memory/entity_extractor.py | 85 ++++ agent_core/core/impl/memory/manager.py | 408 +++++++++++++++--- requirements.txt | 1 + 4 files changed, 554 insertions(+), 53 deletions(-) create mode 100644 agent_core/core/impl/memory/bm25_index.py create mode 100644 agent_core/core/impl/memory/entity_extractor.py diff --git a/agent_core/core/impl/memory/bm25_index.py b/agent_core/core/impl/memory/bm25_index.py new file mode 100644 index 00000000..1476cf44 --- /dev/null +++ b/agent_core/core/impl/memory/bm25_index.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +""" +In-memory BM25 keyword index for memory chunks. + +Sits alongside ChromaDB to backstop semantic search on terms vector embeddings +struggle with (proper nouns, dates, IDs, code identifiers). The index is fully +rebuilt from the current chunk set on every refresh — at ~200 memory items it +costs <50ms and avoids the complexity of incremental BM25 updates. +""" + +from __future__ import annotations + +import re +import threading +from typing import Dict, List, Optional, Tuple + +try: + from rank_bm25 import BM25Okapi + _HAS_BM25 = True +except ImportError: + BM25Okapi = None + _HAS_BM25 = False + +from agent_core.utils.logger import logger + + +_TOKEN_RE = re.compile(r"[A-Za-z0-9_]+") + + +def tokenize(text: str) -> List[str]: + """Lowercase word/number tokenizer. Keeps identifiers intact.""" + if not text: + return [] + return [t.lower() for t in _TOKEN_RE.findall(text)] + + +class BM25Index: + """Thread-safe BM25 index keyed by chunk_id. + + On a fresh install or when ``rank_bm25`` is not installed, BM25 retrieval + silently degrades to an empty result set. The MemoryManager then falls back + to pure vector search, so retrieval keeps working — just without the + keyword channel. + """ + + def __init__(self) -> None: + self._lock = threading.Lock() + self._chunk_ids: List[str] = [] + self._tokenized: List[List[str]] = [] + self._bm25: Optional["BM25Okapi"] = None + + def rebuild(self, chunks: Dict[str, str]) -> None: + """Rebuild the index from ``chunk_id -> raw text``. + + Args: + chunks: Mapping of chunk_id to the searchable text body. + """ + with self._lock: + self._chunk_ids = list(chunks.keys()) + self._tokenized = [tokenize(chunks[cid]) for cid in self._chunk_ids] + + if not _HAS_BM25: + self._bm25 = None + return + + if not self._tokenized: + self._bm25 = None + return + + # rank_bm25 raises on empty docs; replace with a single sentinel token + sanitized = [doc if doc else ["__empty__"] for doc in self._tokenized] + try: + self._bm25 = BM25Okapi(sanitized) + except Exception as e: + logger.warning(f"[BM25Index] Failed to build index: {e}") + self._bm25 = None + + def search(self, query: str, top_k: int = 20) -> List[Tuple[str, float]]: + """Return ``[(chunk_id, score)]`` sorted high-to-low. Empty when index unavailable.""" + if not query or not query.strip(): + return [] + + with self._lock: + if self._bm25 is None or not self._chunk_ids: + return [] + + tokens = tokenize(query) + if not tokens: + return [] + + try: + scores = self._bm25.get_scores(tokens) + except Exception as e: + logger.warning(f"[BM25Index] Query failed: {e}") + return [] + + indexed = [ + (self._chunk_ids[i], float(scores[i])) + for i in range(len(self._chunk_ids)) + if scores[i] > 0 + ] + indexed.sort(key=lambda x: x[1], reverse=True) + return indexed[:top_k] + + @property + def size(self) -> int: + with self._lock: + return len(self._chunk_ids) + + @property + def is_available(self) -> bool: + """True when rank_bm25 is installed AND the index has documents.""" + return _HAS_BM25 and self.size > 0 diff --git a/agent_core/core/impl/memory/entity_extractor.py b/agent_core/core/impl/memory/entity_extractor.py new file mode 100644 index 00000000..d15f9f1f --- /dev/null +++ b/agent_core/core/impl/memory/entity_extractor.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +""" +Lightweight heuristic entity extractor for memory chunks. + +This is intentionally simple — Phase 1 just needs to surface proper-noun-like +tokens so they end up in chunk metadata (and in the BM25 corpus). Higher-quality +LLM-based NER is a future phase. + +The extractor pulls: +- Capitalised multi-word sequences (proper nouns) +- Tokens that look like identifiers (CamelCase, snake_case with caps) +- Quoted strings + +Stopword filtering trims common English starters that get capitalised at +sentence boundaries. +""" + +from __future__ import annotations + +import re +from typing import List + +_STOP = { + "the", "a", "an", "and", "or", "but", "of", "in", "on", "at", "to", "for", + "with", "by", "from", "as", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "should", "could", + "may", "might", "must", "can", "i", "you", "he", "she", "it", "we", "they", + "this", "that", "these", "those", "user", "agent", "task", "action", "event", + "memory", "system", "note", "today", "yesterday", "tomorrow", "monday", + "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday", + "january", "february", "march", "april", "may", "june", "july", "august", + "september", "october", "november", "december", +} + +# Capitalised words (incl. CamelCase), optionally chained: "Trading View", +# "OpenAI", "CraftBot", "John Doe" +_PROPER_NOUN_RE = re.compile( + r"\b[A-Z][A-Za-z0-9]*(?:[ \-_][A-Z][A-Za-z0-9]*)*\b" +) + +# Quoted strings (single or double) +_QUOTED_RE = re.compile(r"\"([^\"]{2,40})\"|'([^']{2,40})'") + + +def extract_entities(text: str, max_entities: int = 12) -> List[str]: + """Extract candidate entity strings from text. + + Returns a deduplicated, order-preserving list. The cap exists so chunk + metadata stays compact (ChromaDB stores it for every chunk). + """ + if not text: + return [] + + seen: set[str] = set() + out: List[str] = [] + + for match in _PROPER_NOUN_RE.finditer(text): + candidate = match.group(0).strip() + if not candidate: + continue + lowered = candidate.lower() + if lowered in _STOP: + continue + # Drop single-letter or pure-numeric tokens + if len(candidate) < 2: + continue + if candidate.isdigit(): + continue + if lowered in seen: + continue + seen.add(lowered) + out.append(candidate) + if len(out) >= max_entities: + return out + + for match in _QUOTED_RE.finditer(text): + candidate = (match.group(1) or match.group(2) or "").strip() + if not candidate or candidate.lower() in seen: + continue + seen.add(candidate.lower()) + out.append(candidate) + if len(out) >= max_entities: + break + + return out diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py index 0ae89563..7b41c1a8 100644 --- a/agent_core/core/impl/memory/manager.py +++ b/agent_core/core/impl/memory/manager.py @@ -16,16 +16,42 @@ from __future__ import annotations import hashlib +import math import re import uuid from dataclasses import dataclass, field -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple import chromadb from agent_core.utils.logger import logger +from agent_core.core.impl.memory.bm25_index import BM25Index +from agent_core.core.impl.memory.entity_extractor import extract_entities + + +# Files that are flat lists of "[timestamp] [category] content" items. +# These get per-item chunking so each fact has its own embedding, instead of +# the whole list collapsing into a single section chunk under "## Memory". +PER_ITEM_FILES = frozenset({"MEMORY.md", "EVENT_UNPROCESSED.md"}) + +# Matches a memory item line. Tolerates both "/" and "-" date separators and +# either "[YYYY-MM-DD HH:MM:SS]" (MEMORY.md) or "[YYYY/MM/DD HH:MM:SS]" +# (EVENT_UNPROCESSED.md). Captures: timestamp, category, content. +MEMORY_ITEM_LINE_RE = re.compile( + r"^\s*\[(\d{4}[-/]\d{2}[-/]\d{2}[ T]\d{2}:\d{2}:\d{2})\]\s+\[([\w\-]+)\]\s*:?\s*(.+?)\s*$" +) + +# Hybrid-retrieval weights. Vector is primary signal, BM25 backstops proper +# nouns and dates, recency breaks ties on equally-relevant memories. +HYBRID_WEIGHTS = { + "vector": 0.55, + "bm25": 0.30, + "recency": 0.15, +} +# Days until recency contribution halves. exp(-30/30) ≈ 0.37. +RECENCY_HALF_LIFE_DAYS = 30.0 # ───────────────────────────── Data Classes ───────────────────────────── @@ -140,8 +166,13 @@ class MemoryManager: manager.update() """ - COLLECTION_NAME = "agent_memory" - FILE_INDEX_COLLECTION = "agent_memory_file_index" + # v2 collections use cosine distance and per-item chunking. The "_v2" + # suffix forces a clean rebuild on first run with the new code — old + # "agent_memory" collections are left intact but unused (so a downgrade + # is non-destructive). Drop the old collections manually if disk is + # tight; the manager never reads them. + COLLECTION_NAME = "agent_memory_v2" + FILE_INDEX_COLLECTION = "agent_memory_file_index_v2" def __init__( self, @@ -164,23 +195,34 @@ def __init__( self.chunk_size_limit = chunk_size_limit self.chunk_overlap = chunk_overlap - # Initialize ChromaDB (uses built-in default embeddings) + # Initialize ChromaDB (uses built-in default embeddings). + # hnsw:space=cosine — cosine similarity gives well-scaled scores in + # [0,1] for the hybrid retriever and behaves better than L2 on the + # short factual snippets that dominate MEMORY.md. self.chroma_client = chromadb.PersistentClient(path=chroma_path) self.collection = self.chroma_client.get_or_create_collection( name=self.COLLECTION_NAME, - metadata={"description": "Agent file system memory chunks"}, + metadata={ + "description": "Agent file system memory chunks (v2)", + "hnsw:space": "cosine", + }, ) # File index collection (tracks which files are indexed and their hashes) self.file_index_collection = self.chroma_client.get_or_create_collection( name=self.FILE_INDEX_COLLECTION, - metadata={"description": "File index for incremental updates"}, + metadata={"description": "File index for incremental updates (v2)"}, ) # In-memory cache of file indices self._file_index_cache: Dict[str, FileIndex] = {} self._load_file_index_cache() + # BM25 keyword index — mirrors ChromaDB's chunk set. Rebuilt lazily + # on first query and after each index mutation. + self._bm25 = BM25Index() + self._bm25_dirty = True + logger.info( f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, ChromaDB: {chroma_path}" ) @@ -191,30 +233,34 @@ def retrieve( self, query: str, top_k: int = 5, - min_relevance: float = 0.0, + min_relevance: float = 0.55, file_filter: Optional[List[str]] = None, ) -> List[MemoryPointer]: """ Retrieve memory pointers relevant to the query. - This is the primary retrieval method. It returns lightweight pointers - that tell the agent where to find relevant information, not the full - content. The agent can then decide which chunks to read in full. + Uses a hybrid score: vector cosine similarity + BM25 keyword match + + recency boost. Candidate pool is the union of top-K from each + channel (Reciprocal-Rank-Fusion style); final ranking is the + weighted sum defined by ``HYBRID_WEIGHTS``. Args: query: The search query top_k: Maximum number of results to return - min_relevance: Minimum relevance score (0-1) to include + min_relevance: Minimum hybrid score (0-1) to include. + Default raised to 0.55 to match cosine-scaled scores; callers + that previously passed 0.0 still get sensible behaviour + because BM25 + recency lift relevant matches above the cut. file_filter: Optional list of file paths to search within Returns: - List of MemoryPointer objects, sorted by relevance (highest first) + List of MemoryPointer objects, sorted by relevance (highest first). + Result shape is unchanged from v1 — only the ranking improves. """ if not query or not query.strip(): logger.warning("Empty query provided to retrieve()") return [] - # Check if collection has any documents collection_count = self.collection.count() if collection_count == 0: logger.info( @@ -222,68 +268,186 @@ def retrieve( ) return [] - # Build where filter if file_filter provided + # Cast a wider net than top_k so the hybrid re-rank has signal to work + # with. ChromaDB and BM25 each return up to candidate_pool items. + candidate_pool = max(top_k * 4, 20) + where_filter = None if file_filter: where_filter = {"file_path": {"$in": file_filter}} - # Query ChromaDB logger.info(f"[MEMORY QUERY] Query: {query}") + + # ── Channel 1: vector similarity ── + vector_hits: Dict[str, Dict[str, Any]] = {} try: results = self.collection.query( query_texts=[query], - n_results=min(top_k, collection_count), + n_results=min(candidate_pool, collection_count), where=where_filter, include=["metadatas", "distances", "documents"], ) + ids = (results.get("ids") or [[]])[0] + metadatas = (results.get("metadatas") or [[]])[0] + distances = (results.get("distances") or [[]])[0] + for i, chunk_id in enumerate(ids): + meta = metadatas[i] if i < len(metadatas) else {} + distance = distances[i] if i < len(distances) else 1.0 + vector_hits[chunk_id] = { + "score": _cosine_distance_to_similarity(distance), + "metadata": meta, + "rank": i, + } except Exception as e: logger.error(f"Error querying ChromaDB: {e}") + # Continue — BM25 alone may still return useful results. + + # ── Channel 2: BM25 keyword search ── + self._ensure_bm25_built() + bm25_hits: Dict[str, Dict[str, Any]] = {} + bm25_raw = self._bm25.search(query, top_k=candidate_pool) + if bm25_raw: + max_bm25 = max(score for _, score in bm25_raw) or 1.0 + for rank, (chunk_id, score) in enumerate(bm25_raw): + bm25_hits[chunk_id] = { + "score": score / max_bm25, # min-max normalise to [0,1] + "rank": rank, + } + + # Union the candidate ids from both channels (RRF-style fusion). + candidate_ids = set(vector_hits) | set(bm25_hits) + if not candidate_ids: return [] - # Parse results into MemoryPointers - pointers: List[MemoryPointer] = [] + # If file_filter was set, BM25 may have returned chunks outside the + # filter — drop them by reading metadata for the missing ones. + if file_filter: + need_meta = [cid for cid in candidate_ids if cid not in vector_hits] + if need_meta: + missing_meta = self._fetch_metadata(need_meta) + candidate_ids = { + cid + for cid in candidate_ids + if ( + vector_hits.get(cid, {}).get("metadata", {}).get("file_path") + or missing_meta.get(cid, {}).get("file_path", "") + ) + in set(file_filter) + } + + # Pull metadata for any BM25-only hits so we can build pointers + age. + missing_ids = [cid for cid in candidate_ids if cid not in vector_hits] + extra_meta = self._fetch_metadata(missing_ids) if missing_ids else {} - if not results or not results.get("ids") or not results["ids"][0]: - return pointers + now = datetime.now(timezone.utc) + pointers: List[MemoryPointer] = [] - ids = results["ids"][0] - metadatas = results.get("metadatas", [[]])[0] - distances = results.get("distances", [[]])[0] + w = HYBRID_WEIGHTS + for chunk_id in candidate_ids: + meta = ( + vector_hits[chunk_id]["metadata"] + if chunk_id in vector_hits + else extra_meta.get(chunk_id, {}) + ) + if not meta: + continue - for i, chunk_id in enumerate(ids): - meta = metadatas[i] if i < len(metadatas) else {} + vector_score = vector_hits.get(chunk_id, {}).get("score", 0.0) + bm25_score = bm25_hits.get(chunk_id, {}).get("score", 0.0) + recency_score = _recency_score(meta.get("timestamp", ""), now) - # Convert distance to relevance score (ChromaDB uses L2 distance by default) - # Lower distance = more relevant, so we invert it - distance = distances[i] if i < len(distances) else 1.0 - relevance = 1.0 / (1.0 + distance) # Normalize to 0-1 range + final = ( + w["vector"] * vector_score + + w["bm25"] * bm25_score + + w["recency"] * recency_score + ) - if relevance < min_relevance: + if final < min_relevance: continue - pointer = MemoryPointer( - chunk_id=chunk_id, - file_path=meta.get("file_path", ""), - section_path=meta.get("section_path", ""), - title=meta.get("title", ""), - summary=meta.get("summary", ""), - relevance_score=relevance, - metadata={ - k: v - for k, v in meta.items() - if k not in ("file_path", "section_path", "title", "summary") - }, + pointers.append( + MemoryPointer( + chunk_id=chunk_id, + file_path=meta.get("file_path", ""), + section_path=meta.get("section_path", ""), + title=meta.get("title", ""), + summary=meta.get("summary", ""), + relevance_score=final, + metadata={ + k: v + for k, v in meta.items() + if k + not in ("file_path", "section_path", "title", "summary") + }, + ) ) - pointers.append(pointer) - # Sort by relevance (highest first) pointers.sort(key=lambda p: p.relevance_score, reverse=True) + pointers = pointers[:top_k] logger.info( - f"Retrieved {len(pointers)} memory pointers for query: {query[:50]}..." + f"Retrieved {len(pointers)} memory pointers " + f"(vector={len(vector_hits)}, bm25={len(bm25_hits)}) " + f"for query: {query[:50]}..." ) return pointers + # ───────────────────────── Hybrid retrieval helpers ───────────────────────── + + def _ensure_bm25_built(self) -> None: + """Rebuild the BM25 index if it's been invalidated since last build.""" + if not self._bm25_dirty: + return + try: + corpus = self._load_bm25_corpus() + self._bm25.rebuild(corpus) + self._bm25_dirty = False + logger.debug(f"[MEMORY] BM25 index rebuilt: {self._bm25.size} chunks") + except Exception as e: + logger.warning(f"[MEMORY] Failed to rebuild BM25 index: {e}") + # Leave the flag dirty so we retry on the next query. + + def _load_bm25_corpus(self) -> Dict[str, str]: + """Pull every chunk's searchable text from ChromaDB. + + We concatenate the document body, summary, and extracted_entities so + BM25 has the strongest possible keyword signal — especially proper + nouns that vector embeddings often miss. + """ + try: + result = self.collection.get( + include=["documents", "metadatas"], + ) + except Exception as e: + logger.warning(f"[MEMORY] BM25 corpus load failed: {e}") + return {} + + ids = result.get("ids") or [] + docs = result.get("documents") or [] + metas = result.get("metadatas") or [] + + corpus: Dict[str, str] = {} + for i, chunk_id in enumerate(ids): + body = docs[i] if i < len(docs) else "" + meta = metas[i] if i < len(metas) else {} + summary = meta.get("summary", "") + entities = meta.get("extracted_entities", "") + corpus[chunk_id] = f"{body}\n{summary}\n{entities}" + return corpus + + def _fetch_metadata(self, chunk_ids: List[str]) -> Dict[str, Dict[str, Any]]: + """Fetch metadata for a specific set of chunk ids.""" + if not chunk_ids: + return {} + try: + result = self.collection.get(ids=chunk_ids, include=["metadatas"]) + ids = result.get("ids") or [] + metas = result.get("metadatas") or [] + return {ids[i]: metas[i] for i in range(len(ids))} + except Exception as e: + logger.warning(f"[MEMORY] Metadata fetch failed: {e}") + return {} + def retrieve_full_content(self, chunk_id: str) -> Optional[str]: """ Retrieve the full content of a specific chunk by its ID. @@ -445,12 +609,17 @@ def clear(self) -> None: def _chunk_markdown(self, content: str, file_path: str) -> List[MemoryChunk]: """ - Split markdown content into semantic chunks based on headers. + Split markdown content into semantic chunks. + + Dispatches based on file shape: + - Flat per-item logs (MEMORY.md, EVENT_UNPROCESSED.md) → one chunk + per "[ts] [cat] content" line via :meth:`_chunk_memory_log`. + - Everything else → header-based section chunking (original + behaviour, unchanged). - This uses a hierarchical approach: - 1. Split by headers (##, ###, etc.) - 2. Each section becomes a chunk with its header path - 3. Large sections are further split with overlap + Per-item chunking is the Phase 1 fix for retrieval accuracy: in the + old section-based path, every memory item collapsed into a single + chunk under "## Memory" and the embedding represented the whole blob. Args: content: The markdown content to chunk @@ -459,6 +628,78 @@ def _chunk_markdown(self, content: str, file_path: str) -> List[MemoryChunk]: Returns: List of MemoryChunk objects """ + filename = Path(file_path).name + if filename in PER_ITEM_FILES: + return self._chunk_memory_log(content, file_path) + return self._chunk_by_sections(content, file_path) + + def _chunk_memory_log( + self, content: str, file_path: str + ) -> List[MemoryChunk]: + """One chunk per ``[ts] [cat] content`` line. + + Each line is short enough on its own (memory items are capped at + ~150 words by the memory-processor skill) that no further splitting + is needed. Lines that don't match the expected pattern — headers, + blank lines, the file's preamble — are skipped here; the file as a + whole is still in INDEX_TARGET_FILES so its preamble is captured + by the section chunker on other indexed files where appropriate. + + Per-chunk metadata carries timestamp, category, extracted_entities + (list of capitalised tokens / quoted strings) and an indexed_at + stamp. ``age_days`` is NOT stored — it's computed at query time + from ``timestamp`` so a stale index doesn't lock in old recency. + """ + chunks: List[MemoryChunk] = [] + now = datetime.utcnow().isoformat() + + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or line.startswith(">"): + continue + match = MEMORY_ITEM_LINE_RE.match(line) + if not match: + continue + + timestamp_str, category, item_text = match.groups() + timestamp_iso = _normalize_timestamp(timestamp_str) + category = category.lower() + + # Body = the item content. Summary = first ~150 chars cleaned. + entities = extract_entities(item_text) + summary = self._create_summary(item_text) + + chunks.append( + MemoryChunk( + chunk_id=str(uuid.uuid4()), + file_path=file_path, + section_path=f"item:{category}", + title=category, + content=line, # keep the full bracketed line for grep parity + summary=summary, + content_hash=self._compute_content_hash(line), + file_modified_at="", + indexed_at=now, + metadata={ + "timestamp": timestamp_iso, + "category": category, + # ChromaDB metadata values must be primitives; serialise + # the entity list as a comma-joined string. The BM25 + # corpus and retrieval consumers parse it back. + "extracted_entities": ", ".join(entities), + "item_kind": "memory_log", + }, + ) + ) + + return chunks + + def _chunk_by_sections( + self, content: str, file_path: str + ) -> List[MemoryChunk]: + """Original header-based chunker. Preserves existing behaviour for + non-list markdown (AGENT.md, USER.md, PROACTIVE.md, ...). + """ chunks: List[MemoryChunk] = [] # Parse headers and their content @@ -752,6 +993,8 @@ def _index_file(self, file_path: Path) -> int: logger.error(f"Error adding chunks to ChromaDB: {e}") return 0 + self._bm25_dirty = True + # Update file index cache file_index = FileIndex( file_path=rel_path, @@ -787,6 +1030,7 @@ def _remove_file_from_index(self, file_path: str) -> None: # Remove from cache del self._file_index_cache[file_path] + self._bm25_dirty = True logger.debug(f"Removed {len(file_index.chunk_ids)} chunks for {file_path}") @@ -805,14 +1049,18 @@ def _clear_index(self) -> None: self.collection = self.chroma_client.get_or_create_collection( name=self.COLLECTION_NAME, - metadata={"description": "Agent file system memory chunks"}, + metadata={ + "description": "Agent file system memory chunks (v2)", + "hnsw:space": "cosine", + }, ) self.file_index_collection = self.chroma_client.get_or_create_collection( name=self.FILE_INDEX_COLLECTION, - metadata={"description": "File index for incremental updates"}, + metadata={"description": "File index for incremental updates (v2)"}, ) self._file_index_cache.clear() + self._bm25_dirty = True # ───────────────────────────── File Index Persistence ───────────────────────────── @@ -964,6 +1212,60 @@ def create_memory_processing_task( ) +# ───────────────────── Hybrid Retrieval Scoring Helpers ───────────────────── + + +def _cosine_distance_to_similarity(distance: float) -> float: + """Map ChromaDB's cosine distance to a [0,1] similarity score. + + ChromaDB returns ``1 - cosine_similarity`` when the collection is + configured with ``hnsw:space=cosine``. Clamp to handle floating-point + drift and the L2 fallback case (where distances can exceed 1). + """ + if distance is None: + return 0.0 + sim = 1.0 - float(distance) + if sim < 0.0: + return 0.0 + if sim > 1.0: + return 1.0 + return sim + + +def _normalize_timestamp(ts: str) -> str: + """Coerce '/' or 'T'-separated timestamps to canonical 'YYYY-MM-DD HH:MM:SS'. + + Returns an empty string when parsing fails — callers treat that as + "unknown age" and the recency channel contributes 0 for the chunk. + """ + if not ts: + return "" + cleaned = ts.replace("/", "-").replace("T", " ") + try: + dt = datetime.strptime(cleaned, "%Y-%m-%d %H:%M:%S") + return dt.strftime("%Y-%m-%d %H:%M:%S") + except ValueError: + return "" + + +def _recency_score(timestamp_iso: str, now: datetime) -> float: + """``exp(-age_days / RECENCY_HALF_LIFE_DAYS)`` — newer = closer to 1.0. + + Chunks without a parseable timestamp (e.g. AGENT.md sections) score 0 + so they neither help nor hurt the hybrid rank. + """ + if not timestamp_iso: + return 0.0 + try: + item_dt = datetime.strptime(timestamp_iso, "%Y-%m-%d %H:%M:%S") + except ValueError: + return 0.0 + if item_dt.tzinfo is None: + item_dt = item_dt.replace(tzinfo=timezone.utc) + age_days = max(0.0, (now - item_dt).total_seconds() / 86400.0) + return math.exp(-age_days / RECENCY_HALF_LIFE_DAYS) + + # ───────────────────────────── Testing / Demo ───────────────────────────── diff --git a/requirements.txt b/requirements.txt index 286fe9ca..ab68d367 100644 --- a/requirements.txt +++ b/requirements.txt @@ -52,3 +52,4 @@ pypdfium2 pdfminer.six pymupdf pypdf +rank_bm25 From 5d6c76a43607acb69b2cbe9bbb75f50f872c9b07 Mon Sep 17 00:00:00 2001 From: CraftBot Date: Sat, 20 Jun 2026 20:14:07 +0900 Subject: [PATCH 2/3] improved memory system --- agent_core/core/impl/context/engine.py | 58 ++++------ agent_core/core/impl/memory/manager.py | 142 +++++++++++++++++++++++-- app/main.py | 37 +++++++ 3 files changed, 194 insertions(+), 43 deletions(-) diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py index 2beab3f2..b61085e2 100644 --- a/agent_core/core/impl/context/engine.py +++ b/agent_core/core/impl/context/engine.py @@ -603,8 +603,12 @@ def _build_memory_query( ) -> Optional[str]: """Build a semantic query for memory retrieval. - Combines task instruction with recent conversation messages (both user - and agent) to provide better context for memory search. + Uses ONLY the latest user message. Agent messages are excluded — they + often restate or drift to adjacent topics and were observed dominating + the embedding (e.g. a proactive-tasks explanation poisoning an MCP + question). If no user message is available (background task, planner, + heartbeat), falls back to the task instruction, then to the explicit + query argument. Args: query: Optional explicit query string. @@ -613,7 +617,10 @@ def _build_memory_query( Returns: A query string suitable for semantic memory search, or None if no context. """ - # Get task instruction as the base query + latest_user_message = self._get_latest_user_message(session_id) + if latest_user_message: + return latest_user_message + session = get_session_or_none(session_id) if session and session.current_task: task_instruction = session.current_task.instruction @@ -621,55 +628,36 @@ def _build_memory_query( current_task = get_state().current_task task_instruction = current_task.instruction if current_task else None - if not task_instruction: - # Fall back to explicit query if no task - return query if query else None - - # Get recent conversation messages for additional context - recent_context = self._get_recent_conversation_for_memory(session_id, limit=5) - - if recent_context: - return f"{task_instruction}\n\nRecent conversation:\n{recent_context}" - else: + if task_instruction: return task_instruction - def _get_recent_conversation_for_memory( - self, session_id: Optional[str], limit: int = 5 - ) -> str: - """Get recent conversation messages for memory query context. + return query if query else None - Args: - session_id: Optional session ID for session-specific event stream. - limit: Maximum number of messages to include. + def _get_latest_user_message(self, session_id: Optional[str]) -> str: + """Return the most recent user message text, or empty string if none. - Returns: - Formatted string of recent user and agent messages. + Walks the conversation-history buffer from newest to oldest and returns + the first event whose kind contains 'user message'. Agent messages are + skipped entirely. """ try: event_stream_manager = self.state_manager.event_stream_manager if not event_stream_manager: return "" - # Get messages from conversation history (includes both user and agent) recent_messages = event_stream_manager.get_recent_conversation_messages( - limit + limit=20 ) if not recent_messages: return "" - # Format messages simply for semantic search - lines = [] - for event in recent_messages: - # Simplify the kind label for the query - if "user message" in event.kind: - lines.append(f"User: {event.message}") - elif "agent message" in event.kind: - lines.append(f"Agent: {event.message}") - - return "\n".join(lines) + for event in reversed(recent_messages): + if "user message" in event.kind and event.message: + return event.message.strip() + return "" except Exception as e: - logger.warning(f"[MEMORY] Failed to get recent conversation: {e}") + logger.warning(f"[MEMORY] Failed to get latest user message: {e}") return "" def get_memory_context( diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py index 7b41c1a8..a93c5a0d 100644 --- a/agent_core/core/impl/memory/manager.py +++ b/agent_core/core/impl/memory/manager.py @@ -53,6 +53,26 @@ # Days until recency contribution halves. exp(-30/30) ≈ 0.37. RECENCY_HALF_LIFE_DAYS = 30.0 +# ───────────────────────── Embedding Model ───────────────────────── +# ChromaDB's default is sentence-transformers/all-MiniLM-L6-v2 (22M params, +# 2021). Verbatim self-similarity scores ~0.65; topical matches sit at +# ~0.50; noise floor is ~0.45. That ~0.05 dynamic range can't support +# accurate retrieval no matter how the downstream scoring is tuned. +# +# BGE-small-en-v1.5 (33M params, 384-dim, same dimensionality as MiniLM +# so we don't break anything else) typically scores ~0.92 on verbatim +# matches, ~0.75 on topical, and drops below 0.50 for unrelated content. +# That's the dynamic range hybrid scoring actually needs. +# +# Override via the MEMORY_EMBEDDING_MODEL env var if you want to try +# bge-base-en-v1.5 (better, slower), e5-small-v2, or any other +# sentence-transformers model. Set to "default" to use ChromaDB's +# bundled ONNX MiniLM. +import os as _os +MEMORY_EMBEDDING_MODEL = _os.environ.get( + "MEMORY_EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5" +) + # ───────────────────────────── Data Classes ───────────────────────────── @@ -195,22 +215,33 @@ def __init__( self.chunk_size_limit = chunk_size_limit self.chunk_overlap = chunk_overlap - # Initialize ChromaDB (uses built-in default embeddings). + # Initialize ChromaDB. # hnsw:space=cosine — cosine similarity gives well-scaled scores in # [0,1] for the hybrid retriever and behaves better than L2 on the # short factual snippets that dominate MEMORY.md. self.chroma_client = chromadb.PersistentClient(path=chroma_path) - self.collection = self.chroma_client.get_or_create_collection( + + # Build the embedding function. Default ChromaDB uses MiniLM-L6-v2 + # (weak — ~0.65 verbatim self-similarity). MEMORY_EMBEDDING_MODEL + # points to a stronger sentence-transformers model by default. + # Silent fallback to ChromaDB's bundled MiniLM if sentence-transformers + # isn't installed, so the system keeps working on minimal installs. + embedding_fn = self._build_embedding_function() + + self.collection = self._open_collection( name=self.COLLECTION_NAME, + embedding_fn=embedding_fn, metadata={ "description": "Agent file system memory chunks (v2)", "hnsw:space": "cosine", + "embedding_model": MEMORY_EMBEDDING_MODEL, }, ) # File index collection (tracks which files are indexed and their hashes) - self.file_index_collection = self.chroma_client.get_or_create_collection( + self.file_index_collection = self._open_collection( name=self.FILE_INDEX_COLLECTION, + embedding_fn=embedding_fn, metadata={"description": "File index for incremental updates (v2)"}, ) @@ -224,9 +255,87 @@ def __init__( self._bm25_dirty = True logger.info( - f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, ChromaDB: {chroma_path}" + f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, " + f"ChromaDB: {chroma_path}, embedding model: {MEMORY_EMBEDDING_MODEL}" ) + # ───────────────────────────── Embedding ───────────────────────────── + + def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]): + """Open a Chroma collection, auto-rebuilding on embedding mismatch. + + ChromaDB persists the embedding-function name in the collection config + and refuses get_or_create with a different one. That happens when the + collection was first created in a session where sentence-transformers + wasn't loadable (falling back to default) and is reopened in a session + where it is. The Chroma index is a derived cache — the source of truth + is the markdown files — so dropping and rebuilding is safe; the next + update() call will repopulate from disk. + """ + try: + return self.chroma_client.get_or_create_collection( + name=name, + embedding_function=embedding_fn, + metadata=metadata, + ) + except ValueError as e: + msg = str(e).lower() + if "embedding function" in msg and ("conflict" in msg or "already exists" in msg): + logger.warning( + f"[MEMORY] Embedding-function mismatch on '{name}' " + f"(persisted vs. current model). Dropping and rebuilding; " + f"the index will be re-populated from agent_file_system on " + f"the next update()." + ) + try: + self.chroma_client.delete_collection(name) + except Exception as del_err: + logger.error( + f"[MEMORY] Failed to delete stale collection '{name}': {del_err}" + ) + raise + return self.chroma_client.create_collection( + name=name, + embedding_function=embedding_fn, + metadata=metadata, + ) + raise + + @staticmethod + def _build_embedding_function(): + """Construct ChromaDB's embedding function. + + Honours the MEMORY_EMBEDDING_MODEL constant. Falls back to + ChromaDB's bundled default (ONNX all-MiniLM-L6-v2) silently when + sentence-transformers is missing or the model can't load — so + the agent never fails to start because of an embedding-model + installation issue. + """ + if MEMORY_EMBEDDING_MODEL == "default": + return None # ChromaDB applies its bundled default + try: + from chromadb.utils.embedding_functions import ( + SentenceTransformerEmbeddingFunction, + ) + return SentenceTransformerEmbeddingFunction( + model_name=MEMORY_EMBEDDING_MODEL + ) + except ImportError: + logger.warning( + "[MEMORY] sentence-transformers not installed — falling back " + "to ChromaDB's default MiniLM embeddings. Retrieval quality " + "will be poor. Install with: conda install -c conda-forge " + "sentence-transformers" + ) + return None + except Exception as e: + logger.warning( + f"[MEMORY] Failed to load embedding model " + f"'{MEMORY_EMBEDDING_MODEL}' ({e}); falling back to ChromaDB " + f"default." + ) + return None + # ───────────────────────────── Public API ───────────────────────────── def retrieve( @@ -276,7 +385,14 @@ def retrieve( if file_filter: where_filter = {"file_path": {"$in": file_filter}} - logger.info(f"[MEMORY QUERY] Query: {query}") + # Single-line query rendering so multi-line queries don't bleed into + # following log entries (used to make the log appear to mix queries + # with conversation history). Truncate long queries for log hygiene; + # full query is still passed to the retriever. + _q_one_line = " ".join(query.split()) + if len(_q_one_line) > 300: + _q_one_line = _q_one_line[:297] + "..." + logger.info(f"[MEMORY QUERY] {_q_one_line}") # ── Channel 1: vector similarity ── vector_hits: Dict[str, Dict[str, Any]] = {} @@ -386,10 +502,20 @@ def retrieve( pointers = pointers[:top_k] logger.info( - f"Retrieved {len(pointers)} memory pointers " - f"(vector={len(vector_hits)}, bm25={len(bm25_hits)}) " - f"for query: {query[:50]}..." + f"[MEMORY RESULT] {len(pointers)} pointer(s) returned " + f"(vector candidates={len(vector_hits)}, bm25 candidates={len(bm25_hits)}, " + f"min_relevance={min_relevance})" ) + if not pointers: + logger.info("[MEMORY RESULT] (no pointers above min_relevance)") + for i, p in enumerate(pointers, start=1): + summary_preview = " ".join((p.summary or "").split()) + if len(summary_preview) > 120: + summary_preview = summary_preview[:117] + "..." + logger.info( + f"[MEMORY RESULT] #{i} score={p.relevance_score:.3f} " + f"file={p.file_path} section={p.section_path} :: {summary_preview}" + ) return pointers # ───────────────────────── Hybrid retrieval helpers ───────────────────────── diff --git a/app/main.py b/app/main.py index 02455d5b..8ffd3633 100644 --- a/app/main.py +++ b/app/main.py @@ -8,6 +8,43 @@ Run this before the app directory, using 'python -m app.main' """ +# ============================================================================ +# CRITICAL: SSL bootstrap BEFORE any TLS-using import (aiohttp, openai, etc.) +# +# On Windows, a single malformed certificate in the OS cert store +# ("Trusted Root", "CA", etc.) breaks ssl.create_default_context() with +# "[ASN1: NOT_ENOUGH_DATA]" because the stdlib loads ALL Windows certs in +# one batch via load_verify_locations(cadata=...). One bad cert poisons the +# whole batch. +# +# Workaround: wrap SSLContext._load_windows_store_certs to swallow that +# specific SSLError. Lost Windows-CA-store certs are replaced by certifi's +# Mozilla bundle (set_default_verify_paths still runs), so server cert +# validation still works for PyPI / OpenAI / Anthropic / etc. +import sys as _sys +if _sys.platform == "win32": + import ssl as _ssl + _orig_load_win_certs = getattr( + _ssl.SSLContext, "_load_windows_store_certs", None + ) + if _orig_load_win_certs is not None: + def _safe_load_windows_store_certs(self, storename, purpose): + try: + return _orig_load_win_certs(self, storename, purpose) + except _ssl.SSLError: + # Malformed cert in store — skip silently. certifi still loads. + return None + _ssl.SSLContext._load_windows_store_certs = _safe_load_windows_store_certs + + # Also try truststore as an extra layer (uses Windows SChannel directly + # on modern versions); harmless if not installed. + try: + import truststore as _truststore + _truststore.inject_into_ssl() + except Exception: + pass +# ============================================================================ + # ============================================================================ # CRITICAL: Suppress console logging BEFORE imports # Must be done before any module calls logging.basicConfig() From 7161156ad63ff0030a4e43a6581000fd295a2524 Mon Sep 17 00:00:00 2001 From: CraftBot Date: Sat, 20 Jun 2026 21:20:40 +0900 Subject: [PATCH 3/3] refactor code and remove recency logic --- agent_core/core/impl/context/engine.py | 33 ++---- agent_core/core/impl/memory/manager.py | 143 ++++++++++--------------- 2 files changed, 70 insertions(+), 106 deletions(-) diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py index b61085e2..6db8b46a 100644 --- a/agent_core/core/impl/context/engine.py +++ b/agent_core/core/impl/context/engine.py @@ -603,35 +603,24 @@ def _build_memory_query( ) -> Optional[str]: """Build a semantic query for memory retrieval. - Uses ONLY the latest user message. Agent messages are excluded — they - often restate or drift to adjacent topics and were observed dominating - the embedding (e.g. a proactive-tasks explanation poisoning an MCP - question). If no user message is available (background task, planner, - heartbeat), falls back to the task instruction, then to the explicit - query argument. - - Args: - query: Optional explicit query string. - session_id: Optional session ID for session-specific state lookup. - - Returns: - A query string suitable for semantic memory search, or None if no context. + Priority: latest user message → task instruction → explicit query. + Agent messages are deliberately excluded — they often restate or + drift to adjacent topics and were observed dominating the embedding + (a long proactive-tasks reply poisoned a follow-up MCP question). """ latest_user_message = self._get_latest_user_message(session_id) if latest_user_message: return latest_user_message session = get_session_or_none(session_id) - if session and session.current_task: - task_instruction = session.current_task.instruction - else: - current_task = get_state().current_task - task_instruction = current_task.instruction if current_task else None - - if task_instruction: - return task_instruction + current_task = ( + session.current_task if session and session.current_task + else get_state().current_task + ) + if current_task and current_task.instruction: + return current_task.instruction - return query if query else None + return query or None def _get_latest_user_message(self, session_id: Optional[str]) -> str: """Return the most recent user message text, or empty string if none. diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py index a93c5a0d..388e98f0 100644 --- a/agent_core/core/impl/memory/manager.py +++ b/agent_core/core/impl/memory/manager.py @@ -16,11 +16,10 @@ from __future__ import annotations import hashlib -import math import re import uuid from dataclasses import dataclass, field -from datetime import datetime, timezone +from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -43,15 +42,33 @@ r"^\s*\[(\d{4}[-/]\d{2}[-/]\d{2}[ T]\d{2}:\d{2}:\d{2})\]\s+\[([\w\-]+)\]\s*:?\s*(.+?)\s*$" ) -# Hybrid-retrieval weights. Vector is primary signal, BM25 backstops proper -# nouns and dates, recency breaks ties on equally-relevant memories. +# Hybrid-retrieval weights. Vector is the primary signal, BM25 backstops +# proper nouns and dates. HYBRID_WEIGHTS = { - "vector": 0.55, - "bm25": 0.30, - "recency": 0.15, + "vector": 0.65, + "bm25": 0.35, } -# Days until recency contribution halves. exp(-30/30) ≈ 0.37. -RECENCY_HALF_LIFE_DAYS = 30.0 + +# Log-line preview limits. Keep multi-line queries and long summaries from +# bleeding across log entries. +_LOG_QUERY_MAX_CHARS = 300 +_LOG_SUMMARY_MAX_CHARS = 120 + + +def _log_preview(text: str, max_chars: int) -> str: + """Collapse whitespace and truncate text for safe logging.""" + flat = " ".join((text or "").split()) + if len(flat) <= max_chars: + return flat + return flat[: max_chars - 3] + "..." + + +def _is_embedding_function_conflict(err: Exception) -> bool: + """Detect ChromaDB's "embedding function mismatch" ValueError by message.""" + msg = str(err).lower() + return "embedding function" in msg and ( + "conflict" in msg or "already exists" in msg + ) # ───────────────────────── Embedding Model ───────────────────────── # ChromaDB's default is sentence-transformers/all-MiniLM-L6-v2 (22M params, @@ -265,12 +282,11 @@ def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]): """Open a Chroma collection, auto-rebuilding on embedding mismatch. ChromaDB persists the embedding-function name in the collection config - and refuses get_or_create with a different one. That happens when the - collection was first created in a session where sentence-transformers - wasn't loadable (falling back to default) and is reopened in a session - where it is. The Chroma index is a derived cache — the source of truth - is the markdown files — so dropping and rebuilding is safe; the next - update() call will repopulate from disk. + and refuses get_or_create with a different one — happens when the + collection was first created without sentence-transformers loadable + and is reopened later with a real model. The index is a derived cache + (source of truth is the markdown files), so dropping and rebuilding + is safe; update() repopulates from disk on next call. """ try: return self.chroma_client.get_or_create_collection( @@ -279,27 +295,19 @@ def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]): metadata=metadata, ) except ValueError as e: - msg = str(e).lower() - if "embedding function" in msg and ("conflict" in msg or "already exists" in msg): - logger.warning( - f"[MEMORY] Embedding-function mismatch on '{name}' " - f"(persisted vs. current model). Dropping and rebuilding; " - f"the index will be re-populated from agent_file_system on " - f"the next update()." - ) - try: - self.chroma_client.delete_collection(name) - except Exception as del_err: - logger.error( - f"[MEMORY] Failed to delete stale collection '{name}': {del_err}" - ) - raise - return self.chroma_client.create_collection( - name=name, - embedding_function=embedding_fn, - metadata=metadata, - ) - raise + if not _is_embedding_function_conflict(e): + raise + + logger.warning( + f"[MEMORY] Embedding-function mismatch on '{name}' — dropping and " + f"rebuilding; index will repopulate from agent_file_system on next update()." + ) + self.chroma_client.delete_collection(name) + return self.chroma_client.create_collection( + name=name, + embedding_function=embedding_fn, + metadata=metadata, + ) @staticmethod def _build_embedding_function(): @@ -348,18 +356,17 @@ def retrieve( """ Retrieve memory pointers relevant to the query. - Uses a hybrid score: vector cosine similarity + BM25 keyword match - + recency boost. Candidate pool is the union of top-K from each - channel (Reciprocal-Rank-Fusion style); final ranking is the - weighted sum defined by ``HYBRID_WEIGHTS``. + Uses a hybrid score: vector cosine similarity + BM25 keyword match. + Candidate pool is the union of top-K from each channel + (Reciprocal-Rank-Fusion style); final ranking is the weighted sum + defined by ``HYBRID_WEIGHTS``. Args: query: The search query top_k: Maximum number of results to return min_relevance: Minimum hybrid score (0-1) to include. - Default raised to 0.55 to match cosine-scaled scores; callers - that previously passed 0.0 still get sensible behaviour - because BM25 + recency lift relevant matches above the cut. + Default 0.55 matches cosine-scaled scores; BM25 lifts + keyword-strong matches above the cut. file_filter: Optional list of file paths to search within Returns: @@ -385,14 +392,9 @@ def retrieve( if file_filter: where_filter = {"file_path": {"$in": file_filter}} - # Single-line query rendering so multi-line queries don't bleed into - # following log entries (used to make the log appear to mix queries - # with conversation history). Truncate long queries for log hygiene; - # full query is still passed to the retriever. - _q_one_line = " ".join(query.split()) - if len(_q_one_line) > 300: - _q_one_line = _q_one_line[:297] + "..." - logger.info(f"[MEMORY QUERY] {_q_one_line}") + # Render single-line so multi-line queries don't bleed into the next + # log entry. Full query is still passed to the retriever. + logger.info(f"[MEMORY QUERY] {_log_preview(query, _LOG_QUERY_MAX_CHARS)}") # ── Channel 1: vector similarity ── vector_hits: Dict[str, Dict[str, Any]] = {} @@ -455,7 +457,6 @@ def retrieve( missing_ids = [cid for cid in candidate_ids if cid not in vector_hits] extra_meta = self._fetch_metadata(missing_ids) if missing_ids else {} - now = datetime.now(timezone.utc) pointers: List[MemoryPointer] = [] w = HYBRID_WEIGHTS @@ -470,13 +471,8 @@ def retrieve( vector_score = vector_hits.get(chunk_id, {}).get("score", 0.0) bm25_score = bm25_hits.get(chunk_id, {}).get("score", 0.0) - recency_score = _recency_score(meta.get("timestamp", ""), now) - final = ( - w["vector"] * vector_score - + w["bm25"] * bm25_score - + w["recency"] * recency_score - ) + final = w["vector"] * vector_score + w["bm25"] * bm25_score if final < min_relevance: continue @@ -509,12 +505,10 @@ def retrieve( if not pointers: logger.info("[MEMORY RESULT] (no pointers above min_relevance)") for i, p in enumerate(pointers, start=1): - summary_preview = " ".join((p.summary or "").split()) - if len(summary_preview) > 120: - summary_preview = summary_preview[:117] + "..." logger.info( f"[MEMORY RESULT] #{i} score={p.relevance_score:.3f} " - f"file={p.file_path} section={p.section_path} :: {summary_preview}" + f"file={p.file_path} section={p.section_path} " + f":: {_log_preview(p.summary, _LOG_SUMMARY_MAX_CHARS)}" ) return pointers @@ -773,8 +767,7 @@ def _chunk_memory_log( Per-chunk metadata carries timestamp, category, extracted_entities (list of capitalised tokens / quoted strings) and an indexed_at - stamp. ``age_days`` is NOT stored — it's computed at query time - from ``timestamp`` so a stale index doesn't lock in old recency. + stamp. Timestamp is stored for display / debugging only. """ chunks: List[MemoryChunk] = [] now = datetime.utcnow().isoformat() @@ -1361,8 +1354,8 @@ def _cosine_distance_to_similarity(distance: float) -> float: def _normalize_timestamp(ts: str) -> str: """Coerce '/' or 'T'-separated timestamps to canonical 'YYYY-MM-DD HH:MM:SS'. - Returns an empty string when parsing fails — callers treat that as - "unknown age" and the recency channel contributes 0 for the chunk. + Returns an empty string when parsing fails — stored as metadata only; + not currently used in ranking. """ if not ts: return "" @@ -1374,24 +1367,6 @@ def _normalize_timestamp(ts: str) -> str: return "" -def _recency_score(timestamp_iso: str, now: datetime) -> float: - """``exp(-age_days / RECENCY_HALF_LIFE_DAYS)`` — newer = closer to 1.0. - - Chunks without a parseable timestamp (e.g. AGENT.md sections) score 0 - so they neither help nor hurt the hybrid rank. - """ - if not timestamp_iso: - return 0.0 - try: - item_dt = datetime.strptime(timestamp_iso, "%Y-%m-%d %H:%M:%S") - except ValueError: - return 0.0 - if item_dt.tzinfo is None: - item_dt = item_dt.replace(tzinfo=timezone.utc) - age_days = max(0.0, (now - item_dt).total_seconds() / 86400.0) - return math.exp(-age_days / RECENCY_HALF_LIFE_DAYS) - - # ───────────────────────────── Testing / Demo ─────────────────────────────