From a4f4aa72e48cc157d5bbe04ba797a55e84255481 Mon Sep 17 00:00:00 2001
From: CraftBot <craftbot@craftbot.dev>
Date: Sat, 20 Jun 2026 16:09:16 +0900
Subject: [PATCH 1/3] fix relevance calculation and added bm25

---
 agent_core/core/impl/memory/bm25_index.py     | 113 +++++
 .../core/impl/memory/entity_extractor.py      |  85 ++++
 agent_core/core/impl/memory/manager.py        | 408 +++++++++++++++---
 requirements.txt                              |   1 +
 4 files changed, 554 insertions(+), 53 deletions(-)
 create mode 100644 agent_core/core/impl/memory/bm25_index.py
 create mode 100644 agent_core/core/impl/memory/entity_extractor.py

diff --git a/agent_core/core/impl/memory/bm25_index.py b/agent_core/core/impl/memory/bm25_index.py
new file mode 100644
index 00000000..1476cf44
--- /dev/null
+++ b/agent_core/core/impl/memory/bm25_index.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+"""
+In-memory BM25 keyword index for memory chunks.
+
+Sits alongside ChromaDB to backstop semantic search on terms vector embeddings
+struggle with (proper nouns, dates, IDs, code identifiers). The index is fully
+rebuilt from the current chunk set on every refresh — at ~200 memory items it
+costs <50ms and avoids the complexity of incremental BM25 updates.
+"""
+
+from __future__ import annotations
+
+import re
+import threading
+from typing import Dict, List, Optional, Tuple
+
+try:
+    from rank_bm25 import BM25Okapi
+    _HAS_BM25 = True
+except ImportError:
+    BM25Okapi = None
+    _HAS_BM25 = False
+
+from agent_core.utils.logger import logger
+
+
+_TOKEN_RE = re.compile(r"[A-Za-z0-9_]+")
+
+
+def tokenize(text: str) -> List[str]:
+    """Lowercase word/number tokenizer. Keeps identifiers intact."""
+    if not text:
+        return []
+    return [t.lower() for t in _TOKEN_RE.findall(text)]
+
+
+class BM25Index:
+    """Thread-safe BM25 index keyed by chunk_id.
+
+    On a fresh install or when ``rank_bm25`` is not installed, BM25 retrieval
+    silently degrades to an empty result set. The MemoryManager then falls back
+    to pure vector search, so retrieval keeps working — just without the
+    keyword channel.
+    """
+
+    def __init__(self) -> None:
+        self._lock = threading.Lock()
+        self._chunk_ids: List[str] = []
+        self._tokenized: List[List[str]] = []
+        self._bm25: Optional["BM25Okapi"] = None
+
+    def rebuild(self, chunks: Dict[str, str]) -> None:
+        """Rebuild the index from ``chunk_id -> raw text``.
+
+        Args:
+            chunks: Mapping of chunk_id to the searchable text body.
+        """
+        with self._lock:
+            self._chunk_ids = list(chunks.keys())
+            self._tokenized = [tokenize(chunks[cid]) for cid in self._chunk_ids]
+
+            if not _HAS_BM25:
+                self._bm25 = None
+                return
+
+            if not self._tokenized:
+                self._bm25 = None
+                return
+
+            # rank_bm25 raises on empty docs; replace with a single sentinel token
+            sanitized = [doc if doc else ["__empty__"] for doc in self._tokenized]
+            try:
+                self._bm25 = BM25Okapi(sanitized)
+            except Exception as e:
+                logger.warning(f"[BM25Index] Failed to build index: {e}")
+                self._bm25 = None
+
+    def search(self, query: str, top_k: int = 20) -> List[Tuple[str, float]]:
+        """Return ``[(chunk_id, score)]`` sorted high-to-low. Empty when index unavailable."""
+        if not query or not query.strip():
+            return []
+
+        with self._lock:
+            if self._bm25 is None or not self._chunk_ids:
+                return []
+
+            tokens = tokenize(query)
+            if not tokens:
+                return []
+
+            try:
+                scores = self._bm25.get_scores(tokens)
+            except Exception as e:
+                logger.warning(f"[BM25Index] Query failed: {e}")
+                return []
+
+            indexed = [
+                (self._chunk_ids[i], float(scores[i]))
+                for i in range(len(self._chunk_ids))
+                if scores[i] > 0
+            ]
+            indexed.sort(key=lambda x: x[1], reverse=True)
+            return indexed[:top_k]
+
+    @property
+    def size(self) -> int:
+        with self._lock:
+            return len(self._chunk_ids)
+
+    @property
+    def is_available(self) -> bool:
+        """True when rank_bm25 is installed AND the index has documents."""
+        return _HAS_BM25 and self.size > 0
diff --git a/agent_core/core/impl/memory/entity_extractor.py b/agent_core/core/impl/memory/entity_extractor.py
new file mode 100644
index 00000000..d15f9f1f
--- /dev/null
+++ b/agent_core/core/impl/memory/entity_extractor.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+Lightweight heuristic entity extractor for memory chunks.
+
+This is intentionally simple — Phase 1 just needs to surface proper-noun-like
+tokens so they end up in chunk metadata (and in the BM25 corpus). Higher-quality
+LLM-based NER is a future phase.
+
+The extractor pulls:
+- Capitalised multi-word sequences (proper nouns)
+- Tokens that look like identifiers (CamelCase, snake_case with caps)
+- Quoted strings
+
+Stopword filtering trims common English starters that get capitalised at
+sentence boundaries.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import List
+
+_STOP = {
+    "the", "a", "an", "and", "or", "but", "of", "in", "on", "at", "to", "for",
+    "with", "by", "from", "as", "is", "are", "was", "were", "be", "been", "being",
+    "have", "has", "had", "do", "does", "did", "will", "would", "should", "could",
+    "may", "might", "must", "can", "i", "you", "he", "she", "it", "we", "they",
+    "this", "that", "these", "those", "user", "agent", "task", "action", "event",
+    "memory", "system", "note", "today", "yesterday", "tomorrow", "monday",
+    "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
+    "january", "february", "march", "april", "may", "june", "july", "august",
+    "september", "october", "november", "december",
+}
+
+# Capitalised words (incl. CamelCase), optionally chained: "Trading View",
+# "OpenAI", "CraftBot", "John Doe"
+_PROPER_NOUN_RE = re.compile(
+    r"\b[A-Z][A-Za-z0-9]*(?:[ \-_][A-Z][A-Za-z0-9]*)*\b"
+)
+
+# Quoted strings (single or double)
+_QUOTED_RE = re.compile(r"\"([^\"]{2,40})\"|'([^']{2,40})'")
+
+
+def extract_entities(text: str, max_entities: int = 12) -> List[str]:
+    """Extract candidate entity strings from text.
+
+    Returns a deduplicated, order-preserving list. The cap exists so chunk
+    metadata stays compact (ChromaDB stores it for every chunk).
+    """
+    if not text:
+        return []
+
+    seen: set[str] = set()
+    out: List[str] = []
+
+    for match in _PROPER_NOUN_RE.finditer(text):
+        candidate = match.group(0).strip()
+        if not candidate:
+            continue
+        lowered = candidate.lower()
+        if lowered in _STOP:
+            continue
+        # Drop single-letter or pure-numeric tokens
+        if len(candidate) < 2:
+            continue
+        if candidate.isdigit():
+            continue
+        if lowered in seen:
+            continue
+        seen.add(lowered)
+        out.append(candidate)
+        if len(out) >= max_entities:
+            return out
+
+    for match in _QUOTED_RE.finditer(text):
+        candidate = (match.group(1) or match.group(2) or "").strip()
+        if not candidate or candidate.lower() in seen:
+            continue
+        seen.add(candidate.lower())
+        out.append(candidate)
+        if len(out) >= max_entities:
+            break
+
+    return out
diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py
index 0ae89563..7b41c1a8 100644
--- a/agent_core/core/impl/memory/manager.py
+++ b/agent_core/core/impl/memory/manager.py
@@ -16,16 +16,42 @@
 from __future__ import annotations
 
 import hashlib
+import math
 import re
 import uuid
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import chromadb
 
 from agent_core.utils.logger import logger
+from agent_core.core.impl.memory.bm25_index import BM25Index
+from agent_core.core.impl.memory.entity_extractor import extract_entities
+
+
+# Files that are flat lists of "[timestamp] [category] content" items.
+# These get per-item chunking so each fact has its own embedding, instead of
+# the whole list collapsing into a single section chunk under "## Memory".
+PER_ITEM_FILES = frozenset({"MEMORY.md", "EVENT_UNPROCESSED.md"})
+
+# Matches a memory item line. Tolerates both "/" and "-" date separators and
+# either "[YYYY-MM-DD HH:MM:SS]" (MEMORY.md) or "[YYYY/MM/DD HH:MM:SS]"
+# (EVENT_UNPROCESSED.md). Captures: timestamp, category, content.
+MEMORY_ITEM_LINE_RE = re.compile(
+    r"^\s*\[(\d{4}[-/]\d{2}[-/]\d{2}[ T]\d{2}:\d{2}:\d{2})\]\s+\[([\w\-]+)\]\s*:?\s*(.+?)\s*$"
+)
+
+# Hybrid-retrieval weights. Vector is primary signal, BM25 backstops proper
+# nouns and dates, recency breaks ties on equally-relevant memories.
+HYBRID_WEIGHTS = {
+    "vector": 0.55,
+    "bm25": 0.30,
+    "recency": 0.15,
+}
+# Days until recency contribution halves. exp(-30/30) ≈ 0.37.
+RECENCY_HALF_LIFE_DAYS = 30.0
 
 
 # ───────────────────────────── Data Classes ─────────────────────────────
@@ -140,8 +166,13 @@ class MemoryManager:
         manager.update()
     """
 
-    COLLECTION_NAME = "agent_memory"
-    FILE_INDEX_COLLECTION = "agent_memory_file_index"
+    # v2 collections use cosine distance and per-item chunking. The "_v2"
+    # suffix forces a clean rebuild on first run with the new code — old
+    # "agent_memory" collections are left intact but unused (so a downgrade
+    # is non-destructive). Drop the old collections manually if disk is
+    # tight; the manager never reads them.
+    COLLECTION_NAME = "agent_memory_v2"
+    FILE_INDEX_COLLECTION = "agent_memory_file_index_v2"
 
     def __init__(
         self,
@@ -164,23 +195,34 @@ def __init__(
         self.chunk_size_limit = chunk_size_limit
         self.chunk_overlap = chunk_overlap
 
-        # Initialize ChromaDB (uses built-in default embeddings)
+        # Initialize ChromaDB (uses built-in default embeddings).
+        # hnsw:space=cosine — cosine similarity gives well-scaled scores in
+        # [0,1] for the hybrid retriever and behaves better than L2 on the
+        # short factual snippets that dominate MEMORY.md.
         self.chroma_client = chromadb.PersistentClient(path=chroma_path)
         self.collection = self.chroma_client.get_or_create_collection(
             name=self.COLLECTION_NAME,
-            metadata={"description": "Agent file system memory chunks"},
+            metadata={
+                "description": "Agent file system memory chunks (v2)",
+                "hnsw:space": "cosine",
+            },
         )
 
         # File index collection (tracks which files are indexed and their hashes)
         self.file_index_collection = self.chroma_client.get_or_create_collection(
             name=self.FILE_INDEX_COLLECTION,
-            metadata={"description": "File index for incremental updates"},
+            metadata={"description": "File index for incremental updates (v2)"},
         )
 
         # In-memory cache of file indices
         self._file_index_cache: Dict[str, FileIndex] = {}
         self._load_file_index_cache()
 
+        # BM25 keyword index — mirrors ChromaDB's chunk set. Rebuilt lazily
+        # on first query and after each index mutation.
+        self._bm25 = BM25Index()
+        self._bm25_dirty = True
+
         logger.info(
             f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, ChromaDB: {chroma_path}"
         )
@@ -191,30 +233,34 @@ def retrieve(
         self,
         query: str,
         top_k: int = 5,
-        min_relevance: float = 0.0,
+        min_relevance: float = 0.55,
         file_filter: Optional[List[str]] = None,
     ) -> List[MemoryPointer]:
         """
         Retrieve memory pointers relevant to the query.
 
-        This is the primary retrieval method. It returns lightweight pointers
-        that tell the agent where to find relevant information, not the full
-        content. The agent can then decide which chunks to read in full.
+        Uses a hybrid score: vector cosine similarity + BM25 keyword match
+        + recency boost. Candidate pool is the union of top-K from each
+        channel (Reciprocal-Rank-Fusion style); final ranking is the
+        weighted sum defined by ``HYBRID_WEIGHTS``.
 
         Args:
             query: The search query
             top_k: Maximum number of results to return
-            min_relevance: Minimum relevance score (0-1) to include
+            min_relevance: Minimum hybrid score (0-1) to include.
+                Default raised to 0.55 to match cosine-scaled scores; callers
+                that previously passed 0.0 still get sensible behaviour
+                because BM25 + recency lift relevant matches above the cut.
             file_filter: Optional list of file paths to search within
 
         Returns:
-            List of MemoryPointer objects, sorted by relevance (highest first)
+            List of MemoryPointer objects, sorted by relevance (highest first).
+            Result shape is unchanged from v1 — only the ranking improves.
         """
         if not query or not query.strip():
             logger.warning("Empty query provided to retrieve()")
             return []
 
-        # Check if collection has any documents
         collection_count = self.collection.count()
         if collection_count == 0:
             logger.info(
@@ -222,68 +268,186 @@ def retrieve(
             )
             return []
 
-        # Build where filter if file_filter provided
+        # Cast a wider net than top_k so the hybrid re-rank has signal to work
+        # with. ChromaDB and BM25 each return up to candidate_pool items.
+        candidate_pool = max(top_k * 4, 20)
+
         where_filter = None
         if file_filter:
             where_filter = {"file_path": {"$in": file_filter}}
 
-        # Query ChromaDB
         logger.info(f"[MEMORY QUERY] Query: {query}")
+
+        # ── Channel 1: vector similarity ──
+        vector_hits: Dict[str, Dict[str, Any]] = {}
         try:
             results = self.collection.query(
                 query_texts=[query],
-                n_results=min(top_k, collection_count),
+                n_results=min(candidate_pool, collection_count),
                 where=where_filter,
                 include=["metadatas", "distances", "documents"],
             )
+            ids = (results.get("ids") or [[]])[0]
+            metadatas = (results.get("metadatas") or [[]])[0]
+            distances = (results.get("distances") or [[]])[0]
+            for i, chunk_id in enumerate(ids):
+                meta = metadatas[i] if i < len(metadatas) else {}
+                distance = distances[i] if i < len(distances) else 1.0
+                vector_hits[chunk_id] = {
+                    "score": _cosine_distance_to_similarity(distance),
+                    "metadata": meta,
+                    "rank": i,
+                }
         except Exception as e:
             logger.error(f"Error querying ChromaDB: {e}")
+            # Continue — BM25 alone may still return useful results.
+
+        # ── Channel 2: BM25 keyword search ──
+        self._ensure_bm25_built()
+        bm25_hits: Dict[str, Dict[str, Any]] = {}
+        bm25_raw = self._bm25.search(query, top_k=candidate_pool)
+        if bm25_raw:
+            max_bm25 = max(score for _, score in bm25_raw) or 1.0
+            for rank, (chunk_id, score) in enumerate(bm25_raw):
+                bm25_hits[chunk_id] = {
+                    "score": score / max_bm25,  # min-max normalise to [0,1]
+                    "rank": rank,
+                }
+
+        # Union the candidate ids from both channels (RRF-style fusion).
+        candidate_ids = set(vector_hits) | set(bm25_hits)
+        if not candidate_ids:
             return []
 
-        # Parse results into MemoryPointers
-        pointers: List[MemoryPointer] = []
+        # If file_filter was set, BM25 may have returned chunks outside the
+        # filter — drop them by reading metadata for the missing ones.
+        if file_filter:
+            need_meta = [cid for cid in candidate_ids if cid not in vector_hits]
+            if need_meta:
+                missing_meta = self._fetch_metadata(need_meta)
+                candidate_ids = {
+                    cid
+                    for cid in candidate_ids
+                    if (
+                        vector_hits.get(cid, {}).get("metadata", {}).get("file_path")
+                        or missing_meta.get(cid, {}).get("file_path", "")
+                    )
+                    in set(file_filter)
+                }
+
+        # Pull metadata for any BM25-only hits so we can build pointers + age.
+        missing_ids = [cid for cid in candidate_ids if cid not in vector_hits]
+        extra_meta = self._fetch_metadata(missing_ids) if missing_ids else {}
 
-        if not results or not results.get("ids") or not results["ids"][0]:
-            return pointers
+        now = datetime.now(timezone.utc)
+        pointers: List[MemoryPointer] = []
 
-        ids = results["ids"][0]
-        metadatas = results.get("metadatas", [[]])[0]
-        distances = results.get("distances", [[]])[0]
+        w = HYBRID_WEIGHTS
+        for chunk_id in candidate_ids:
+            meta = (
+                vector_hits[chunk_id]["metadata"]
+                if chunk_id in vector_hits
+                else extra_meta.get(chunk_id, {})
+            )
+            if not meta:
+                continue
 
-        for i, chunk_id in enumerate(ids):
-            meta = metadatas[i] if i < len(metadatas) else {}
+            vector_score = vector_hits.get(chunk_id, {}).get("score", 0.0)
+            bm25_score = bm25_hits.get(chunk_id, {}).get("score", 0.0)
+            recency_score = _recency_score(meta.get("timestamp", ""), now)
 
-            # Convert distance to relevance score (ChromaDB uses L2 distance by default)
-            # Lower distance = more relevant, so we invert it
-            distance = distances[i] if i < len(distances) else 1.0
-            relevance = 1.0 / (1.0 + distance)  # Normalize to 0-1 range
+            final = (
+                w["vector"] * vector_score
+                + w["bm25"] * bm25_score
+                + w["recency"] * recency_score
+            )
 
-            if relevance < min_relevance:
+            if final < min_relevance:
                 continue
 
-            pointer = MemoryPointer(
-                chunk_id=chunk_id,
-                file_path=meta.get("file_path", ""),
-                section_path=meta.get("section_path", ""),
-                title=meta.get("title", ""),
-                summary=meta.get("summary", ""),
-                relevance_score=relevance,
-                metadata={
-                    k: v
-                    for k, v in meta.items()
-                    if k not in ("file_path", "section_path", "title", "summary")
-                },
+            pointers.append(
+                MemoryPointer(
+                    chunk_id=chunk_id,
+                    file_path=meta.get("file_path", ""),
+                    section_path=meta.get("section_path", ""),
+                    title=meta.get("title", ""),
+                    summary=meta.get("summary", ""),
+                    relevance_score=final,
+                    metadata={
+                        k: v
+                        for k, v in meta.items()
+                        if k
+                        not in ("file_path", "section_path", "title", "summary")
+                    },
+                )
             )
-            pointers.append(pointer)
 
-        # Sort by relevance (highest first)
         pointers.sort(key=lambda p: p.relevance_score, reverse=True)
+        pointers = pointers[:top_k]
 
         logger.info(
-            f"Retrieved {len(pointers)} memory pointers for query: {query[:50]}..."
+            f"Retrieved {len(pointers)} memory pointers "
+            f"(vector={len(vector_hits)}, bm25={len(bm25_hits)}) "
+            f"for query: {query[:50]}..."
         )
         return pointers
 
+    # ───────────────────────── Hybrid retrieval helpers ─────────────────────────
+
+    def _ensure_bm25_built(self) -> None:
+        """Rebuild the BM25 index if it's been invalidated since last build."""
+        if not self._bm25_dirty:
+            return
+        try:
+            corpus = self._load_bm25_corpus()
+            self._bm25.rebuild(corpus)
+            self._bm25_dirty = False
+            logger.debug(f"[MEMORY] BM25 index rebuilt: {self._bm25.size} chunks")
+        except Exception as e:
+            logger.warning(f"[MEMORY] Failed to rebuild BM25 index: {e}")
+            # Leave the flag dirty so we retry on the next query.
+
+    def _load_bm25_corpus(self) -> Dict[str, str]:
+        """Pull every chunk's searchable text from ChromaDB.
+
+        We concatenate the document body, summary, and extracted_entities so
+        BM25 has the strongest possible keyword signal — especially proper
+        nouns that vector embeddings often miss.
+        """
+        try:
+            result = self.collection.get(
+                include=["documents", "metadatas"],
+            )
+        except Exception as e:
+            logger.warning(f"[MEMORY] BM25 corpus load failed: {e}")
+            return {}
+
+        ids = result.get("ids") or []
+        docs = result.get("documents") or []
+        metas = result.get("metadatas") or []
+
+        corpus: Dict[str, str] = {}
+        for i, chunk_id in enumerate(ids):
+            body = docs[i] if i < len(docs) else ""
+            meta = metas[i] if i < len(metas) else {}
+            summary = meta.get("summary", "")
+            entities = meta.get("extracted_entities", "")
+            corpus[chunk_id] = f"{body}\n{summary}\n{entities}"
+        return corpus
+
+    def _fetch_metadata(self, chunk_ids: List[str]) -> Dict[str, Dict[str, Any]]:
+        """Fetch metadata for a specific set of chunk ids."""
+        if not chunk_ids:
+            return {}
+        try:
+            result = self.collection.get(ids=chunk_ids, include=["metadatas"])
+            ids = result.get("ids") or []
+            metas = result.get("metadatas") or []
+            return {ids[i]: metas[i] for i in range(len(ids))}
+        except Exception as e:
+            logger.warning(f"[MEMORY] Metadata fetch failed: {e}")
+            return {}
+
     def retrieve_full_content(self, chunk_id: str) -> Optional[str]:
         """
         Retrieve the full content of a specific chunk by its ID.
@@ -445,12 +609,17 @@ def clear(self) -> None:
 
     def _chunk_markdown(self, content: str, file_path: str) -> List[MemoryChunk]:
         """
-        Split markdown content into semantic chunks based on headers.
+        Split markdown content into semantic chunks.
+
+        Dispatches based on file shape:
+        - Flat per-item logs (MEMORY.md, EVENT_UNPROCESSED.md) → one chunk
+          per "[ts] [cat] content" line via :meth:`_chunk_memory_log`.
+        - Everything else → header-based section chunking (original
+          behaviour, unchanged).
 
-        This uses a hierarchical approach:
-        1. Split by headers (##, ###, etc.)
-        2. Each section becomes a chunk with its header path
-        3. Large sections are further split with overlap
+        Per-item chunking is the Phase 1 fix for retrieval accuracy: in the
+        old section-based path, every memory item collapsed into a single
+        chunk under "## Memory" and the embedding represented the whole blob.
 
         Args:
             content: The markdown content to chunk
@@ -459,6 +628,78 @@ def _chunk_markdown(self, content: str, file_path: str) -> List[MemoryChunk]:
         Returns:
             List of MemoryChunk objects
         """
+        filename = Path(file_path).name
+        if filename in PER_ITEM_FILES:
+            return self._chunk_memory_log(content, file_path)
+        return self._chunk_by_sections(content, file_path)
+
+    def _chunk_memory_log(
+        self, content: str, file_path: str
+    ) -> List[MemoryChunk]:
+        """One chunk per ``[ts] [cat] content`` line.
+
+        Each line is short enough on its own (memory items are capped at
+        ~150 words by the memory-processor skill) that no further splitting
+        is needed. Lines that don't match the expected pattern — headers,
+        blank lines, the file's preamble — are skipped here; the file as a
+        whole is still in INDEX_TARGET_FILES so its preamble is captured
+        by the section chunker on other indexed files where appropriate.
+
+        Per-chunk metadata carries timestamp, category, extracted_entities
+        (list of capitalised tokens / quoted strings) and an indexed_at
+        stamp. ``age_days`` is NOT stored — it's computed at query time
+        from ``timestamp`` so a stale index doesn't lock in old recency.
+        """
+        chunks: List[MemoryChunk] = []
+        now = datetime.utcnow().isoformat()
+
+        for raw_line in content.splitlines():
+            line = raw_line.strip()
+            if not line or line.startswith("#") or line.startswith(">"):
+                continue
+            match = MEMORY_ITEM_LINE_RE.match(line)
+            if not match:
+                continue
+
+            timestamp_str, category, item_text = match.groups()
+            timestamp_iso = _normalize_timestamp(timestamp_str)
+            category = category.lower()
+
+            # Body = the item content. Summary = first ~150 chars cleaned.
+            entities = extract_entities(item_text)
+            summary = self._create_summary(item_text)
+
+            chunks.append(
+                MemoryChunk(
+                    chunk_id=str(uuid.uuid4()),
+                    file_path=file_path,
+                    section_path=f"item:{category}",
+                    title=category,
+                    content=line,  # keep the full bracketed line for grep parity
+                    summary=summary,
+                    content_hash=self._compute_content_hash(line),
+                    file_modified_at="",
+                    indexed_at=now,
+                    metadata={
+                        "timestamp": timestamp_iso,
+                        "category": category,
+                        # ChromaDB metadata values must be primitives; serialise
+                        # the entity list as a comma-joined string. The BM25
+                        # corpus and retrieval consumers parse it back.
+                        "extracted_entities": ", ".join(entities),
+                        "item_kind": "memory_log",
+                    },
+                )
+            )
+
+        return chunks
+
+    def _chunk_by_sections(
+        self, content: str, file_path: str
+    ) -> List[MemoryChunk]:
+        """Original header-based chunker. Preserves existing behaviour for
+        non-list markdown (AGENT.md, USER.md, PROACTIVE.md, ...).
+        """
         chunks: List[MemoryChunk] = []
 
         # Parse headers and their content
@@ -752,6 +993,8 @@ def _index_file(self, file_path: Path) -> int:
             logger.error(f"Error adding chunks to ChromaDB: {e}")
             return 0
 
+        self._bm25_dirty = True
+
         # Update file index cache
         file_index = FileIndex(
             file_path=rel_path,
@@ -787,6 +1030,7 @@ def _remove_file_from_index(self, file_path: str) -> None:
 
         # Remove from cache
         del self._file_index_cache[file_path]
+        self._bm25_dirty = True
 
         logger.debug(f"Removed {len(file_index.chunk_ids)} chunks for {file_path}")
 
@@ -805,14 +1049,18 @@ def _clear_index(self) -> None:
 
         self.collection = self.chroma_client.get_or_create_collection(
             name=self.COLLECTION_NAME,
-            metadata={"description": "Agent file system memory chunks"},
+            metadata={
+                "description": "Agent file system memory chunks (v2)",
+                "hnsw:space": "cosine",
+            },
         )
         self.file_index_collection = self.chroma_client.get_or_create_collection(
             name=self.FILE_INDEX_COLLECTION,
-            metadata={"description": "File index for incremental updates"},
+            metadata={"description": "File index for incremental updates (v2)"},
         )
 
         self._file_index_cache.clear()
+        self._bm25_dirty = True
 
     # ───────────────────────────── File Index Persistence ─────────────────────────────
 
@@ -964,6 +1212,60 @@ def create_memory_processing_task(
     )
 
 
+# ───────────────────── Hybrid Retrieval Scoring Helpers ─────────────────────
+
+
+def _cosine_distance_to_similarity(distance: float) -> float:
+    """Map ChromaDB's cosine distance to a [0,1] similarity score.
+
+    ChromaDB returns ``1 - cosine_similarity`` when the collection is
+    configured with ``hnsw:space=cosine``. Clamp to handle floating-point
+    drift and the L2 fallback case (where distances can exceed 1).
+    """
+    if distance is None:
+        return 0.0
+    sim = 1.0 - float(distance)
+    if sim < 0.0:
+        return 0.0
+    if sim > 1.0:
+        return 1.0
+    return sim
+
+
+def _normalize_timestamp(ts: str) -> str:
+    """Coerce '/' or 'T'-separated timestamps to canonical 'YYYY-MM-DD HH:MM:SS'.
+
+    Returns an empty string when parsing fails — callers treat that as
+    "unknown age" and the recency channel contributes 0 for the chunk.
+    """
+    if not ts:
+        return ""
+    cleaned = ts.replace("/", "-").replace("T", " ")
+    try:
+        dt = datetime.strptime(cleaned, "%Y-%m-%d %H:%M:%S")
+        return dt.strftime("%Y-%m-%d %H:%M:%S")
+    except ValueError:
+        return ""
+
+
+def _recency_score(timestamp_iso: str, now: datetime) -> float:
+    """``exp(-age_days / RECENCY_HALF_LIFE_DAYS)`` — newer = closer to 1.0.
+
+    Chunks without a parseable timestamp (e.g. AGENT.md sections) score 0
+    so they neither help nor hurt the hybrid rank.
+    """
+    if not timestamp_iso:
+        return 0.0
+    try:
+        item_dt = datetime.strptime(timestamp_iso, "%Y-%m-%d %H:%M:%S")
+    except ValueError:
+        return 0.0
+    if item_dt.tzinfo is None:
+        item_dt = item_dt.replace(tzinfo=timezone.utc)
+    age_days = max(0.0, (now - item_dt).total_seconds() / 86400.0)
+    return math.exp(-age_days / RECENCY_HALF_LIFE_DAYS)
+
+
 # ───────────────────────────── Testing / Demo ─────────────────────────────
 
 
diff --git a/requirements.txt b/requirements.txt
index 286fe9ca..ab68d367 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,3 +52,4 @@ pypdfium2
 pdfminer.six
 pymupdf
 pypdf
+rank_bm25

From 5d6c76a43607acb69b2cbe9bbb75f50f872c9b07 Mon Sep 17 00:00:00 2001
From: CraftBot <craftbot@craftbot.dev>
Date: Sat, 20 Jun 2026 20:14:07 +0900
Subject: [PATCH 2/3] improved memory system

---
 agent_core/core/impl/context/engine.py |  58 ++++------
 agent_core/core/impl/memory/manager.py | 142 +++++++++++++++++++++++--
 app/main.py                            |  37 +++++++
 3 files changed, 194 insertions(+), 43 deletions(-)

diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py
index 2beab3f2..b61085e2 100644
--- a/agent_core/core/impl/context/engine.py
+++ b/agent_core/core/impl/context/engine.py
@@ -603,8 +603,12 @@ def _build_memory_query(
     ) -> Optional[str]:
         """Build a semantic query for memory retrieval.
 
-        Combines task instruction with recent conversation messages (both user
-        and agent) to provide better context for memory search.
+        Uses ONLY the latest user message. Agent messages are excluded — they
+        often restate or drift to adjacent topics and were observed dominating
+        the embedding (e.g. a proactive-tasks explanation poisoning an MCP
+        question). If no user message is available (background task, planner,
+        heartbeat), falls back to the task instruction, then to the explicit
+        query argument.
 
         Args:
             query: Optional explicit query string.
@@ -613,7 +617,10 @@ def _build_memory_query(
         Returns:
             A query string suitable for semantic memory search, or None if no context.
         """
-        # Get task instruction as the base query
+        latest_user_message = self._get_latest_user_message(session_id)
+        if latest_user_message:
+            return latest_user_message
+
         session = get_session_or_none(session_id)
         if session and session.current_task:
             task_instruction = session.current_task.instruction
@@ -621,55 +628,36 @@ def _build_memory_query(
             current_task = get_state().current_task
             task_instruction = current_task.instruction if current_task else None
 
-        if not task_instruction:
-            # Fall back to explicit query if no task
-            return query if query else None
-
-        # Get recent conversation messages for additional context
-        recent_context = self._get_recent_conversation_for_memory(session_id, limit=5)
-
-        if recent_context:
-            return f"{task_instruction}\n\nRecent conversation:\n{recent_context}"
-        else:
+        if task_instruction:
             return task_instruction
 
-    def _get_recent_conversation_for_memory(
-        self, session_id: Optional[str], limit: int = 5
-    ) -> str:
-        """Get recent conversation messages for memory query context.
+        return query if query else None
 
-        Args:
-            session_id: Optional session ID for session-specific event stream.
-            limit: Maximum number of messages to include.
+    def _get_latest_user_message(self, session_id: Optional[str]) -> str:
+        """Return the most recent user message text, or empty string if none.
 
-        Returns:
-            Formatted string of recent user and agent messages.
+        Walks the conversation-history buffer from newest to oldest and returns
+        the first event whose kind contains 'user message'. Agent messages are
+        skipped entirely.
         """
         try:
             event_stream_manager = self.state_manager.event_stream_manager
             if not event_stream_manager:
                 return ""
 
-            # Get messages from conversation history (includes both user and agent)
             recent_messages = event_stream_manager.get_recent_conversation_messages(
-                limit
+                limit=20
             )
             if not recent_messages:
                 return ""
 
-            # Format messages simply for semantic search
-            lines = []
-            for event in recent_messages:
-                # Simplify the kind label for the query
-                if "user message" in event.kind:
-                    lines.append(f"User: {event.message}")
-                elif "agent message" in event.kind:
-                    lines.append(f"Agent: {event.message}")
-
-            return "\n".join(lines)
+            for event in reversed(recent_messages):
+                if "user message" in event.kind and event.message:
+                    return event.message.strip()
+            return ""
 
         except Exception as e:
-            logger.warning(f"[MEMORY] Failed to get recent conversation: {e}")
+            logger.warning(f"[MEMORY] Failed to get latest user message: {e}")
             return ""
 
     def get_memory_context(
diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py
index 7b41c1a8..a93c5a0d 100644
--- a/agent_core/core/impl/memory/manager.py
+++ b/agent_core/core/impl/memory/manager.py
@@ -53,6 +53,26 @@
 # Days until recency contribution halves. exp(-30/30) ≈ 0.37.
 RECENCY_HALF_LIFE_DAYS = 30.0
 
+# ───────────────────────── Embedding Model ─────────────────────────
+# ChromaDB's default is sentence-transformers/all-MiniLM-L6-v2 (22M params,
+# 2021). Verbatim self-similarity scores ~0.65; topical matches sit at
+# ~0.50; noise floor is ~0.45. That ~0.05 dynamic range can't support
+# accurate retrieval no matter how the downstream scoring is tuned.
+#
+# BGE-small-en-v1.5 (33M params, 384-dim, same dimensionality as MiniLM
+# so we don't break anything else) typically scores ~0.92 on verbatim
+# matches, ~0.75 on topical, and drops below 0.50 for unrelated content.
+# That's the dynamic range hybrid scoring actually needs.
+#
+# Override via the MEMORY_EMBEDDING_MODEL env var if you want to try
+# bge-base-en-v1.5 (better, slower), e5-small-v2, or any other
+# sentence-transformers model. Set to "default" to use ChromaDB's
+# bundled ONNX MiniLM.
+import os as _os
+MEMORY_EMBEDDING_MODEL = _os.environ.get(
+    "MEMORY_EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5"
+)
+
 
 # ───────────────────────────── Data Classes ─────────────────────────────
 
@@ -195,22 +215,33 @@ def __init__(
         self.chunk_size_limit = chunk_size_limit
         self.chunk_overlap = chunk_overlap
 
-        # Initialize ChromaDB (uses built-in default embeddings).
+        # Initialize ChromaDB.
         # hnsw:space=cosine — cosine similarity gives well-scaled scores in
         # [0,1] for the hybrid retriever and behaves better than L2 on the
         # short factual snippets that dominate MEMORY.md.
         self.chroma_client = chromadb.PersistentClient(path=chroma_path)
-        self.collection = self.chroma_client.get_or_create_collection(
+
+        # Build the embedding function. Default ChromaDB uses MiniLM-L6-v2
+        # (weak — ~0.65 verbatim self-similarity). MEMORY_EMBEDDING_MODEL
+        # points to a stronger sentence-transformers model by default.
+        # Silent fallback to ChromaDB's bundled MiniLM if sentence-transformers
+        # isn't installed, so the system keeps working on minimal installs.
+        embedding_fn = self._build_embedding_function()
+
+        self.collection = self._open_collection(
             name=self.COLLECTION_NAME,
+            embedding_fn=embedding_fn,
             metadata={
                 "description": "Agent file system memory chunks (v2)",
                 "hnsw:space": "cosine",
+                "embedding_model": MEMORY_EMBEDDING_MODEL,
             },
         )
 
         # File index collection (tracks which files are indexed and their hashes)
-        self.file_index_collection = self.chroma_client.get_or_create_collection(
+        self.file_index_collection = self._open_collection(
             name=self.FILE_INDEX_COLLECTION,
+            embedding_fn=embedding_fn,
             metadata={"description": "File index for incremental updates (v2)"},
         )
 
@@ -224,9 +255,87 @@ def __init__(
         self._bm25_dirty = True
 
         logger.info(
-            f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, ChromaDB: {chroma_path}"
+            f"MemoryManager initialized. Agent FS: {self.agent_fs_path}, "
+            f"ChromaDB: {chroma_path}, embedding model: {MEMORY_EMBEDDING_MODEL}"
         )
 
+    # ───────────────────────────── Embedding ─────────────────────────────
+
+    def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]):
+        """Open a Chroma collection, auto-rebuilding on embedding mismatch.
+
+        ChromaDB persists the embedding-function name in the collection config
+        and refuses get_or_create with a different one. That happens when the
+        collection was first created in a session where sentence-transformers
+        wasn't loadable (falling back to default) and is reopened in a session
+        where it is. The Chroma index is a derived cache — the source of truth
+        is the markdown files — so dropping and rebuilding is safe; the next
+        update() call will repopulate from disk.
+        """
+        try:
+            return self.chroma_client.get_or_create_collection(
+                name=name,
+                embedding_function=embedding_fn,
+                metadata=metadata,
+            )
+        except ValueError as e:
+            msg = str(e).lower()
+            if "embedding function" in msg and ("conflict" in msg or "already exists" in msg):
+                logger.warning(
+                    f"[MEMORY] Embedding-function mismatch on '{name}' "
+                    f"(persisted vs. current model). Dropping and rebuilding; "
+                    f"the index will be re-populated from agent_file_system on "
+                    f"the next update()."
+                )
+                try:
+                    self.chroma_client.delete_collection(name)
+                except Exception as del_err:
+                    logger.error(
+                        f"[MEMORY] Failed to delete stale collection '{name}': {del_err}"
+                    )
+                    raise
+                return self.chroma_client.create_collection(
+                    name=name,
+                    embedding_function=embedding_fn,
+                    metadata=metadata,
+                )
+            raise
+
+    @staticmethod
+    def _build_embedding_function():
+        """Construct ChromaDB's embedding function.
+
+        Honours the MEMORY_EMBEDDING_MODEL constant. Falls back to
+        ChromaDB's bundled default (ONNX all-MiniLM-L6-v2) silently when
+        sentence-transformers is missing or the model can't load — so
+        the agent never fails to start because of an embedding-model
+        installation issue.
+        """
+        if MEMORY_EMBEDDING_MODEL == "default":
+            return None  # ChromaDB applies its bundled default
+        try:
+            from chromadb.utils.embedding_functions import (
+                SentenceTransformerEmbeddingFunction,
+            )
+            return SentenceTransformerEmbeddingFunction(
+                model_name=MEMORY_EMBEDDING_MODEL
+            )
+        except ImportError:
+            logger.warning(
+                "[MEMORY] sentence-transformers not installed — falling back "
+                "to ChromaDB's default MiniLM embeddings. Retrieval quality "
+                "will be poor. Install with: conda install -c conda-forge "
+                "sentence-transformers"
+            )
+            return None
+        except Exception as e:
+            logger.warning(
+                f"[MEMORY] Failed to load embedding model "
+                f"'{MEMORY_EMBEDDING_MODEL}' ({e}); falling back to ChromaDB "
+                f"default."
+            )
+            return None
+
     # ───────────────────────────── Public API ─────────────────────────────
 
     def retrieve(
@@ -276,7 +385,14 @@ def retrieve(
         if file_filter:
             where_filter = {"file_path": {"$in": file_filter}}
 
-        logger.info(f"[MEMORY QUERY] Query: {query}")
+        # Single-line query rendering so multi-line queries don't bleed into
+        # following log entries (used to make the log appear to mix queries
+        # with conversation history). Truncate long queries for log hygiene;
+        # full query is still passed to the retriever.
+        _q_one_line = " ".join(query.split())
+        if len(_q_one_line) > 300:
+            _q_one_line = _q_one_line[:297] + "..."
+        logger.info(f"[MEMORY QUERY] {_q_one_line}")
 
         # ── Channel 1: vector similarity ──
         vector_hits: Dict[str, Dict[str, Any]] = {}
@@ -386,10 +502,20 @@ def retrieve(
         pointers = pointers[:top_k]
 
         logger.info(
-            f"Retrieved {len(pointers)} memory pointers "
-            f"(vector={len(vector_hits)}, bm25={len(bm25_hits)}) "
-            f"for query: {query[:50]}..."
+            f"[MEMORY RESULT] {len(pointers)} pointer(s) returned "
+            f"(vector candidates={len(vector_hits)}, bm25 candidates={len(bm25_hits)}, "
+            f"min_relevance={min_relevance})"
         )
+        if not pointers:
+            logger.info("[MEMORY RESULT]   (no pointers above min_relevance)")
+        for i, p in enumerate(pointers, start=1):
+            summary_preview = " ".join((p.summary or "").split())
+            if len(summary_preview) > 120:
+                summary_preview = summary_preview[:117] + "..."
+            logger.info(
+                f"[MEMORY RESULT]   #{i} score={p.relevance_score:.3f} "
+                f"file={p.file_path} section={p.section_path} :: {summary_preview}"
+            )
         return pointers
 
     # ───────────────────────── Hybrid retrieval helpers ─────────────────────────
diff --git a/app/main.py b/app/main.py
index 02455d5b..8ffd3633 100644
--- a/app/main.py
+++ b/app/main.py
@@ -8,6 +8,43 @@
 Run this before the app directory, using 'python -m app.main'
 """
 
+# ============================================================================
+# CRITICAL: SSL bootstrap BEFORE any TLS-using import (aiohttp, openai, etc.)
+#
+# On Windows, a single malformed certificate in the OS cert store
+# ("Trusted Root", "CA", etc.) breaks ssl.create_default_context() with
+# "[ASN1: NOT_ENOUGH_DATA]" because the stdlib loads ALL Windows certs in
+# one batch via load_verify_locations(cadata=...). One bad cert poisons the
+# whole batch.
+#
+# Workaround: wrap SSLContext._load_windows_store_certs to swallow that
+# specific SSLError. Lost Windows-CA-store certs are replaced by certifi's
+# Mozilla bundle (set_default_verify_paths still runs), so server cert
+# validation still works for PyPI / OpenAI / Anthropic / etc.
+import sys as _sys
+if _sys.platform == "win32":
+    import ssl as _ssl
+    _orig_load_win_certs = getattr(
+        _ssl.SSLContext, "_load_windows_store_certs", None
+    )
+    if _orig_load_win_certs is not None:
+        def _safe_load_windows_store_certs(self, storename, purpose):
+            try:
+                return _orig_load_win_certs(self, storename, purpose)
+            except _ssl.SSLError:
+                # Malformed cert in store — skip silently. certifi still loads.
+                return None
+        _ssl.SSLContext._load_windows_store_certs = _safe_load_windows_store_certs
+
+    # Also try truststore as an extra layer (uses Windows SChannel directly
+    # on modern versions); harmless if not installed.
+    try:
+        import truststore as _truststore
+        _truststore.inject_into_ssl()
+    except Exception:
+        pass
+# ============================================================================
+
 # ============================================================================
 # CRITICAL: Suppress console logging BEFORE imports
 # Must be done before any module calls logging.basicConfig()

From 7161156ad63ff0030a4e43a6581000fd295a2524 Mon Sep 17 00:00:00 2001
From: CraftBot <craftbot@craftbot.dev>
Date: Sat, 20 Jun 2026 21:20:40 +0900
Subject: [PATCH 3/3] refactor code and remove recency logic

---
 agent_core/core/impl/context/engine.py |  33 ++----
 agent_core/core/impl/memory/manager.py | 143 ++++++++++---------------
 2 files changed, 70 insertions(+), 106 deletions(-)

diff --git a/agent_core/core/impl/context/engine.py b/agent_core/core/impl/context/engine.py
index b61085e2..6db8b46a 100644
--- a/agent_core/core/impl/context/engine.py
+++ b/agent_core/core/impl/context/engine.py
@@ -603,35 +603,24 @@ def _build_memory_query(
     ) -> Optional[str]:
         """Build a semantic query for memory retrieval.
 
-        Uses ONLY the latest user message. Agent messages are excluded — they
-        often restate or drift to adjacent topics and were observed dominating
-        the embedding (e.g. a proactive-tasks explanation poisoning an MCP
-        question). If no user message is available (background task, planner,
-        heartbeat), falls back to the task instruction, then to the explicit
-        query argument.
-
-        Args:
-            query: Optional explicit query string.
-            session_id: Optional session ID for session-specific state lookup.
-
-        Returns:
-            A query string suitable for semantic memory search, or None if no context.
+        Priority: latest user message → task instruction → explicit query.
+        Agent messages are deliberately excluded — they often restate or
+        drift to adjacent topics and were observed dominating the embedding
+        (a long proactive-tasks reply poisoned a follow-up MCP question).
         """
         latest_user_message = self._get_latest_user_message(session_id)
         if latest_user_message:
             return latest_user_message
 
         session = get_session_or_none(session_id)
-        if session and session.current_task:
-            task_instruction = session.current_task.instruction
-        else:
-            current_task = get_state().current_task
-            task_instruction = current_task.instruction if current_task else None
-
-        if task_instruction:
-            return task_instruction
+        current_task = (
+            session.current_task if session and session.current_task
+            else get_state().current_task
+        )
+        if current_task and current_task.instruction:
+            return current_task.instruction
 
-        return query if query else None
+        return query or None
 
     def _get_latest_user_message(self, session_id: Optional[str]) -> str:
         """Return the most recent user message text, or empty string if none.
diff --git a/agent_core/core/impl/memory/manager.py b/agent_core/core/impl/memory/manager.py
index a93c5a0d..388e98f0 100644
--- a/agent_core/core/impl/memory/manager.py
+++ b/agent_core/core/impl/memory/manager.py
@@ -16,11 +16,10 @@
 from __future__ import annotations
 
 import hashlib
-import math
 import re
 import uuid
 from dataclasses import dataclass, field
-from datetime import datetime, timezone
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
@@ -43,15 +42,33 @@
     r"^\s*\[(\d{4}[-/]\d{2}[-/]\d{2}[ T]\d{2}:\d{2}:\d{2})\]\s+\[([\w\-]+)\]\s*:?\s*(.+?)\s*$"
 )
 
-# Hybrid-retrieval weights. Vector is primary signal, BM25 backstops proper
-# nouns and dates, recency breaks ties on equally-relevant memories.
+# Hybrid-retrieval weights. Vector is the primary signal, BM25 backstops
+# proper nouns and dates.
 HYBRID_WEIGHTS = {
-    "vector": 0.55,
-    "bm25": 0.30,
-    "recency": 0.15,
+    "vector": 0.65,
+    "bm25": 0.35,
 }
-# Days until recency contribution halves. exp(-30/30) ≈ 0.37.
-RECENCY_HALF_LIFE_DAYS = 30.0
+
+# Log-line preview limits. Keep multi-line queries and long summaries from
+# bleeding across log entries.
+_LOG_QUERY_MAX_CHARS = 300
+_LOG_SUMMARY_MAX_CHARS = 120
+
+
+def _log_preview(text: str, max_chars: int) -> str:
+    """Collapse whitespace and truncate text for safe logging."""
+    flat = " ".join((text or "").split())
+    if len(flat) <= max_chars:
+        return flat
+    return flat[: max_chars - 3] + "..."
+
+
+def _is_embedding_function_conflict(err: Exception) -> bool:
+    """Detect ChromaDB's "embedding function mismatch" ValueError by message."""
+    msg = str(err).lower()
+    return "embedding function" in msg and (
+        "conflict" in msg or "already exists" in msg
+    )
 
 # ───────────────────────── Embedding Model ─────────────────────────
 # ChromaDB's default is sentence-transformers/all-MiniLM-L6-v2 (22M params,
@@ -265,12 +282,11 @@ def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]):
         """Open a Chroma collection, auto-rebuilding on embedding mismatch.
 
         ChromaDB persists the embedding-function name in the collection config
-        and refuses get_or_create with a different one. That happens when the
-        collection was first created in a session where sentence-transformers
-        wasn't loadable (falling back to default) and is reopened in a session
-        where it is. The Chroma index is a derived cache — the source of truth
-        is the markdown files — so dropping and rebuilding is safe; the next
-        update() call will repopulate from disk.
+        and refuses get_or_create with a different one — happens when the
+        collection was first created without sentence-transformers loadable
+        and is reopened later with a real model. The index is a derived cache
+        (source of truth is the markdown files), so dropping and rebuilding
+        is safe; update() repopulates from disk on next call.
         """
         try:
             return self.chroma_client.get_or_create_collection(
@@ -279,27 +295,19 @@ def _open_collection(self, name: str, embedding_fn, metadata: Dict[str, Any]):
                 metadata=metadata,
             )
         except ValueError as e:
-            msg = str(e).lower()
-            if "embedding function" in msg and ("conflict" in msg or "already exists" in msg):
-                logger.warning(
-                    f"[MEMORY] Embedding-function mismatch on '{name}' "
-                    f"(persisted vs. current model). Dropping and rebuilding; "
-                    f"the index will be re-populated from agent_file_system on "
-                    f"the next update()."
-                )
-                try:
-                    self.chroma_client.delete_collection(name)
-                except Exception as del_err:
-                    logger.error(
-                        f"[MEMORY] Failed to delete stale collection '{name}': {del_err}"
-                    )
-                    raise
-                return self.chroma_client.create_collection(
-                    name=name,
-                    embedding_function=embedding_fn,
-                    metadata=metadata,
-                )
-            raise
+            if not _is_embedding_function_conflict(e):
+                raise
+
+        logger.warning(
+            f"[MEMORY] Embedding-function mismatch on '{name}' — dropping and "
+            f"rebuilding; index will repopulate from agent_file_system on next update()."
+        )
+        self.chroma_client.delete_collection(name)
+        return self.chroma_client.create_collection(
+            name=name,
+            embedding_function=embedding_fn,
+            metadata=metadata,
+        )
 
     @staticmethod
     def _build_embedding_function():
@@ -348,18 +356,17 @@ def retrieve(
         """
         Retrieve memory pointers relevant to the query.
 
-        Uses a hybrid score: vector cosine similarity + BM25 keyword match
-        + recency boost. Candidate pool is the union of top-K from each
-        channel (Reciprocal-Rank-Fusion style); final ranking is the
-        weighted sum defined by ``HYBRID_WEIGHTS``.
+        Uses a hybrid score: vector cosine similarity + BM25 keyword match.
+        Candidate pool is the union of top-K from each channel
+        (Reciprocal-Rank-Fusion style); final ranking is the weighted sum
+        defined by ``HYBRID_WEIGHTS``.
 
         Args:
             query: The search query
             top_k: Maximum number of results to return
             min_relevance: Minimum hybrid score (0-1) to include.
-                Default raised to 0.55 to match cosine-scaled scores; callers
-                that previously passed 0.0 still get sensible behaviour
-                because BM25 + recency lift relevant matches above the cut.
+                Default 0.55 matches cosine-scaled scores; BM25 lifts
+                keyword-strong matches above the cut.
             file_filter: Optional list of file paths to search within
 
         Returns:
@@ -385,14 +392,9 @@ def retrieve(
         if file_filter:
             where_filter = {"file_path": {"$in": file_filter}}
 
-        # Single-line query rendering so multi-line queries don't bleed into
-        # following log entries (used to make the log appear to mix queries
-        # with conversation history). Truncate long queries for log hygiene;
-        # full query is still passed to the retriever.
-        _q_one_line = " ".join(query.split())
-        if len(_q_one_line) > 300:
-            _q_one_line = _q_one_line[:297] + "..."
-        logger.info(f"[MEMORY QUERY] {_q_one_line}")
+        # Render single-line so multi-line queries don't bleed into the next
+        # log entry. Full query is still passed to the retriever.
+        logger.info(f"[MEMORY QUERY] {_log_preview(query, _LOG_QUERY_MAX_CHARS)}")
 
         # ── Channel 1: vector similarity ──
         vector_hits: Dict[str, Dict[str, Any]] = {}
@@ -455,7 +457,6 @@ def retrieve(
         missing_ids = [cid for cid in candidate_ids if cid not in vector_hits]
         extra_meta = self._fetch_metadata(missing_ids) if missing_ids else {}
 
-        now = datetime.now(timezone.utc)
         pointers: List[MemoryPointer] = []
 
         w = HYBRID_WEIGHTS
@@ -470,13 +471,8 @@ def retrieve(
 
             vector_score = vector_hits.get(chunk_id, {}).get("score", 0.0)
             bm25_score = bm25_hits.get(chunk_id, {}).get("score", 0.0)
-            recency_score = _recency_score(meta.get("timestamp", ""), now)
 
-            final = (
-                w["vector"] * vector_score
-                + w["bm25"] * bm25_score
-                + w["recency"] * recency_score
-            )
+            final = w["vector"] * vector_score + w["bm25"] * bm25_score
 
             if final < min_relevance:
                 continue
@@ -509,12 +505,10 @@ def retrieve(
         if not pointers:
             logger.info("[MEMORY RESULT]   (no pointers above min_relevance)")
         for i, p in enumerate(pointers, start=1):
-            summary_preview = " ".join((p.summary or "").split())
-            if len(summary_preview) > 120:
-                summary_preview = summary_preview[:117] + "..."
             logger.info(
                 f"[MEMORY RESULT]   #{i} score={p.relevance_score:.3f} "
-                f"file={p.file_path} section={p.section_path} :: {summary_preview}"
+                f"file={p.file_path} section={p.section_path} "
+                f":: {_log_preview(p.summary, _LOG_SUMMARY_MAX_CHARS)}"
             )
         return pointers
 
@@ -773,8 +767,7 @@ def _chunk_memory_log(
 
         Per-chunk metadata carries timestamp, category, extracted_entities
         (list of capitalised tokens / quoted strings) and an indexed_at
-        stamp. ``age_days`` is NOT stored — it's computed at query time
-        from ``timestamp`` so a stale index doesn't lock in old recency.
+        stamp. Timestamp is stored for display / debugging only.
         """
         chunks: List[MemoryChunk] = []
         now = datetime.utcnow().isoformat()
@@ -1361,8 +1354,8 @@ def _cosine_distance_to_similarity(distance: float) -> float:
 def _normalize_timestamp(ts: str) -> str:
     """Coerce '/' or 'T'-separated timestamps to canonical 'YYYY-MM-DD HH:MM:SS'.
 
-    Returns an empty string when parsing fails — callers treat that as
-    "unknown age" and the recency channel contributes 0 for the chunk.
+    Returns an empty string when parsing fails — stored as metadata only;
+    not currently used in ranking.
     """
     if not ts:
         return ""
@@ -1374,24 +1367,6 @@ def _normalize_timestamp(ts: str) -> str:
         return ""
 
 
-def _recency_score(timestamp_iso: str, now: datetime) -> float:
-    """``exp(-age_days / RECENCY_HALF_LIFE_DAYS)`` — newer = closer to 1.0.
-
-    Chunks without a parseable timestamp (e.g. AGENT.md sections) score 0
-    so they neither help nor hurt the hybrid rank.
-    """
-    if not timestamp_iso:
-        return 0.0
-    try:
-        item_dt = datetime.strptime(timestamp_iso, "%Y-%m-%d %H:%M:%S")
-    except ValueError:
-        return 0.0
-    if item_dt.tzinfo is None:
-        item_dt = item_dt.replace(tzinfo=timezone.utc)
-    age_days = max(0.0, (now - item_dt).total_seconds() / 86400.0)
-    return math.exp(-age_days / RECENCY_HALF_LIFE_DAYS)
-
-
 # ───────────────────────────── Testing / Demo ─────────────────────────────