From d4e9643afcac76d50c4e0b07cbfb94bf944e7c34 Mon Sep 17 00:00:00 2001 From: Matteo Rosati Date: Wed, 18 Feb 2026 14:29:27 +0100 Subject: [PATCH] add lookup for sources in re-ranker --- chain.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/chain.py b/chain.py index f175878..e99da77 100644 --- a/chain.py +++ b/chain.py @@ -110,10 +110,18 @@ class RagChain: for doc in retrieved_docs ] + # Build a lookup map from page_content -> source using the original + # retrieved docs, because VertexAIRank strips metadata (including source) + # from the documents it returns. + source_lookup: dict[str, str] = { + doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs + } + self._reranked_sources = [ { "relevance_score": doc.metadata.get("relevance_score", ""), "page_content": f"{doc.page_content[:50]}...", + "source": source_lookup.get(doc.page_content, ""), } for doc in reranked_docs ] @@ -129,8 +137,9 @@ class RagChain: print("========== RERANKED DOCUMENTS ==========") for idx, doc in enumerate(reranked_docs, start=1): snippet = doc.page_content[:200].replace("\n", " ") + source = source_lookup.get(doc.page_content, "") print( - f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..." + f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..." ) return "\n\n".join(doc.page_content for doc in reranked_docs)