diff --git a/chain.py b/chain.py index f175878..e99da77 100644 --- a/chain.py +++ b/chain.py @@ -110,10 +110,18 @@ class RagChain: for doc in retrieved_docs ] + # Build a lookup map from page_content -> source using the original + # retrieved docs, because VertexAIRank strips metadata (including source) + # from the documents it returns. + source_lookup: dict[str, str] = { + doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs + } + self._reranked_sources = [ { "relevance_score": doc.metadata.get("relevance_score", ""), "page_content": f"{doc.page_content[:50]}...", + "source": source_lookup.get(doc.page_content, ""), } for doc in reranked_docs ] @@ -129,8 +137,9 @@ class RagChain: print("========== RERANKED DOCUMENTS ==========") for idx, doc in enumerate(reranked_docs, start=1): snippet = doc.page_content[:200].replace("\n", " ") + source = source_lookup.get(doc.page_content, "") print( - f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..." + f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..." ) return "\n\n".join(doc.page_content for doc in reranked_docs)