add lookup for sources in re-ranker

This commit is contained in:
Matteo Rosati
2026-02-18 14:29:27 +01:00
parent e1afb6e6c7
commit d4e9643afc

View File

@@ -110,10 +110,18 @@ class RagChain:
for doc in retrieved_docs for doc in retrieved_docs
] ]
# Build a lookup map from page_content -> source using the original
# retrieved docs, because VertexAIRank strips metadata (including source)
# from the documents it returns.
source_lookup: dict[str, str] = {
doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs
}
self._reranked_sources = [ self._reranked_sources = [
{ {
"relevance_score": doc.metadata.get("relevance_score", ""), "relevance_score": doc.metadata.get("relevance_score", ""),
"page_content": f"{doc.page_content[:50]}...", "page_content": f"{doc.page_content[:50]}...",
"source": source_lookup.get(doc.page_content, ""),
} }
for doc in reranked_docs for doc in reranked_docs
] ]
@@ -129,8 +137,9 @@ class RagChain:
print("========== RERANKED DOCUMENTS ==========") print("========== RERANKED DOCUMENTS ==========")
for idx, doc in enumerate(reranked_docs, start=1): for idx, doc in enumerate(reranked_docs, start=1):
snippet = doc.page_content[:200].replace("\n", " ") snippet = doc.page_content[:200].replace("\n", " ")
source = source_lookup.get(doc.page_content, "")
print( print(
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..." f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..."
) )
return "\n\n".join(doc.page_content for doc in reranked_docs) return "\n\n".join(doc.page_content for doc in reranked_docs)