add lookup for sources in re-ranker

This commit is contained in:
Matteo Rosati
2026-02-18 14:29:27 +01:00
parent e1afb6e6c7
commit d4e9643afc

View File

@@ -110,10 +110,18 @@ class RagChain:
for doc in retrieved_docs
]
# Build a lookup map from page_content -> source using the original
# retrieved docs, because VertexAIRank strips metadata (including source)
# from the documents it returns.
source_lookup: dict[str, str] = {
doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs
}
self._reranked_sources = [
{
"relevance_score": doc.metadata.get("relevance_score", ""),
"page_content": f"{doc.page_content[:50]}...",
"source": source_lookup.get(doc.page_content, ""),
}
for doc in reranked_docs
]
@@ -129,8 +137,9 @@ class RagChain:
print("========== RERANKED DOCUMENTS ==========")
for idx, doc in enumerate(reranked_docs, start=1):
snippet = doc.page_content[:200].replace("\n", " ")
source = source_lookup.get(doc.page_content, "")
print(
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..."
f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..."
)
return "\n\n".join(doc.page_content for doc in reranked_docs)