Add lookup for sources in re-ranker
This commit is contained in:
11
chain.py
11
chain.py
@@ -110,10 +110,18 @@ class RagChain:
|
||||
for doc in retrieved_docs
|
||||
]
|
||||
|
||||
# Build a lookup map from page_content -> source using the original
|
||||
# retrieved docs, because VertexAIRank strips metadata (including source)
|
||||
# from the documents it returns. Docs with identical page_content share one entry (last source wins); unmatched content yields "".
|
||||
source_lookup: dict[str, str] = {
|
||||
doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs
|
||||
}
|
||||
|
||||
self._reranked_sources = [
|
||||
{
|
||||
"relevance_score": doc.metadata.get("relevance_score", ""),
|
||||
"page_content": f"{doc.page_content[:50]}...",
|
||||
"source": source_lookup.get(doc.page_content, ""),
|
||||
}
|
||||
for doc in reranked_docs
|
||||
]
|
||||
@@ -129,8 +137,9 @@ class RagChain:
|
||||
print("========== RERANKED DOCUMENTS ==========")
|
||||
for idx, doc in enumerate(reranked_docs, start=1):
|
||||
snippet = doc.page_content[:200].replace("\n", " ")
|
||||
source = source_lookup.get(doc.page_content, "")
|
||||
print(
|
||||
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..."
|
||||
f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..."
|
||||
)
|
||||
|
||||
return "\n\n".join(doc.page_content for doc in reranked_docs)
|
||||
|
||||
Reference in New Issue
Block a user