add lookup for sources in re-ranker
This commit is contained in:
11
chain.py
11
chain.py
@@ -110,10 +110,18 @@ class RagChain:
|
|||||||
for doc in retrieved_docs
|
for doc in retrieved_docs
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Build a lookup map from page_content -> source using the original
|
||||||
|
# retrieved docs, because VertexAIRank strips metadata (including source)
|
||||||
|
# from the documents it returns.
|
||||||
|
source_lookup: dict[str, str] = {
|
||||||
|
doc.page_content: doc.metadata.get("source", "") for doc in retrieved_docs
|
||||||
|
}
|
||||||
|
|
||||||
self._reranked_sources = [
|
self._reranked_sources = [
|
||||||
{
|
{
|
||||||
"relevance_score": doc.metadata.get("relevance_score", ""),
|
"relevance_score": doc.metadata.get("relevance_score", ""),
|
||||||
"page_content": f"{doc.page_content[:50]}...",
|
"page_content": f"{doc.page_content[:50]}...",
|
||||||
|
"source": source_lookup.get(doc.page_content, ""),
|
||||||
}
|
}
|
||||||
for doc in reranked_docs
|
for doc in reranked_docs
|
||||||
]
|
]
|
||||||
@@ -129,8 +137,9 @@ class RagChain:
|
|||||||
print("========== RERANKED DOCUMENTS ==========")
|
print("========== RERANKED DOCUMENTS ==========")
|
||||||
for idx, doc in enumerate(reranked_docs, start=1):
|
for idx, doc in enumerate(reranked_docs, start=1):
|
||||||
snippet = doc.page_content[:200].replace("\n", " ")
|
snippet = doc.page_content[:200].replace("\n", " ")
|
||||||
|
source = source_lookup.get(doc.page_content, "")
|
||||||
print(
|
print(
|
||||||
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..."
|
f"[{idx}] source={source} | relevance_score={doc.metadata.get('relevance_score', '')} | snippet=...{snippet}..."
|
||||||
)
|
)
|
||||||
|
|
||||||
return "\n\n".join(doc.page_content for doc in reranked_docs)
|
return "\n\n".join(doc.page_content for doc in reranked_docs)
|
||||||
|
|||||||
Reference in New Issue
Block a user