message streaming
This commit is contained in:
194
chain.py
194
chain.py
@@ -1,5 +1,3 @@
|
||||
import asyncio
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from langchain_classic.retrievers import ContextualCompressionRetriever
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
@@ -16,106 +14,132 @@ DATA_STORE = "akern-ds_1771234036654"
|
||||
MODEL = "gemini-2.5-flash"
|
||||
LOCATION = "eu"
|
||||
PRINT_SOURCES = False
|
||||
|
||||
# LLM CONFIG
|
||||
TOP_K = 40
|
||||
TOP_P = 1
|
||||
TEMPERATURE = 0.0
|
||||
MAX_OUTPUT_TOKENS = 65535
|
||||
RETRIEVER_MAX_DOCS = 50
|
||||
RERANKER_MAX_RESULTS = 25
|
||||
|
||||
with open("prompt.md") as f:
|
||||
question_template = f.read()
|
||||
|
||||
with open("question_rewrite_prompt.md") as f:
|
||||
question_rewrite_template = f.read()
|
||||
|
||||
question_prompt = ChatPromptTemplate.from_template(question_template)
|
||||
question_rewrite_prompt = ChatPromptTemplate.from_template(question_rewrite_template)
|
||||
|
||||
|
||||
def format_docs(question: str) -> str:
|
||||
retrieved_docs = base_retriever.invoke(question)
|
||||
reranked_docs = compression_retriever.invoke(question)
|
||||
class RagChain:
|
||||
def __init__(
|
||||
self,
|
||||
top_k: int,
|
||||
top_p: float,
|
||||
temperature: float,
|
||||
retriever_max_docs: int,
|
||||
reranker_max_results: int,
|
||||
) -> None:
|
||||
self.top_k = top_k
|
||||
self.top_p = top_p
|
||||
self.temperature = temperature
|
||||
self.retriever_max_docs = retriever_max_docs
|
||||
self.reranker_max_results = reranker_max_results
|
||||
|
||||
if PRINT_SOURCES:
|
||||
print("========== RETRIEVER DOCUMENTS ==========")
|
||||
for idx, doc in enumerate(retrieved_docs, start=1):
|
||||
snippet = doc.page_content[:200].replace("\n", " ")
|
||||
print(
|
||||
f"[{idx}] metadata={doc.metadata['source']} | snippet=...{snippet}..."
|
||||
)
|
||||
with open("prompt.md") as f:
|
||||
question_template = f.read()
|
||||
|
||||
print("========== RERANKED DOCUMENTS ==========")
|
||||
for idx, doc in enumerate(reranked_docs, start=1):
|
||||
snippet = doc.page_content[:200].replace("\n", " ")
|
||||
print(
|
||||
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..."
|
||||
)
|
||||
with open("question_rewrite_prompt.md") as f:
|
||||
question_rewrite_template = f.read()
|
||||
|
||||
return "\n\n".join(doc.page_content for doc in reranked_docs)
|
||||
question_prompt = ChatPromptTemplate.from_template(question_template)
|
||||
question_rewrite_prompt = ChatPromptTemplate.from_template(
|
||||
question_rewrite_template
|
||||
)
|
||||
|
||||
self._retriever_sources: list[dict] = []
|
||||
self._reranked_sources: list[dict] = []
|
||||
|
||||
def log_rewritten_question(rewritten_question: str) -> str:
|
||||
print("=== REWRITTEN QUESTION ===")
|
||||
print(rewritten_question)
|
||||
return rewritten_question
|
||||
self._llm = ChatGoogleGenerativeAI(
|
||||
model=MODEL,
|
||||
project=PROJECT,
|
||||
vertexai=True,
|
||||
top_p=self.top_p,
|
||||
top_k=self.top_k,
|
||||
temperature=self.temperature,
|
||||
max_output_tokens=MAX_OUTPUT_TOKENS,
|
||||
)
|
||||
|
||||
self._base_retriever = VertexAISearchRetriever(
|
||||
project_id=PROJECT,
|
||||
data_store_id=DATA_STORE,
|
||||
max_documents=self.retriever_max_docs,
|
||||
location_id=LOCATION,
|
||||
beta=True,
|
||||
)
|
||||
|
||||
llm = ChatGoogleGenerativeAI(
|
||||
model=MODEL,
|
||||
project=PROJECT,
|
||||
vertexai=True,
|
||||
top_p=TOP_P,
|
||||
top_k=TOP_K,
|
||||
temperature=TEMPERATURE,
|
||||
max_output_tokens=MAX_OUTPUT_TOKENS,
|
||||
)
|
||||
self._reranker = VertexAIRank(
|
||||
project_id=PROJECT,
|
||||
location_id=LOCATION,
|
||||
ranking_config="default_ranking_config",
|
||||
top_n=self.reranker_max_results,
|
||||
)
|
||||
|
||||
base_retriever = VertexAISearchRetriever(
|
||||
project_id=PROJECT,
|
||||
data_store_id=DATA_STORE,
|
||||
max_documents=RETRIEVER_MAX_DOCS,
|
||||
location_id=LOCATION,
|
||||
beta=True,
|
||||
)
|
||||
self._compression_retriever = ContextualCompressionRetriever(
|
||||
base_compressor=self._reranker, base_retriever=self._base_retriever
|
||||
)
|
||||
|
||||
reranker = VertexAIRank(
|
||||
project_id=PROJECT,
|
||||
location_id="eu",
|
||||
ranking_config="default_ranking_config",
|
||||
top_n=RERANKER_MAX_RESULTS,
|
||||
)
|
||||
question_rewrite_chain = (
|
||||
{"question": RunnablePassthrough()}
|
||||
| question_rewrite_prompt
|
||||
| self._llm
|
||||
| StrOutputParser()
|
||||
| RunnableLambda(self._log_rewritten_question)
|
||||
)
|
||||
|
||||
compression_retriever = ContextualCompressionRetriever(
|
||||
base_compressor=reranker, base_retriever=base_retriever
|
||||
)
|
||||
rag_chain = (
|
||||
{
|
||||
"context": RunnableLambda(self._format_docs),
|
||||
"question": RunnablePassthrough(),
|
||||
}
|
||||
| question_prompt
|
||||
| self._llm
|
||||
| StrOutputParser()
|
||||
)
|
||||
|
||||
question_rewrite_chain = (
|
||||
{"question": RunnablePassthrough()}
|
||||
| question_rewrite_prompt
|
||||
| llm
|
||||
| StrOutputParser()
|
||||
| RunnableLambda(log_rewritten_question)
|
||||
)
|
||||
self._full_chain = question_rewrite_chain | rag_chain
|
||||
|
||||
rag_chain = (
|
||||
{"context": RunnableLambda(format_docs), "question": RunnablePassthrough()}
|
||||
| question_prompt
|
||||
| llm
|
||||
| StrOutputParser()
|
||||
)
|
||||
def _log_rewritten_question(self, rewritten_question: str) -> str:
|
||||
return rewritten_question
|
||||
|
||||
full_chain = question_rewrite_chain | rag_chain
|
||||
def _format_docs(self, question: str) -> str:
|
||||
retrieved_docs = self._base_retriever.invoke(question)
|
||||
reranked_docs = self._compression_retriever.invoke(question)
|
||||
|
||||
self._retriever_sources = [
|
||||
{
|
||||
"page_content": f"{doc.page_content[:50]}...",
|
||||
"source": doc.metadata.get("source", ""),
|
||||
}
|
||||
for doc in retrieved_docs
|
||||
]
|
||||
|
||||
async def main():
|
||||
response = await full_chain.ainvoke(
|
||||
"Buongiorno, non so se è la mail specifica ma volevo se possibile dei chiarimenti per l’interpretazione dei parametri BCM /SMM/ASMM. Mi capita a volte di trovare casi in cui la BCM è aumentata ma allo stesso tempo SMM/ASMM hanno subito una piccola flessione in negativo (o viceversa). Se la parte metabolicamente attiva aumenta perchè può succedere che gli altri compartimenti si riducono?? E allo stesso tempo phA e BCM possono essere inversamente proporzionali?? So che il phA correla con massa e struttura + idratazione."
|
||||
)
|
||||
print(response)
|
||||
self._reranked_sources = [
|
||||
{
|
||||
"relevance_score": doc.metadata.get("relevance_score", ""),
|
||||
"page_content": f"{doc.page_content[:50]}...",
|
||||
}
|
||||
for doc in reranked_docs
|
||||
]
|
||||
|
||||
if PRINT_SOURCES:
|
||||
print("========== RETRIEVER DOCUMENTS ==========")
|
||||
for idx, doc in enumerate(retrieved_docs, start=1):
|
||||
snippet = doc.page_content[:200].replace("\n", " ")
|
||||
print(
|
||||
f"[{idx}] metadata={doc.metadata['source']} | snippet=...{snippet}..."
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
print("========== RERANKED DOCUMENTS ==========")
|
||||
for idx, doc in enumerate(reranked_docs, start=1):
|
||||
snippet = doc.page_content[:200].replace("\n", " ")
|
||||
print(
|
||||
f"[{idx}] metadata={doc.metadata['relevance_score']} | snippet=...{snippet}..."
|
||||
)
|
||||
|
||||
return "\n\n".join(doc.page_content for doc in reranked_docs)
|
||||
|
||||
def getSources(self) -> list[dict]:
|
||||
return list(self._retriever_sources)
|
||||
|
||||
def getRankedSources(self) -> list[dict]:
|
||||
return list(self._reranked_sources)
|
||||
|
||||
def stream(self, message: str):
|
||||
return self._full_chain.astream(message)
|
||||
|
||||
Reference in New Issue
Block a user