start frontend

This commit is contained in:
Matteo Rosati
2026-02-18 13:27:39 +01:00
parent 6e8c8ceb38
commit 3e6fefabbd
9 changed files with 1085 additions and 9 deletions

121
chain.py Normal file
View File

@@ -0,0 +1,121 @@
import asyncio
from dotenv import load_dotenv
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_google_community import VertexAISearchRetriever
from langchain_google_community.vertex_rank import VertexAIRank
from langchain_google_genai import ChatGoogleGenerativeAI
# Load variables from a .env file into the process environment.
load_dotenv()

# Google Cloud project, Vertex AI Search data store, model name and region.
PROJECT = "akqa-ita-ai-poc1"
DATA_STORE = "akern-ds_1771234036654"
MODEL = "gemini-2.5-flash"
LOCATION = "eu"

# When True, format_docs() prints the retrieved and the reranked documents.
PRINT_SOURCES = False

# LLM CONFIG
TOP_K = 40
TOP_P = 1
TEMPERATURE = 0.0
MAX_OUTPUT_TOKENS = 65535

# Number of documents requested from the retriever, and how many of them the
# reranker keeps.
RETRIEVER_MAX_DOCS = 50
RERANKER_MAX_RESULTS = 25

# Read the prompt templates explicitly as UTF-8: without an encoding, open()
# falls back to the platform locale, which can mis-decode (or fail on)
# non-ASCII text in the prompts.
with open("prompt.md", encoding="utf-8") as f:
    question_template = f.read()
with open("question_rewrite_prompt.md", encoding="utf-8") as f:
    question_rewrite_template = f.read()

# Prompt objects used by the answer chain and the question-rewrite chain.
question_prompt = ChatPromptTemplate.from_template(question_template)
question_rewrite_prompt = ChatPromptTemplate.from_template(question_rewrite_template)
def _print_doc_snippets(title, docs, metadata_key):
    """Print a banner and one numbered line (metadata value + snippet) per document."""
    print(f"========== {title} ==========")
    for idx, doc in enumerate(docs, start=1):
        # First 200 characters, flattened onto a single line.
        snippet = doc.page_content[:200].replace("\n", " ")
        # .get() so a document lacking the key prints None instead of
        # aborting the whole chain with a KeyError from a debug print.
        print(
            f"[{idx}] metadata={doc.metadata.get(metadata_key)} | snippet=...{snippet}..."
        )


def format_docs(question: str) -> str:
    """Build the context string for the answer prompt.

    Retrieves and reranks documents for ``question`` through
    ``compression_retriever`` and joins their ``page_content`` with blank
    lines.

    Args:
        question: The query passed to the retrievers.

    Returns:
        The reranked documents' contents separated by ``"\\n\\n"``; an empty
        string when no documents come back.
    """
    reranked_docs = compression_retriever.invoke(question)

    if PRINT_SOURCES:
        # The raw retrieval is needed only for this debug dump.
        # compression_retriever is built on top of base_retriever, so calling
        # base_retriever unconditionally would repeat the search on every
        # request for no benefit.
        retrieved_docs = base_retriever.invoke(question)
        _print_doc_snippets("RETRIEVER DOCUMENTS", retrieved_docs, "source")
        _print_doc_snippets("RERANKED DOCUMENTS", reranked_docs, "relevance_score")

    return "\n\n".join(doc.page_content for doc in reranked_docs)
def log_rewritten_question(rewritten_question: str) -> str:
    """Print the rewritten question under a banner and return it unchanged.

    Returning the input lets this function sit inside a runnable pipeline as
    a pass-through logging step.
    """
    banner = "=== REWRITTEN QUESTION ==="
    # One print with a newline separator emits the same two lines as two prints.
    print(banner, rewritten_question, sep="\n")
    return rewritten_question
# Gemini chat model accessed through Vertex AI, with the sampling settings
# defined in the LLM CONFIG constants.
llm = ChatGoogleGenerativeAI(
    model=MODEL,
    project=PROJECT,
    vertexai=True,
    top_p=TOP_P,
    top_k=TOP_K,
    temperature=TEMPERATURE,
    max_output_tokens=MAX_OUTPUT_TOKENS,
)

# First-stage retrieval from the Vertex AI Search data store.
base_retriever = VertexAISearchRetriever(
    project_id=PROJECT,
    data_store_id=DATA_STORE,
    max_documents=RETRIEVER_MAX_DOCS,
    location_id=LOCATION,
    beta=True,
)

# Second-stage reranker that keeps the top RERANKER_MAX_RESULTS documents.
# Uses the shared LOCATION constant (previously a hard-coded "eu") so the
# retriever and the reranker cannot drift apart when the region is changed.
reranker = VertexAIRank(
    project_id=PROJECT,
    location_id=LOCATION,
    ranking_config="default_ranking_config",
    top_n=RERANKER_MAX_RESULTS,
)

# Retriever that passes base_retriever's results through the reranker.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=reranker, base_retriever=base_retriever
)

# Step 1: rewrite the raw user question with the LLM, parse the reply to a
# string and log it.
question_rewrite_chain = (
    {"question": RunnablePassthrough()}
    | question_rewrite_prompt
    | llm
    | StrOutputParser()
    | RunnableLambda(log_rewritten_question)
)

# Step 2: answer the question. The input string is used both to build the
# context (via format_docs) and as the "question" prompt variable.
rag_chain = (
    {"context": RunnableLambda(format_docs), "question": RunnablePassthrough()}
    | question_prompt
    | llm
    | StrOutputParser()
)

# End-to-end pipeline: the rewritten question feeds the RAG chain.
full_chain = question_rewrite_chain | rag_chain
async def main():
    """Run the full chain once on a hard-coded sample question and print the answer."""
    sample_question = "Buongiorno, non so se è la mail specifica ma volevo se possibile dei chiarimenti per linterpretazione dei parametri BCM /SMM/ASMM. Mi capita a volte di trovare casi in cui la BCM è aumentata ma allo stesso tempo SMM/ASMM hanno subito una piccola flessione in negativo (o viceversa). Se la parte metabolicamente attiva aumenta perchè può succedere che gli altri compartimenti si riducono?? E allo stesso tempo phA e BCM possono essere inversamente proporzionali?? So che il phA correla con massa e struttura + idratazione."
    answer = await full_chain.ainvoke(sample_question)
    print(answer)


# Script entry point: drive the async main() to completion.
if __name__ == "__main__":
    asyncio.run(main())