diff --git a/Dockerfile b/Dockerfile
index 885e388..f441db7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -51,6 +51,7 @@ WORKDIR /app
 
 # Copy application files
 COPY --chown=appuser:appuser app.py .
+COPY --chown=appuser:appuser llm_config.py .
 COPY --chown=appuser:appuser lib.py .
 COPY --chown=appuser:appuser credentials.json .
 COPY --chown=appuser:appuser static ./static
diff --git a/lib.py b/lib.py
index bb807cd..addf9c3 100644
--- a/lib.py
+++ b/lib.py
@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
 with Vertex AI RAG (Retrieval-Augmented Generation) support.
 """
 
+import asyncio
+import threading
 from google import genai
 from google.genai import types
 from dotenv import load_dotenv
 
+from llm_config import generate_content_config
+
 # Load environment variables from .env file
 load_dotenv()
 
-# Vertex AI RAG Corpus resource path
-CORPUS: str = (
-    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
-)
 
 # Gemini model name
 GEMINI_MODEL: str = "gemini-3-pro-preview"
@@ -35,53 +35,51 @@ async def generate(prompt: str):
     Yields:
         str: Text chunks from the generated response.
     """
-    client = genai.Client(vertexai=True)
+    # Create a queue for streaming chunks; None is the end-of-stream sentinel
+    chunk_queue: asyncio.Queue[str | None] = asyncio.Queue()
+    loop = asyncio.get_running_loop()
 
-    contents = [
-        types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
-    ]
-    tools = [
-        types.Tool(
-            retrieval=types.Retrieval(
-                vertex_rag_store=types.VertexRagStore(
-                    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
-                )
+    def run_streaming():
+        """Run the synchronous streaming in a separate thread."""
+        try:
+            client = genai.Client(vertexai=True)
+
+            contents = [
+                types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
+            ]
+
+            for chunk in client.models.generate_content_stream(
+                model=GEMINI_MODEL,
+                contents=contents,
+                config=generate_content_config,
+            ):
+                if (
+                    chunk.candidates
+                    and chunk.candidates[0].content
+                    and chunk.candidates[0].content.parts
+                ):
+                    # Schedule the put operation in the event loop
+                    future = asyncio.run_coroutine_threadsafe(
+                        chunk_queue.put(chunk.text),
+                        loop,
+                    )
+                    # Wait for the put to complete (quick operation)
+                    future.result(timeout=1)
+        except Exception as e:
+            print(f"[ERROR] Streaming error: {e}")
+        finally:
+            asyncio.run_coroutine_threadsafe(
+                chunk_queue.put(None),
+                loop,
             )
-        )
-    ]
 
-    generate_content_config = types.GenerateContentConfig(
-        temperature=1,
-        top_p=0.95,
-        max_output_tokens=65535,
-        safety_settings=[
-            types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
-            types.SafetySetting(
-                category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
-            ),
-            types.SafetySetting(
-                category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
-            ),
-            types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
-        ],
-        tools=tools,
-        thinking_config=types.ThinkingConfig(
-            thinking_level="HIGH",
-        ),
-    )
+    # Start the streaming in a daemon thread
+    stream_thread = threading.Thread(target=run_streaming, daemon=True)
+    stream_thread.start()
 
-    for chunk in client.models.generate_content_stream(
-        model=GEMINI_MODEL,
-        contents=contents,
-        config=generate_content_config,
-    ):
-        # DEBUG: Log chunk type to confirm generator behavior
-        print(f"[DEBUG] Chunk type: {type(chunk)}")
-        if (
-            not chunk.candidates
-            or not chunk.candidates[0].content
-            or not chunk.candidates[0].content.parts
-        ):
-            continue
-
-        yield chunk.text
+    # Yield chunks as they become available
+    while True:
+        chunk = await chunk_queue.get()
+        if chunk is None:
+            break
+        yield chunk
diff --git a/llm_config.py b/llm_config.py
new file mode 100644
index 0000000..e11a33b
--- /dev/null
+++ b/llm_config.py
@@ -0,0 +1,36 @@
+from google.genai import types
+
+# Vertex AI RAG Corpus resource path
+CORPUS: str = (
+    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
+)
+
+tools = [
+    types.Tool(
+        retrieval=types.Retrieval(
+            vertex_rag_store=types.VertexRagStore(
+                rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
+            )
+        )
+    )
+]
+
+generate_content_config = types.GenerateContentConfig(
+    temperature=1,
+    top_p=0.95,
+    max_output_tokens=65535,
+    safety_settings=[
+        types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
+        types.SafetySetting(
+            category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
+        ),
+        types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
+    ],
+    tools=tools,
+    thinking_config=types.ThinkingConfig(
+        thinking_level="HIGH",
+    ),
+)