fix async with threads

This commit is contained in:
Matteo Rosati
2026-01-22 10:18:45 +01:00
parent 1ed452f1d9
commit 48d8ce9276
6 changed files with 98 additions and 50 deletions

View File

@@ -51,6 +51,7 @@ WORKDIR /app
# Copy application files # Copy application files
COPY --chown=appuser:appuser app.py . COPY --chown=appuser:appuser app.py .
COPY --chown=appuser:appuser llm_config.py .
COPY --chown=appuser:appuser lib.py . COPY --chown=appuser:appuser lib.py .
COPY --chown=appuser:appuser credentials.json . COPY --chown=appuser:appuser credentials.json .
COPY --chown=appuser:appuser static ./static COPY --chown=appuser:appuser static ./static

98
lib.py
View File

@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
with Vertex AI RAG (Retrieval-Augmented Generation) support. with Vertex AI RAG (Retrieval-Augmented Generation) support.
""" """
import asyncio
import threading
from google import genai from google import genai
from google.genai import types from google.genai import types
from dotenv import load_dotenv from dotenv import load_dotenv
from llm_config import generate_content_config
# Load environment variables from .env file # Load environment variables from .env file
load_dotenv() load_dotenv()
# Vertex AI RAG Corpus resource path
CORPUS: str = (
"projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
)
# Gemini model name # Gemini model name
GEMINI_MODEL: str = "gemini-3-pro-preview" GEMINI_MODEL: str = "gemini-3-pro-preview"
async def generate(prompt: str):
    """Stream generated text for *prompt* from Gemini with Vertex AI RAG.

    The google-genai streaming call is synchronous, so it runs in a daemon
    thread; chunks are bridged back to the running event loop through an
    asyncio.Queue so this coroutine never blocks the loop.

    Args:
        prompt: The user prompt to send to the model.

    Yields:
        str: Text chunks from the generated response.
    """
    # Unique end-of-stream marker. A string sentinel such as "<<END>>" could
    # collide with legitimate model output and truncate the stream early;
    # an anonymous object compared with `is` cannot.
    _done = object()

    # Queue carries text chunks plus the completion sentinel.
    chunk_queue: asyncio.Queue = asyncio.Queue()
    # get_running_loop() is the correct (non-deprecated) way to obtain the
    # loop from inside a coroutine; get_event_loop() here is deprecated
    # since Python 3.10.
    loop = asyncio.get_running_loop()

    def run_streaming() -> None:
        """Run the synchronous streaming call and feed chunks to the queue."""
        try:
            client = genai.Client(vertexai=True)

            contents = [
                types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
            ]

            for chunk in client.models.generate_content_stream(
                model=GEMINI_MODEL,
                contents=contents,
                config=generate_content_config,
            ):
                if (
                    chunk.candidates
                    and chunk.candidates[0].content
                    and chunk.candidates[0].content.parts
                ):
                    # Hand the chunk to the event loop thread-safely; wait
                    # briefly so any queueing error surfaces in this thread.
                    asyncio.run_coroutine_threadsafe(
                        chunk_queue.put(chunk.text),
                        loop,
                    ).result(timeout=1)
        except Exception as e:  # best-effort: log and terminate the stream
            print(f"[ERROR] Streaming error: {e}")
        finally:
            # Always signal completion so the consumer loop terminates,
            # even when the streaming call failed.
            asyncio.run_coroutine_threadsafe(
                chunk_queue.put(_done),
                loop,
            )

    # Start the streaming in a daemon thread so it cannot block shutdown.
    stream_thread = threading.Thread(target=run_streaming, daemon=True)
    stream_thread.start()

    # Drain the queue until the worker signals completion.
    while True:
        chunk = await chunk_queue.get()
        if chunk is _done:
            break
        yield chunk

36
llm_config.py Normal file
View File

@@ -0,0 +1,36 @@
"""Shared Gemini generation configuration.

Declares the Vertex AI RAG corpus path, the retrieval tool wiring, and the
GenerateContentConfig consumed by the streaming generator in lib.py.
"""
from google.genai import types

# Vertex AI RAG Corpus resource path
CORPUS: str = (
    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
)

# Retrieval tool backed by the RAG corpus above.
_rag_store = types.VertexRagStore(
    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
)
tools = [types.Tool(retrieval=types.Retrieval(vertex_rag_store=_rag_store))]

# Harm categories whose safety filtering is explicitly disabled ("OFF").
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

generate_content_config = types.GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=65535,
    safety_settings=[
        types.SafetySetting(category=category, threshold="OFF")
        for category in _HARM_CATEGORIES
    ],
    tools=tools,
    thinking_config=types.ThinkingConfig(
        thinking_level="HIGH",
    ),
)

View File

@@ -5,6 +5,7 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"asyncio>=4.0.0",  # FIXME: remove — the PyPI "asyncio" package is an obsolete stdlib mirror; asyncio ships with Python (this project requires >=3.13)
"fastapi>=0.128.0", "fastapi>=0.128.0",
"fastapi-sse>=1.1.1", "fastapi-sse>=1.1.1",
"google-genai>=1.59.0", "google-genai>=1.59.0",

View File

@@ -1,6 +1,7 @@
annotated-doc==0.0.4 annotated-doc==0.0.4
annotated-types==0.7.0 annotated-types==0.7.0
anyio==4.12.1 anyio==4.12.1
asyncio==4.0.0
certifi==2026.1.4 certifi==2026.1.4
charset-normalizer==3.4.4 charset-normalizer==3.4.4
click==8.3.1 click==8.3.1

11
uv.lock generated
View File

@@ -32,6 +32,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
] ]
[[package]]
name = "asyncio"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/71/ea/26c489a11f7ca862d5705db67683a7361ce11c23a7b98fc6c2deaeccede2/asyncio-4.0.0.tar.gz", hash = "sha256:570cd9e50db83bc1629152d4d0b7558d6451bb1bfd5dfc2e935d96fc2f40329b", size = 5371, upload-time = "2025-08-05T02:51:46.605Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/57/64/eff2564783bd650ca25e15938d1c5b459cda997574a510f7de69688cb0b4/asyncio-4.0.0-py3-none-any.whl", hash = "sha256:c1eddb0659231837046809e68103969b2bef8b0400d59cfa6363f6b5ed8cc88b", size = 5555, upload-time = "2025-08-05T02:51:45.767Z" },
]
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2026.1.4" version = "2026.1.4"
@@ -144,6 +153,7 @@ name = "genai"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "asyncio" },
{ name = "fastapi" }, { name = "fastapi" },
{ name = "fastapi-sse" }, { name = "fastapi-sse" },
{ name = "google-genai" }, { name = "google-genai" },
@@ -155,6 +165,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "asyncio", specifier = ">=4.0.0" },
{ name = "fastapi", specifier = ">=0.128.0" }, { name = "fastapi", specifier = ">=0.128.0" },
{ name = "fastapi-sse", specifier = ">=1.1.1" }, { name = "fastapi-sse", specifier = ">=1.1.1" },
{ name = "google-genai", specifier = ">=1.59.0" }, { name = "google-genai", specifier = ">=1.59.0" },