fix async with threads

This commit is contained in:
Matteo Rosati
2026-01-22 10:18:45 +01:00
parent 1ed452f1d9
commit 48d8ce9276
6 changed files with 98 additions and 50 deletions

View File

@@ -51,6 +51,7 @@ WORKDIR /app
# Copy application files # Copy application files
COPY --chown=appuser:appuser app.py . COPY --chown=appuser:appuser app.py .
COPY --chown=appuser:appuser llm_config.py .
COPY --chown=appuser:appuser lib.py . COPY --chown=appuser:appuser lib.py .
COPY --chown=appuser:appuser credentials.json . COPY --chown=appuser:appuser credentials.json .
COPY --chown=appuser:appuser static ./static COPY --chown=appuser:appuser static ./static

98
lib.py
View File

@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
with Vertex AI RAG (Retrieval-Augmented Generation) support. with Vertex AI RAG (Retrieval-Augmented Generation) support.
""" """
import asyncio
import threading
from google import genai from google import genai
from google.genai import types from google.genai import types
from dotenv import load_dotenv from dotenv import load_dotenv
from llm_config import generate_content_config
# Load environment variables from .env file # Load environment variables from .env file
load_dotenv() load_dotenv()
# Vertex AI RAG Corpus resource path
CORPUS: str = (
"projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
)
# Gemini model name # Gemini model name
GEMINI_MODEL: str = "gemini-3-pro-preview" GEMINI_MODEL: str = "gemini-3-pro-preview"
async def generate(prompt: str):
    """Stream generated text for *prompt* from Gemini with Vertex AI RAG.

    The google-genai streaming call is synchronous, so it runs in a daemon
    thread; chunks are bridged back to the running event loop through an
    asyncio.Queue so this coroutine never blocks the loop.

    Args:
        prompt: The user prompt to send to the model.

    Yields:
        str: Text chunks from the generated response.
    """
    # Unique end-of-stream marker. A string sentinel such as "<<END>>" could
    # collide with legitimate model output and truncate the stream early;
    # an anonymous object compared with `is` cannot.
    _done = object()

    # Queue carries text chunks plus the completion sentinel.
    chunk_queue: asyncio.Queue = asyncio.Queue()
    # get_running_loop() is the correct (non-deprecated) way to obtain the
    # loop from inside a coroutine; get_event_loop() here is deprecated
    # since Python 3.10.
    loop = asyncio.get_running_loop()

    def run_streaming() -> None:
        """Run the synchronous streaming call and feed chunks to the queue."""
        try:
            client = genai.Client(vertexai=True)

            contents = [
                types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
            ]

            for chunk in client.models.generate_content_stream(
                model=GEMINI_MODEL,
                contents=contents,
                config=generate_content_config,
            ):
                if (
                    chunk.candidates
                    and chunk.candidates[0].content
                    and chunk.candidates[0].content.parts
                ):
                    # Hand the chunk to the event loop thread-safely; wait
                    # briefly so any queueing error surfaces in this thread.
                    asyncio.run_coroutine_threadsafe(
                        chunk_queue.put(chunk.text),
                        loop,
                    ).result(timeout=1)
        except Exception as e:  # best-effort: log and terminate the stream
            print(f"[ERROR] Streaming error: {e}")
        finally:
            # Always signal completion so the consumer loop terminates,
            # even when the streaming call failed.
            asyncio.run_coroutine_threadsafe(
                chunk_queue.put(_done),
                loop,
            )

    # Start the streaming in a daemon thread so it cannot block shutdown.
    stream_thread = threading.Thread(target=run_streaming, daemon=True)
    stream_thread.start()

    # Drain the queue until the worker signals completion.
    while True:
        chunk = await chunk_queue.get()
        if chunk is _done:
            break
        yield chunk

36
llm_config.py Normal file
View File

@@ -0,0 +1,36 @@
"""Shared Gemini generation configuration.

Declares the Vertex AI RAG corpus path, the retrieval tool wiring, and the
GenerateContentConfig consumed by the streaming generator in lib.py.
"""
from google.genai import types

# Vertex AI RAG Corpus resource path
CORPUS: str = (
    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
)

# Retrieval tool backed by the RAG corpus above.
_rag_store = types.VertexRagStore(
    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
)
tools = [types.Tool(retrieval=types.Retrieval(vertex_rag_store=_rag_store))]

# Harm categories whose safety filtering is explicitly disabled ("OFF").
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

generate_content_config = types.GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=65535,
    safety_settings=[
        types.SafetySetting(category=category, threshold="OFF")
        for category in _HARM_CATEGORIES
    ],
    tools=tools,
    thinking_config=types.ThinkingConfig(
        thinking_level="HIGH",
    ),
)

View File

@@ -5,6 +5,7 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"asyncio>=4.0.0",  # FIXME: remove — the PyPI "asyncio" package is an obsolete stdlib mirror; asyncio ships with Python (this project requires >=3.13)
"fastapi>=0.128.0", "fastapi>=0.128.0",
"fastapi-sse>=1.1.1", "fastapi-sse>=1.1.1",
"google-genai>=1.59.0", "google-genai>=1.59.0",

View File

@@ -1,6 +1,7 @@
annotated-doc==0.0.4 annotated-doc==0.0.4
annotated-types==0.7.0 annotated-types==0.7.0
anyio==4.12.1 anyio==4.12.1
asyncio==4.0.0
certifi==2026.1.4 certifi==2026.1.4
charset-normalizer==3.4.4 charset-normalizer==3.4.4
click==8.3.1 click==8.3.1

11
uv.lock generated
View File

@@ -32,6 +32,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
] ]
[[package]]
name = "asyncio"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/71/ea/26c489a11f7ca862d5705db67683a7361ce11c23a7b98fc6c2deaeccede2/asyncio-4.0.0.tar.gz", hash = "sha256:570cd9e50db83bc1629152d4d0b7558d6451bb1bfd5dfc2e935d96fc2f40329b", size = 5371, upload-time = "2025-08-05T02:51:46.605Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/57/64/eff2564783bd650ca25e15938d1c5b459cda997574a510f7de69688cb0b4/asyncio-4.0.0-py3-none-any.whl", hash = "sha256:c1eddb0659231837046809e68103969b2bef8b0400d59cfa6363f6b5ed8cc88b", size = 5555, upload-time = "2025-08-05T02:51:45.767Z" },
]
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2026.1.4" version = "2026.1.4"
@@ -144,6 +153,7 @@ name = "genai"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "asyncio" },
{ name = "fastapi" }, { name = "fastapi" },
{ name = "fastapi-sse" }, { name = "fastapi-sse" },
{ name = "google-genai" }, { name = "google-genai" },
@@ -155,6 +165,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "asyncio", specifier = ">=4.0.0" },
{ name = "fastapi", specifier = ">=0.128.0" }, { name = "fastapi", specifier = ">=0.128.0" },
{ name = "fastapi-sse", specifier = ">=1.1.1" }, { name = "fastapi-sse", specifier = ">=1.1.1" },
{ name = "google-genai", specifier = ">=1.59.0" }, { name = "google-genai", specifier = ">=1.59.0" },