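Fix async streaming: run the blocking Gemini stream in a background thread and feed chunks to the event loop through an asyncio queue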
This commit is contained in:
@@ -51,6 +51,7 @@ WORKDIR /app
|
||||
|
||||
# Copy application files
|
||||
COPY --chown=appuser:appuser app.py .
|
||||
COPY --chown=appuser:appuser llm_config.py .
|
||||
COPY --chown=appuser:appuser lib.py .
|
||||
COPY --chown=appuser:appuser credentials.json .
|
||||
COPY --chown=appuser:appuser static ./static
|
||||
|
||||
98
lib.py
98
lib.py
@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
|
||||
with Vertex AI RAG (Retrieval-Augmented Generation) support.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from llm_config import generate_content_config
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
# Vertex AI RAG Corpus resource path
|
||||
CORPUS: str = (
|
||||
"projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
|
||||
)
|
||||
|
||||
# Gemini model name
|
||||
GEMINI_MODEL: str = "gemini-3-pro-preview"
|
||||
@@ -35,53 +35,51 @@ async def generate(prompt: str):
|
||||
Yields:
|
||||
str: Text chunks from the generated response.
|
||||
"""
|
||||
client = genai.Client(vertexai=True)
|
||||
# Create a queue for streaming chunks
|
||||
chunk_queue: asyncio.Queue[str] = asyncio.Queue()
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
contents = [
|
||||
types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
|
||||
]
|
||||
tools = [
|
||||
types.Tool(
|
||||
retrieval=types.Retrieval(
|
||||
vertex_rag_store=types.VertexRagStore(
|
||||
rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
|
||||
)
|
||||
def run_streaming():
    """Run the synchronous Gemini stream in a worker thread.

    Iterates the blocking ``generate_content_stream`` call and hands every
    text chunk to ``chunk_queue`` on the event loop ``loop`` (both captured
    from the enclosing coroutine).  The ``"<<END>>"`` sentinel is always
    enqueued last, even on failure, so the consumer loop terminates.

    Errors raised by the SDK are reported on stdout and end the stream;
    they are not re-raised because nothing joins this thread.
    """

    def enqueue(item):
        # asyncio.Queue is not thread-safe, so the put has to run on the
        # loop thread.  The queue is created without a maxsize, so
        # put_nowait cannot raise QueueFull, and call_soon_threadsafe
        # preserves FIFO order without blocking this worker.  (The previous
        # ``future.result(timeout=1)`` aborted the whole stream whenever the
        # loop was busy for more than a second.)
        loop.call_soon_threadsafe(chunk_queue.put_nowait, item)

    try:
        client = genai.Client(vertexai=True)

        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
        ]

        for chunk in client.models.generate_content_stream(
            model=GEMINI_MODEL,
            contents=contents,
            config=generate_content_config,
        ):
            # Skip keep-alive / metadata chunks that carry no content parts.
            if not (
                chunk.candidates
                and chunk.candidates[0].content
                and chunk.candidates[0].content.parts
            ):
                continue

            text = chunk.text
            # NOTE(review): ``chunk.text`` appears to be None when the parts
            # hold no text (e.g. thought-only parts) - confirm against the
            # SDK.  The consumer is documented to yield ``str`` only.
            if text is not None:
                enqueue(text)
    except Exception as e:
        print(f"[ERROR] Streaming error: {e}")
    finally:
        try:
            enqueue("<<END>>")
        except RuntimeError as e:
            # The event loop was already closed (e.g. the request was torn
            # down); there is no consumer left to notify.
            print(f"[ERROR] Could not signal end of stream: {e}")
|
||||
)
|
||||
]
|
||||
|
||||
generate_content_config = types.GenerateContentConfig(
|
||||
temperature=1,
|
||||
top_p=0.95,
|
||||
max_output_tokens=65535,
|
||||
safety_settings=[
|
||||
types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
|
||||
types.SafetySetting(
|
||||
category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
|
||||
),
|
||||
types.SafetySetting(
|
||||
category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
|
||||
),
|
||||
types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
|
||||
],
|
||||
tools=tools,
|
||||
thinking_config=types.ThinkingConfig(
|
||||
thinking_level="HIGH",
|
||||
),
|
||||
)
|
||||
# Start the streaming in a daemon thread
|
||||
stream_thread = threading.Thread(target=run_streaming, daemon=True)
|
||||
stream_thread.start()
|
||||
|
||||
for chunk in client.models.generate_content_stream(
|
||||
model=GEMINI_MODEL,
|
||||
contents=contents,
|
||||
config=generate_content_config,
|
||||
):
|
||||
# DEBUG: Log chunk type to confirm generator behavior
|
||||
print(f"[DEBUG] Chunk type: {type(chunk)}")
|
||||
if (
|
||||
not chunk.candidates
|
||||
or not chunk.candidates[0].content
|
||||
or not chunk.candidates[0].content.parts
|
||||
):
|
||||
continue
|
||||
|
||||
yield chunk.text
|
||||
# Yield chunks as they become available
|
||||
while True:
|
||||
chunk = await chunk_queue.get()
|
||||
if chunk == "<<END>>":
|
||||
break
|
||||
yield chunk
|
||||
|
||||
36
llm_config.py
Normal file
36
llm_config.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from google.genai import types
|
||||
|
||||
# Fully qualified resource name of the Vertex AI RAG corpus used for retrieval
CORPUS: str = (
    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
)

# Single retrieval tool that grounds generation in the RAG corpus above
_rag_store = types.VertexRagStore(
    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
)
tools = [types.Tool(retrieval=types.Retrieval(vertex_rag_store=_rag_store))]

# Harm categories whose safety filter is switched off, in request order
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

# Shared generation settings imported by lib.py for every request
generate_content_config = types.GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=65535,
    safety_settings=[
        types.SafetySetting(category=category, threshold="OFF")
        for category in _HARM_CATEGORIES
    ],
    tools=tools,
    thinking_config=types.ThinkingConfig(thinking_level="HIGH"),
)
|
||||
@@ -5,6 +5,7 @@ description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"asyncio>=4.0.0",
|
||||
"fastapi>=0.128.0",
|
||||
"fastapi-sse>=1.1.1",
|
||||
"google-genai>=1.59.0",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
annotated-doc==0.0.4
|
||||
annotated-types==0.7.0
|
||||
anyio==4.12.1
|
||||
asyncio==4.0.0
|
||||
certifi==2026.1.4
|
||||
charset-normalizer==3.4.4
|
||||
click==8.3.1
|
||||
|
||||
11
uv.lock
generated
11
uv.lock
generated
@@ -32,6 +32,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asyncio"
|
||||
version = "4.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/71/ea/26c489a11f7ca862d5705db67683a7361ce11c23a7b98fc6c2deaeccede2/asyncio-4.0.0.tar.gz", hash = "sha256:570cd9e50db83bc1629152d4d0b7558d6451bb1bfd5dfc2e935d96fc2f40329b", size = 5371, upload-time = "2025-08-05T02:51:46.605Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/57/64/eff2564783bd650ca25e15938d1c5b459cda997574a510f7de69688cb0b4/asyncio-4.0.0-py3-none-any.whl", hash = "sha256:c1eddb0659231837046809e68103969b2bef8b0400d59cfa6363f6b5ed8cc88b", size = 5555, upload-time = "2025-08-05T02:51:45.767Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2026.1.4"
|
||||
@@ -144,6 +153,7 @@ name = "genai"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
dependencies = [
|
||||
{ name = "asyncio" },
|
||||
{ name = "fastapi" },
|
||||
{ name = "fastapi-sse" },
|
||||
{ name = "google-genai" },
|
||||
@@ -155,6 +165,7 @@ dependencies = [
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "asyncio", specifier = ">=4.0.0" },
|
||||
{ name = "fastapi", specifier = ">=0.128.0" },
|
||||
{ name = "fastapi-sse", specifier = ">=1.1.1" },
|
||||
{ name = "google-genai", specifier = ">=1.59.0" },
|
||||
|
||||
Reference in New Issue
Block a user