fix async with threads
This commit is contained in:
@@ -51,6 +51,7 @@ WORKDIR /app
|
|||||||
|
|
||||||
# Copy application files
|
# Copy application files
|
||||||
COPY --chown=appuser:appuser app.py .
|
COPY --chown=appuser:appuser app.py .
|
||||||
|
COPY --chown=appuser:appuser llm_config.py .
|
||||||
COPY --chown=appuser:appuser lib.py .
|
COPY --chown=appuser:appuser lib.py .
|
||||||
COPY --chown=appuser:appuser credentials.json .
|
COPY --chown=appuser:appuser credentials.json .
|
||||||
COPY --chown=appuser:appuser static ./static
|
COPY --chown=appuser:appuser static ./static
|
||||||
|
|||||||
78
lib.py
78
lib.py
@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
|
|||||||
with Vertex AI RAG (Retrieval-Augmented Generation) support.
|
with Vertex AI RAG (Retrieval-Augmented Generation) support.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import threading
|
||||||
from google import genai
|
from google import genai
|
||||||
from google.genai import types
|
from google.genai import types
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from llm_config import generate_content_config
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Vertex AI RAG Corpus resource path
|
|
||||||
CORPUS: str = (
|
|
||||||
"projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Gemini model name
|
# Gemini model name
|
||||||
GEMINI_MODEL: str = "gemini-3-pro-preview"
|
GEMINI_MODEL: str = "gemini-3-pro-preview"
|
||||||
@@ -35,53 +35,51 @@ async def generate(prompt: str):
|
|||||||
Yields:
|
Yields:
|
||||||
str: Text chunks from the generated response.
|
str: Text chunks from the generated response.
|
||||||
"""
|
"""
|
||||||
|
# Create a queue for streaming chunks
|
||||||
|
chunk_queue: asyncio.Queue[str] = asyncio.Queue()
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
|
def run_streaming():
    """Run the synchronous Gemini stream in a separate thread.

    Consumes the blocking ``generate_content_stream`` iterator off the event
    loop and hands each text chunk to ``chunk_queue`` on ``loop`` (both are
    captured from the enclosing coroutine). The ``"<<END>>"`` sentinel is
    always queued last, whether streaming succeeded or failed, so the
    consumer awaiting the queue can stop.

    Errors raised while streaming are printed and end the stream; they are
    not re-raised into the consumer.
    """

    def enqueue(item: str) -> None:
        # chunk_queue is created without a maxsize, so put_nowait cannot
        # raise QueueFull. Scheduling it with call_soon_threadsafe avoids
        # blocking this thread on future.result(timeout=1), which aborted
        # the whole stream with a TimeoutError whenever the event loop was
        # busy for more than a second.
        loop.call_soon_threadsafe(chunk_queue.put_nowait, item)

    try:
        client = genai.Client(vertexai=True)

        contents = [
            types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
        ]

        for chunk in client.models.generate_content_stream(
            model=GEMINI_MODEL,
            contents=contents,
            config=generate_content_config,
        ):
            # Skip chunks that carry no candidate content at all.
            if not (
                chunk.candidates
                and chunk.candidates[0].content
                and chunk.candidates[0].content.parts
            ):
                continue

            # chunk.text can be None when the parts hold no text; never
            # push a non-string into the str-typed queue.
            text = chunk.text
            if text is not None:
                enqueue(text)
    except Exception as e:
        print(f"[ERROR] Streaming error: {e}")
    finally:
        try:
            enqueue("<<END>>")
        except RuntimeError:
            # The event loop is already closed, so no consumer is left to
            # receive the sentinel.
            pass
|
||||||
|
|
||||||
yield chunk.text
|
# Start the streaming in a daemon thread
|
||||||
|
stream_thread = threading.Thread(target=run_streaming, daemon=True)
|
||||||
|
stream_thread.start()
|
||||||
|
|
||||||
|
# Yield chunks as they become available
|
||||||
|
while True:
|
||||||
|
chunk = await chunk_queue.get()
|
||||||
|
if chunk == "<<END>>":
|
||||||
|
break
|
||||||
|
yield chunk
|
||||||
|
|||||||
36
llm_config.py
Normal file
36
llm_config.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
from google.genai import types
|
||||||
|
|
||||||
|
# Resource name of the Vertex AI RAG corpus that backs retrieval.
CORPUS: str = "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"

# Retrieval tool: a single RAG store pointing at CORPUS.
_rag_store = types.VertexRagStore(
    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
)
tools = [types.Tool(retrieval=types.Retrieval(vertex_rag_store=_rag_store))]

# Harm categories whose safety threshold is set to "OFF", in request order.
_SAFETY_CATEGORIES = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

# Shared generation settings imported by lib.py for every streaming request.
generate_content_config = types.GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=65535,
    safety_settings=[
        types.SafetySetting(category=category, threshold="OFF")
        for category in _SAFETY_CATEGORIES
    ],
    tools=tools,
    thinking_config=types.ThinkingConfig(thinking_level="HIGH"),
)
|
||||||
@@ -5,6 +5,7 @@ description = "Add your description here"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.13"
|
requires-python = ">=3.13"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"asyncio>=4.0.0",
|
||||||
"fastapi>=0.128.0",
|
"fastapi>=0.128.0",
|
||||||
"fastapi-sse>=1.1.1",
|
"fastapi-sse>=1.1.1",
|
||||||
"google-genai>=1.59.0",
|
"google-genai>=1.59.0",
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
annotated-doc==0.0.4
|
annotated-doc==0.0.4
|
||||||
annotated-types==0.7.0
|
annotated-types==0.7.0
|
||||||
anyio==4.12.1
|
anyio==4.12.1
|
||||||
|
asyncio==4.0.0
|
||||||
certifi==2026.1.4
|
certifi==2026.1.4
|
||||||
charset-normalizer==3.4.4
|
charset-normalizer==3.4.4
|
||||||
click==8.3.1
|
click==8.3.1
|
||||||
|
|||||||
11
uv.lock
generated
11
uv.lock
generated
@@ -32,6 +32,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
|
{ url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "asyncio"
|
||||||
|
version = "4.0.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/71/ea/26c489a11f7ca862d5705db67683a7361ce11c23a7b98fc6c2deaeccede2/asyncio-4.0.0.tar.gz", hash = "sha256:570cd9e50db83bc1629152d4d0b7558d6451bb1bfd5dfc2e935d96fc2f40329b", size = 5371, upload-time = "2025-08-05T02:51:46.605Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/57/64/eff2564783bd650ca25e15938d1c5b459cda997574a510f7de69688cb0b4/asyncio-4.0.0-py3-none-any.whl", hash = "sha256:c1eddb0659231837046809e68103969b2bef8b0400d59cfa6363f6b5ed8cc88b", size = 5555, upload-time = "2025-08-05T02:51:45.767Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "certifi"
|
name = "certifi"
|
||||||
version = "2026.1.4"
|
version = "2026.1.4"
|
||||||
@@ -144,6 +153,7 @@ name = "genai"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { virtual = "." }
|
source = { virtual = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "asyncio" },
|
||||||
{ name = "fastapi" },
|
{ name = "fastapi" },
|
||||||
{ name = "fastapi-sse" },
|
{ name = "fastapi-sse" },
|
||||||
{ name = "google-genai" },
|
{ name = "google-genai" },
|
||||||
@@ -155,6 +165,7 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "asyncio", specifier = ">=4.0.0" },
|
||||||
{ name = "fastapi", specifier = ">=0.128.0" },
|
{ name = "fastapi", specifier = ">=0.128.0" },
|
||||||
{ name = "fastapi-sse", specifier = ">=1.1.1" },
|
{ name = "fastapi-sse", specifier = ">=1.1.1" },
|
||||||
{ name = "google-genai", specifier = ">=1.59.0" },
|
{ name = "google-genai", specifier = ">=1.59.0" },
|
||||||
|
|||||||
Reference in New Issue
Block a user