reset parallelism

This commit is contained in:
Matteo Rosati
2026-01-21 11:52:37 +01:00
parent f86cbed467
commit 31ee3fed8c
2 changed files with 7 additions and 34 deletions

2
app.py
View File

@@ -91,7 +91,7 @@ async def websocket_endpoint(websocket: WebSocket):
while True: while True:
data = await websocket.receive_text() data = await websocket.receive_text()
async for chunk in generate(data): for chunk in generate(data):
await websocket.send_text(chunk) await websocket.send_text(chunk)
await websocket.send_text("<<END>>") await websocket.send_text("<<END>>")

39
lib.py
View File

@@ -4,8 +4,6 @@ This module provides functionality to generate content using Google's Gemini mod
with Vertex AI RAG (Retrieval-Augmented Generation) support. with Vertex AI RAG (Retrieval-Augmented Generation) support.
""" """
import asyncio
from concurrent.futures import ThreadPoolExecutor
from google import genai from google import genai
from google.genai import types from google.genai import types
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -19,15 +17,15 @@ CORPUS: str = "projects/520464122471/locations/europe-west3/ragCorpora/230584300
# Gemini model name # Gemini model name
GEMINI_MODEL: str = "gemini-3-pro-preview" GEMINI_MODEL: str = "gemini-3-pro-preview"
# Thread pool for blocking API calls
_executor = ThreadPoolExecutor(max_workers=10)
def generate(prompt: str):
"""Generate content using Gemini model with RAG retrieval.
def _generate_sync(prompt: str): This function creates a streaming response from the Gemini model,
"""Synchronous wrapper for generate_content_stream. augmented with content from the configured RAG corpus.
This function contains the blocking Google GenAI SDK call. The blocking API call is run in a thread pool to allow concurrent
It should be run in a thread pool to avoid blocking the event loop. processing of multiple WebSocket connections.
Args: Args:
prompt: The user's input prompt to generate content for. prompt: The user's input prompt to generate content for.
@@ -86,28 +84,3 @@ def _generate_sync(prompt: str):
continue continue
yield chunk.text yield chunk.text
async def generate(prompt: str):
"""Generate content using Gemini model with RAG retrieval.
This function creates a streaming response from the Gemini model,
augmented with content from the configured RAG corpus.
The blocking API call is run in a thread pool to allow concurrent
processing of multiple WebSocket connections.
Args:
prompt: The user's input prompt to generate content for.
Yields:
str: Text chunks from the generated response.
"""
loop = asyncio.get_running_loop()
# Run the synchronous generator in a thread pool to avoid blocking the event loop
sync_gen = await loop.run_in_executor(_executor, _generate_sync, prompt)
# Yield from the synchronous generator
for chunk in sync_gen:
yield chunk