diff --git a/Dockerfile b/Dockerfile
index 885e388..f441db7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -51,6 +51,7 @@ WORKDIR /app
 
 # Copy application files
 COPY --chown=appuser:appuser app.py .
+COPY --chown=appuser:appuser llm_config.py .
 COPY --chown=appuser:appuser lib.py .
 COPY --chown=appuser:appuser credentials.json .
 COPY --chown=appuser:appuser static ./static
diff --git a/lib.py b/lib.py
index bb807cd..addf9c3 100644
--- a/lib.py
+++ b/lib.py
@@ -4,17 +4,17 @@ This module provides functionality to generate content using Google's Gemini mod
 with Vertex AI RAG (Retrieval-Augmented Generation) support.
 """
 
+import asyncio
+import threading
 from google import genai
 from google.genai import types
 from dotenv import load_dotenv
 
+from llm_config import generate_content_config
+
 # Load environment variables from .env file
 load_dotenv()
 
-# Vertex AI RAG Corpus resource path
-CORPUS: str = (
-    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
-)
 
 # Gemini model name
 GEMINI_MODEL: str = "gemini-3-pro-preview"
@@ -35,53 +35,51 @@ async def generate(prompt: str):
     Yields:
         str: Text chunks from the generated response.
     """
-    client = genai.Client(vertexai=True)
+    # Create a queue for streaming chunks; None is the end-of-stream sentinel
+    chunk_queue: asyncio.Queue[str | None] = asyncio.Queue()
+    loop = asyncio.get_running_loop()
 
-    contents = [
-        types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
-    ]
-    tools = [
-        types.Tool(
-            retrieval=types.Retrieval(
-                vertex_rag_store=types.VertexRagStore(
-                    rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
-                )
+    def run_streaming():
+        """Run the synchronous streaming in a separate thread."""
+        try:
+            client = genai.Client(vertexai=True)
+
+            contents = [
+                types.Content(role="user", parts=[types.Part.from_text(text=prompt)]),
+            ]
+
+            for chunk in client.models.generate_content_stream(
+                model=GEMINI_MODEL,
+                contents=contents,
+                config=generate_content_config,
+            ):
+                if (
+                    chunk.candidates
+                    and chunk.candidates[0].content
+                    and chunk.candidates[0].content.parts
+                ):
+                    # Schedule the put operation in the event loop
+                    future = asyncio.run_coroutine_threadsafe(
+                        chunk_queue.put(chunk.text),
+                        loop,
+                    )
+                    # Wait for the put to complete (quick operation)
+                    future.result(timeout=1)
+        except Exception as e:
+            print(f"[ERROR] Streaming error: {e}")
+        finally:
+            asyncio.run_coroutine_threadsafe(
+                chunk_queue.put(None),
+                loop,
             )
-        )
-    ]
 
-    generate_content_config = types.GenerateContentConfig(
-        temperature=1,
-        top_p=0.95,
-        max_output_tokens=65535,
-        safety_settings=[
-            types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
-            types.SafetySetting(
-                category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
-            ),
-            types.SafetySetting(
-                category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
-            ),
-            types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
-        ],
-        tools=tools,
-        thinking_config=types.ThinkingConfig(
-            thinking_level="HIGH",
-        ),
-    )
+    # Start the streaming in a daemon thread
+    stream_thread = threading.Thread(target=run_streaming, daemon=True)
+    stream_thread.start()
 
-    for chunk in client.models.generate_content_stream(
-        model=GEMINI_MODEL,
-        contents=contents,
-        config=generate_content_config,
-    ):
-        # DEBUG: Log chunk type to confirm generator behavior
-        print(f"[DEBUG] Chunk type: {type(chunk)}")
-        if (
-            not chunk.candidates
-            or not chunk.candidates[0].content
-            or not chunk.candidates[0].content.parts
-        ):
-            continue
-
-        yield chunk.text
+    # Yield chunks as they become available
+    while True:
+        chunk = await chunk_queue.get()
+        if chunk is None:
+            break
+        yield chunk
diff --git a/llm_config.py b/llm_config.py
new file mode 100644
index 0000000..e11a33b
--- /dev/null
+++ b/llm_config.py
@@ -0,0 +1,36 @@
+from google.genai import types
+
+# Vertex AI RAG Corpus resource path
+CORPUS: str = (
+    "projects/520464122471/locations/europe-west3/ragCorpora/2305843009213693952"
+)
+
+tools = [
+    types.Tool(
+        retrieval=types.Retrieval(
+            vertex_rag_store=types.VertexRagStore(
+                rag_resources=[types.VertexRagStoreRagResource(rag_corpus=CORPUS)],
+            )
+        )
+    )
+]
+
+generate_content_config = types.GenerateContentConfig(
+    temperature=1,
+    top_p=0.95,
+    max_output_tokens=65535,
+    safety_settings=[
+        types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
+        types.SafetySetting(
+            category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
+        ),
+        types.SafetySetting(
+            category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
+        ),
+        types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
+    ],
+    tools=tools,
+    thinking_config=types.ThinkingConfig(
+        thinking_level="HIGH",
+    ),
+)