from collections.abc import AsyncIterable

from fastapi import FastAPI
from fastapi.sse import EventSourceResponse, ServerSentEvent
from pydantic import BaseModel

app = FastAPI()


# Request body accepted by the streaming chat endpoint.
#
# NOTE: this module is documented with `#` comments instead of docstrings on
# purpose -- docstrings on models and path operations are surfaced in the
# generated OpenAPI document, and that output should stay unchanged.
class Prompt(BaseModel):
    # Raw prompt text; the endpoint below splits it on whitespace.
    text: str


# POST /chat/stream
#
# Streams the prompt back to the client as Server-Sent Events:
#   * one event named "token" per whitespace-separated word of `prompt.text`,
#     in order, carrying that word as its `data`;
#   * then a single closing event named "done" whose payload is "[DONE]".
#
# An empty or whitespace-only prompt produces only the closing "done" event,
# because `str.split()` with no arguments returns an empty list in that case.
@app.post("/chat/stream", response_class=EventSourceResponse)
async def stream_chat(prompt: Prompt) -> AsyncIterable[ServerSentEvent]:
    for piece in prompt.text.split():
        yield ServerSentEvent(data=piece, event="token")

    # The end-of-stream sentinel is passed via `raw_data` (not `data`), which
    # per the FastAPI SSE docs sends the payload as-is, without JSON encoding.
    yield ServerSentEvent(raw_data="[DONE]", event="done")