Skip to content

streaming_completion

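Runnable example for the HTTP server: `streaming_completion` requests a text completion from a locally running server using the OpenAI Python client and prints the output as it streams in.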

import sys
from openai import OpenAI
# Point the OpenAI client at a server listening on localhost:1234.
# NOTE(review): "foobar" looks like a placeholder key - presumably the local
# server does not validate it; confirm against the server's configuration.
client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/")

# stream=True asks for the completion incrementally; the returned object is
# iterated chunk by chunk below instead of being read as a single response.
response = client.completions.create(
    model="default",
    prompt="My favorite theorem is",
    max_tokens=32,
    stream=True,
)

for chunk in response:
    delta = chunk.choices[0].text
    # No trailing newline, so successive chunks join into continuous text.
    print(delta, end="")
    # Flush after every chunk so the text appears as it arrives rather than
    # whenever the stdout buffer happens to fill.
    sys.stdout.flush()

Source: examples/server/streaming_completion.py