streaming
A runnable example of streaming chat completions from the HTTP server.
# Interactive chat client: reads prompts from stdin, sends the running
# conversation to an OpenAI-compatible HTTP endpoint, and prints the reply
# piece by piece as the streamed chunks arrive.
import sys

from openai import OpenAI

# Requests go to the server at base_url rather than the hosted OpenAI API.
# NOTE(review): "foobar" looks like a placeholder key that the local server
# presumably ignores - confirm against the server's auth settings.
client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/")

# Conversation history; the whole list is sent with every request.
messages = []

# An empty system prompt is simply omitted from the history.
prompt = input("Enter system prompt >>> ")
if prompt:
    messages.append({"role": "system", "content": prompt})

while True:
    prompt = input(">>> ")
    messages.append({"role": "user", "content": prompt})

    response = client.chat.completions.create(
        model="default",
        messages=messages,
        max_tokens=256,
        stream=True,
    )

    # Collect the streamed pieces so the full reply can be stored in the
    # history once the stream ends.
    parts = []
    for chunk in response:
        # Not every chunk carries text: a chunk may have no choices, or a
        # delta whose content is None/empty. Printing or concatenating such
        # a value would emit "None" or raise TypeError, so skip it.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if not delta:
            continue
        print(delta, end="")
        # Flush so each piece shows up immediately instead of when the
        # output buffer fills.
        sys.stdout.flush()
        parts.append(delta)

    messages.append({"role": "assistant", "content": "".join(parts)})
    print()

# Source: examples/server/streaming.py