Skip to content

Start the server:

Start the server: mistralrs serve --quant 4 -p 1234 -m Qwen/Qwen3-Coder-Next

"""
Start the server:
mistralrs serve --quant 4 -p 1234 -m Qwen/Qwen3-Coder-Next
"""
from openai import OpenAI
# Interactive chat client: reads an optional system prompt, then loops reading
# user turns from stdin, sending the whole conversation to the server each
# time and printing the assistant's reply.

# The base URL points at a server on localhost port 1234. The API key is a
# dummy value; presumably the local server does not validate it (confirm
# against your server configuration).
client = OpenAI(api_key="foobar", base_url="http://localhost:1234/v1/")

# Running conversation history. The full list is resent on every request so
# the model sees all earlier turns.
messages = []

try:
    prompt = input("Enter system prompt >>> ")
    # An empty line means "no system prompt": start with an empty history.
    if prompt:
        messages.append({"role": "system", "content": prompt})

    while True:
        prompt = input(">>> ")
        messages.append({"role": "user", "content": prompt})
        completion = client.chat.completions.create(
            model="default",
            messages=messages,
            max_tokens=256,  # upper bound on the length of each reply
            frequency_penalty=1.0,
            top_p=0.1,
            temperature=0,
        )
        resp = completion.choices[0].message.content
        print(resp)
        # Record the reply so the next request carries it as context.
        messages.append({"role": "assistant", "content": resp})
except (EOFError, KeyboardInterrupt):
    # Ctrl-D (end of input) or Ctrl-C is the only way out of the loop; exit
    # quietly instead of dumping a traceback. The bare print() moves the
    # shell prompt onto a fresh line.
    print()

Source: examples/server/qwen3_next.py