OpenAI-compatible chat completions endpoint handler.

POST

/v1/chat/completions

// Example: send a chat completion request and print whatever the server returns.
const url = 'https://example.com/v1/chat/completions';
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  // NOTE: this payload sets "stream": true, so a successful response is
  // delivered as server-sent event chunks rather than a single JSON document.
  body: '{"adapter":{"generation":"5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a"},"agent_permission":null,"code_execution_permission":null,"dry_allowed_length":null,"dry_base":null,"dry_multiplier":null,"dry_sequence_breakers":null,"enable_shell":false,"enable_thinking":null,"files":["example"],"frequency_penalty":null,"grammar":{"type":"regex","value":"example"},"logit_bias":null,"logprobs":false,"max_tokens":256,"max_tool_rounds":null,"messages":[{"content":"example","name":"example","role":"example","tool_call_id":"example","tool_calls":[{"function":{"arguments":"example","name":"example"},"id":"example","type":"function"}]}],"min_p":null,"model":"mistral","n":1,"presence_penalty":null,"reasoning_effort":null,"repetition_penalty":null,"response_format":{"type":"text"},"session_id":"example","stop":"example","stream":true,"temperature":0.7,"tool_choice":"none","tools":null,"top_k":null,"top_logprobs":null,"top_p":null,"truncate_sequence":null,"web_search_options":{"external_web_access":true,"extract_description":"example","filters":{"allowed_domains":["example"],"blocked_domains":["example"]},"image_settings":{"caption":true,"max_results":1},"return_token_budget":"default","search_content_types":["text"],"search_context_size":"low","search_description":"example","user_location":{"approximate":{"city":"example","country":"example","region":"example","timezone":"example"},"type":"approximate"}}}'
};

try {
  const response = await fetch(url, options);

  // fetch() only rejects on network failures; HTTP error statuses must be
  // checked explicitly. The body text is kept so the server's message is not lost.
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Request failed with HTTP ${response.status}: ${detail}`);
  }

  const contentType = response.headers.get('content-type') ?? '';
  if (contentType.includes('text/event-stream')) {
    // Streaming response: parsing it with response.json() would throw, so
    // print the raw event chunks as they arrive instead.
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    for (;;) {
      const {done, value} = await reader.read();
      if (done) {
        break;
      }
      console.log(decoder.decode(value, {stream: true}));
    }
  } else {
    // Non-streaming response: a single JSON document.
    const data = await response.json();
    console.log(data);
  }
} catch (error) {
  console.error(error);
}

# POST the sample chat completion payload as JSON.
curl -X POST 'https://example.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -d '{ "adapter": { "generation": "5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a" }, "agent_permission": null, "code_execution_permission": null, "dry_allowed_length": null, "dry_base": null, "dry_multiplier": null, "dry_sequence_breakers": null, "enable_shell": false, "enable_thinking": null, "files": [ "example" ], "frequency_penalty": null, "grammar": { "type": "regex", "value": "example" }, "logit_bias": null, "logprobs": false, "max_tokens": 256, "max_tool_rounds": null, "messages": [ { "content": "example", "name": "example", "role": "example", "tool_call_id": "example", "tool_calls": [ { "function": { "arguments": "example", "name": "example" }, "id": "example", "type": "function" } ] } ], "min_p": null, "model": "mistral", "n": 1, "presence_penalty": null, "reasoning_effort": null, "repetition_penalty": null, "response_format": { "type": "text" }, "session_id": "example", "stop": "example", "stream": true, "temperature": 0.7, "tool_choice": "none", "tools": null, "top_k": null, "top_logprobs": null, "top_p": null, "truncate_sequence": null, "web_search_options": { "external_web_access": true, "extract_description": "example", "filters": { "allowed_domains": [ "example" ], "blocked_domains": [ "example" ] }, "image_settings": { "caption": true, "max_results": 1 }, "return_token_budget": "default", "search_content_types": [ "text" ], "search_context_size": "low", "search_description": "example", "user_location": { "approximate": { "city": "example", "country": "example", "region": "example", "timezone": "example" }, "type": "approximate" } } }'

Request Body (required)

application/json

Chat completion request following OpenAI’s specification

object

adapter

One of:

null
object

null

One of:

string
object

Resolve this alias when the request is admitted.

string

Pin this exact resident generation.

object

generation

required

The 64-character hexadecimal generation ID returned by the adapter management API.

string

Example

5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a5a

agent_permission

Permission policy for agentic tools.

string | null

Example

null

code_execution_permission

Permission policy for code execution.

string | null

Example

null

dry_allowed_length

Longest repeated sequence DRY leaves unpenalized.

integer | null

Example

null

dry_base

Base for DRY’s exponential penalty growth.

number | null format: float

Example

null

dry_multiplier

DRY repetition penalty multiplier; 0 disables DRY.

number | null format: float

Example

null

dry_sequence_breakers

Sequences that reset DRY repetition matching.

Array<string> | null

Example

null

enable_shell

Enable the built-in shell execution tool.

boolean

Example

false

enable_thinking

Toggle thinking output for models that support it.

boolean | null

Example

null

files

Required output files. The runtime asks the model to produce them and surfaces a File (or error placeholder) for each.

Array | null

frequency_penalty

Penalize tokens by how often they have appeared so far; positive values reduce repetition.

number | null format: float

Example

null

grammar

One of:

null
object

null

logit_bias

Bias added to the logits of these token IDs before sampling.

object | null

Example

null

logprobs

Return log probabilities of the output tokens.

boolean

Example

false

max_tokens

Maximum number of tokens to generate.

integer | null

Example

256

max_tool_rounds

Maximum number of tool-call rounds the server will auto-execute.

integer | null

Example

null

messages

required

One of:

Array<object>
string

Array<object>

Represents a single message in a conversation

Examples

use either::Either;
use mistralrs_server_core::openai::{Message, MessageContent};

// User message
let user_msg = Message {
    content: Some(MessageContent(Either::Left("What's 2+2?".to_string()))),
    role: "user".to_string(),
    name: None,
    tool_calls: None,
    // Only set on tool messages that answer an earlier tool call.
    tool_call_id: None,
};

// System message
let system_msg = Message {
    content: Some(MessageContent(Either::Left("You are a helpful assistant.".to_string()))),
    role: "system".to_string(),
    name: None,
    tool_calls: None,
    // Only set on tool messages that answer an earlier tool call.
    tool_call_id: None,
};

object

content

One of:

null
unknown

null

name

Optional participant name for this message

string | null

role

required

The role of the message sender (“user”, “assistant”, “system”, “tool”, etc.)

string

tool_call_id

Tool call ID this message is responding to (for tool messages)

string | null

tool_calls

Optional list of tool calls (for assistant messages)

Array<object> | null

Represents a tool call made by the assistant

This structure wraps a function call with its type information.

object

function

required

The function call details

object

arguments

required

The function arguments (JSON string)

string

name

required

The name of the function to call

string

id

Unique identifier for this tool call

string | null

type

required

The type of tool being called

string

Allowed values: function

min_p

Drop tokens below this fraction of the top token’s probability.

number | null format: double

Example

null

model

Model ID; “default” targets the only loaded model.

string

Example

mistral

n

How many choices to generate.

integer

Example

1

presence_penalty

Penalize tokens that have already appeared; positive values push toward new topics.

number | null format: float

Example

null

reasoning_effort

Reasoning effort level for Harmony-format models (GPT-OSS). Controls the depth of reasoning/analysis: “low”, “medium”, or “high”.

string | null

Example

null

repetition_penalty

Multiplicative repetition penalty; 1.0 disables it.

number | null format: float

Example

null

response_format

One of:

null
object

null

session_id

Persistent agentic state. If null, a new session is created and the ID is returned in the response.

string | null

stop

One of:

null
unknown

null

stream

Stream the response as server-sent events.

boolean | null

Example

true

temperature

Sampling temperature; higher values increase randomness.

number | null format: double

Example

0.7

tool_choice

One of:

null
object

null

One of:

Disallow selection of tools.

string

Allowed values: none

Tool definition

object

function

required

Function definition for a tool

object

description

string | null

name

required

string

parameters

object | null

strict

When true, the tool’s parameters JSON schema is enforced on the generated arguments via constrained decoding (llguidance).

boolean | null

type

required

Type of tool

string

Allowed values: function

tools

Tools the model may call.

Array | null

One of:

Tool definition

object

function

required

Function definition for a tool

object

description

string | null

name

required

string

parameters

object | null

strict

When true, the tool’s parameters JSON schema is enforced on the generated arguments via constrained decoding (llguidance).

boolean | null

type

required

Type of tool

string

Allowed values: function

Example

null

top_k

Sample only from the k most likely tokens.

integer | null

Example

null

top_logprobs

Number of most likely tokens to return per position; requires logprobs.

integer | null

Example

null

top_p

Nucleus sampling: only tokens within the top cumulative probability mass are considered.

number | null format: double

Example

null

truncate_sequence

Truncate inputs that exceed the model’s context length instead of erroring.

boolean | null

Example

null

web_search_options

One of:

null
object

null

Responses

200

Chat completion JSON or server-sent event chunks

object

adapter_generation

string | null

agentic_tool_calls

Array | null

choices

required

Array<object>

object

finish_reason

required

string

index

required

integer

logprobs

message

required

object

content

string | null

reasoning_content

string | null

role

required

string

tool_calls

Array | null

created

required

integer format: int64

files

Array | null

id

required

string

model

required

string

object

required

string

session_id

string | null

system_fingerprint

required

string

usage

required

object

avg_compl_tok_per_sec

required

number format: float

avg_prompt_tok_per_sec

required

number format: float

avg_tok_per_sec

required

number format: float

completion_tokens

required

integer

prompt_tokens

required

integer

total_completion_time_sec

required

number format: float

total_prompt_time_sec

required

number format: float

total_time_sec

required

number format: float

total_tokens

required

integer

Example (generated)

{
  "adapter_generation": "example",
  "agentic_tool_calls": [
    "example"
  ],
  "choices": [
    {
      "finish_reason": "example",
      "index": 1,
      "logprobs": "example",
      "message": {
        "content": "example",
        "reasoning_content": "example",
        "role": "example",
        "tool_calls": [
          "example"
        ]
      }
    }
  ],
  "created": 1,
  "files": [
    "example"
  ],
  "id": "example",
  "model": "example",
  "object": "example",
  "session_id": "example",
  "system_fingerprint": "example",
  "usage": {
    "avg_compl_tok_per_sec": 1,
    "avg_prompt_tok_per_sec": 1,
    "avg_tok_per_sec": 1,
    "completion_tokens": 1,
    "prompt_tokens": 1,
    "total_completion_time_sec": 1,
    "total_prompt_time_sec": 1,
    "total_time_sec": 1,
    "total_tokens": 1
  }
}

OpenAI-compatible chat completions endpoint handler.

Request Body required

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Examples

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Example

Responses

200

Example generated

Request Body (required)

Example (generated)