mistralrs_server_core/
openai.rs

1//! ## OpenAI compatible functionality.
2
3use std::{collections::HashMap, ops::Deref};
4
5use either::Either;
6use mistralrs_core::{
7    ImageGenerationResponseFormat, LlguidanceGrammar, Tool, ToolChoice, ToolType, WebSearchOptions,
8};
9use serde::{Deserialize, Serialize};
10use serde_json::Value;
11use utoipa::{
12    openapi::{schema::SchemaType, ArrayBuilder, ObjectBuilder, OneOfBuilder, RefOr, Schema, Type},
13    PartialSchema, ToSchema,
14};
15
16/// Inner content structure for messages that can be either a string or key-value pairs
17#[derive(Debug, Clone, Deserialize, Serialize)]
18pub struct MessageInnerContent(
19    #[serde(with = "either::serde_untagged")] pub Either<String, HashMap<String, String>>,
20);
21
22// The impl Deref was preventing the Derive ToSchema and #[schema] macros from
23// properly working, so manually impl ToSchema
24impl PartialSchema for MessageInnerContent {
25    fn schema() -> RefOr<Schema> {
26        RefOr::T(message_inner_content_schema())
27    }
28}
29
30impl ToSchema for MessageInnerContent {
31    fn schemas(
32        schemas: &mut Vec<(
33            String,
34            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
35        )>,
36    ) {
37        schemas.push((
38            MessageInnerContent::name().into(),
39            MessageInnerContent::schema(),
40        ));
41    }
42}
43
44impl Deref for MessageInnerContent {
45    type Target = Either<String, HashMap<String, String>>;
46    fn deref(&self) -> &Self::Target {
47        &self.0
48    }
49}
50
51/// Function for MessageInnerContent Schema generation to handle `Either`
52fn message_inner_content_schema() -> Schema {
53    Schema::OneOf(
54        OneOfBuilder::new()
55            // Either::Left - simple string
56            .item(Schema::Object(
57                ObjectBuilder::new()
58                    .schema_type(SchemaType::Type(Type::String))
59                    .build(),
60            ))
61            // Either::Right - object with string values
62            .item(Schema::Object(
63                ObjectBuilder::new()
64                    .schema_type(SchemaType::Type(Type::Object))
65                    .additional_properties(Some(RefOr::T(Schema::Object(
66                        ObjectBuilder::new()
67                            .schema_type(SchemaType::Type(Type::String))
68                            .build(),
69                    ))))
70                    .build(),
71            ))
72            .build(),
73    )
74}
75
76/// Message content that can be either simple text or complex structured content
77#[derive(Debug, Clone, Deserialize, Serialize)]
78pub struct MessageContent(
79    #[serde(with = "either::serde_untagged")]
80    Either<String, Vec<HashMap<String, MessageInnerContent>>>,
81);
82
83// The impl Deref was preventing the Derive ToSchema and #[schema] macros from
84// properly working, so manually impl ToSchema
85impl PartialSchema for MessageContent {
86    fn schema() -> RefOr<Schema> {
87        RefOr::T(message_content_schema())
88    }
89}
90
91impl ToSchema for MessageContent {
92    fn schemas(
93        schemas: &mut Vec<(
94            String,
95            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
96        )>,
97    ) {
98        schemas.push((MessageContent::name().into(), MessageContent::schema()));
99    }
100}
101
102impl MessageContent {
103    /// Create a new MessageContent from a string
104    pub fn from_text(text: String) -> Self {
105        MessageContent(Either::Left(text))
106    }
107
108    /// Extract text from MessageContent
109    pub fn to_text(&self) -> Option<String> {
110        match &self.0 {
111            Either::Left(text) => Some(text.clone()),
112            Either::Right(parts) => {
113                // For complex content, try to extract text from parts
114                let mut text_parts = Vec::new();
115                for part in parts {
116                    for (key, value) in part {
117                        if key == "text" {
118                            if let Either::Left(text) = &**value {
119                                text_parts.push(text.clone());
120                            }
121                        }
122                    }
123                }
124                if text_parts.is_empty() {
125                    None
126                } else {
127                    Some(text_parts.join(" "))
128                }
129            }
130        }
131    }
132}
133
134impl Deref for MessageContent {
135    type Target = Either<String, Vec<HashMap<String, MessageInnerContent>>>;
136    fn deref(&self) -> &Self::Target {
137        &self.0
138    }
139}
140
141/// Function for MessageContent Schema generation to handle `Either`
142fn message_content_schema() -> Schema {
143    Schema::OneOf(
144        OneOfBuilder::new()
145            .item(Schema::Object(
146                ObjectBuilder::new()
147                    .schema_type(SchemaType::Type(Type::String))
148                    .build(),
149            ))
150            .item(Schema::Array(
151                ArrayBuilder::new()
152                    .items(RefOr::T(Schema::Object(
153                        ObjectBuilder::new()
154                            .schema_type(SchemaType::Type(Type::Object))
155                            .additional_properties(Some(RefOr::Ref(
156                                utoipa::openapi::Ref::from_schema_name("MessageInnerContent"),
157                            )))
158                            .build(),
159                    )))
160                    .build(),
161            ))
162            .build(),
163    )
164}
165
166/// Represents a function call made by the assistant
167///
168/// When using tool calling, this structure contains the details of a function
169/// that the model has decided to call, including the function name and its parameters.
170#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
171pub struct FunctionCalled {
172    /// The name of the function to call
173    pub name: String,
174    /// The function arguments (JSON string)
175    #[serde(alias = "parameters")]
176    pub arguments: String,
177}
178
179/// Represents a tool call made by the assistant
180///
181/// This structure wraps a function call with its type information.
182#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
183pub struct ToolCall {
184    /// Unique identifier for this tool call
185    #[serde(default)]
186    pub id: Option<String>,
187    /// The type of tool being called
188    #[serde(rename = "type")]
189    pub tp: ToolType,
190    ///  The function call details
191    pub function: FunctionCalled,
192}
193
194/// Represents a single message in a conversation
195///
196/// ### Examples
197///
198/// ```ignore
199/// use either::Either;
200/// use mistralrs_server_core::openai::{Message, MessageContent};
201///
202/// // User message
203/// let user_msg = Message {
204///     content: Some(MessageContent(Either::Left("What's 2+2?".to_string()))),
205///     role: "user".to_string(),
206///     name: None,
207///     tool_calls: None,
208/// };
209///
210/// // System message
211/// let system_msg = Message {
212///     content: Some(MessageContent(Either::Left("You are a helpful assistant.".to_string()))),
213///     role: "system".to_string(),
214///     name: None,
215///     tool_calls: None,
216/// };
217/// ```
218#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
219pub struct Message {
220    /// The message content
221    pub content: Option<MessageContent>,
222    /// The role of the message sender ("user", "assistant", "system", "tool", etc.)
223    pub role: String,
224    pub name: Option<String>,
225    /// Optional list of tool calls (for assistant messages)
226    pub tool_calls: Option<Vec<ToolCall>>,
227    /// Tool call ID this message is responding to (for tool messages)
228    pub tool_call_id: Option<String>,
229}
230
231/// Stop token configuration for generation
232///
233/// Defines when the model should stop generating text, either with a single
234/// stop token or multiple possible stop sequences.
235#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
236#[serde(untagged)]
237pub enum StopTokens {
238    ///  Multiple possible stop sequences
239    Multi(Vec<String>),
240    /// Single stop sequence
241    Single(String),
242}
243
/// Serde default helper: yields `false`.
fn default_false() -> bool {
    const DEFAULT: bool = false;
    DEFAULT
}
248
/// Serde default helper: yields `1`.
fn default_1usize() -> usize {
    const DEFAULT: usize = 1;
    DEFAULT
}
253
/// Serde default helper: yields `720`.
fn default_720usize() -> usize {
    const DEFAULT: usize = 720;
    DEFAULT
}
258
/// Serde default helper: yields `1280`.
fn default_1280usize() -> usize {
    const DEFAULT: usize = 1280;
    DEFAULT
}
263
/// Serde default helper: yields the model id `"default"`.
fn default_model() -> String {
    String::from("default")
}
268
269/// Default value helper
270fn default_response_format() -> ImageGenerationResponseFormat {
271    ImageGenerationResponseFormat::Url
272}
273
274/// Grammar specification for structured generation
275///
276/// Defines different types of grammars that can be used to constrain model output,
277/// ensuring it follows specific formats or structures.
278///
279/// ### Examples
280///
281/// ```ignore
282/// use mistralrs_server_core::openai::Grammar;
283///
284/// // Regex grammar for phone numbers
285/// let phone_regex = Grammar::Regex(r"\d{3}-\d{3}-\d{4}".to_string());
286///
287/// // JSON schema for structured data
288/// let json_schema = Grammar::JsonSchema(serde_json::json!({
289///     "type": "object",
290///     "properties": {
291///         "name": {"type": "string"},
292///         "age": {"type": "integer"}
293///     },
294///     "required": ["name", "age"]
295/// }));
296///
297/// // Lark grammar for arithmetic expressions
298/// let lark_grammar = Grammar::Lark(r#"
299///     ?start: expr
300///     expr: term ("+" term | "-" term)*
301///     term: factor ("*" factor | "/" factor)*
302///     factor: NUMBER | "(" expr ")"
303///     %import common.NUMBER
304/// "#.to_string());
305/// ```
306#[derive(Debug, Clone, Deserialize, Serialize)]
307#[serde(tag = "type", content = "value")]
308pub enum Grammar {
309    /// Regular expression grammar
310    #[serde(rename = "regex")]
311    Regex(String),
312    /// JSON schema grammar
313    #[serde(rename = "json_schema")]
314    JsonSchema(serde_json::Value),
315    /// LLGuidance grammar
316    #[serde(rename = "llguidance")]
317    Llguidance(LlguidanceGrammar),
318    /// Lark parser grammar
319    #[serde(rename = "lark")]
320    Lark(String),
321}
322
323// Implement ToSchema manually to handle `LlguidanceGrammar`
324impl PartialSchema for Grammar {
325    fn schema() -> RefOr<Schema> {
326        RefOr::T(Schema::OneOf(
327            OneOfBuilder::new()
328                .item(create_grammar_variant_schema(
329                    "regex",
330                    Schema::Object(
331                        ObjectBuilder::new()
332                            .schema_type(SchemaType::Type(Type::String))
333                            .build(),
334                    ),
335                ))
336                .item(create_grammar_variant_schema(
337                    "json_schema",
338                    Schema::Object(
339                        ObjectBuilder::new()
340                            .schema_type(SchemaType::Type(Type::Object))
341                            .build(),
342                    ),
343                ))
344                .item(create_grammar_variant_schema(
345                    "llguidance",
346                    llguidance_schema(),
347                ))
348                .item(create_grammar_variant_schema(
349                    "lark",
350                    Schema::Object(
351                        ObjectBuilder::new()
352                            .schema_type(SchemaType::Type(Type::String))
353                            .build(),
354                    ),
355                ))
356                .build(),
357        ))
358    }
359}
360
361impl ToSchema for Grammar {
362    fn schemas(
363        schemas: &mut Vec<(
364            String,
365            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
366        )>,
367    ) {
368        schemas.push((Grammar::name().into(), Grammar::schema()));
369    }
370}
371
372/// Helper function to create a grammar variant schema
373fn create_grammar_variant_schema(type_value: &str, value_schema: Schema) -> Schema {
374    Schema::Object(
375        ObjectBuilder::new()
376            .schema_type(SchemaType::Type(Type::Object))
377            .property(
378                "type",
379                RefOr::T(Schema::Object(
380                    ObjectBuilder::new()
381                        .schema_type(SchemaType::Type(Type::String))
382                        .enum_values(Some(vec![serde_json::Value::String(
383                            type_value.to_string(),
384                        )]))
385                        .build(),
386                )),
387            )
388            .property("value", RefOr::T(value_schema))
389            .required("type")
390            .required("value")
391            .build(),
392    )
393}
394
395/// Helper function to generate LLGuidance schema
396fn llguidance_schema() -> Schema {
397    let grammar_with_lexer_schema = Schema::Object(
398        ObjectBuilder::new()
399            .schema_type(SchemaType::Type(Type::Object))
400            .property(
401                "name",
402                RefOr::T(Schema::Object(
403                    ObjectBuilder::new()
404                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
405                        .description(Some(
406                            "The name of this grammar, can be used in GenGrammar nodes",
407                        ))
408                        .build(),
409                )),
410            )
411            .property(
412                "json_schema",
413                RefOr::T(Schema::Object(
414                    ObjectBuilder::new()
415                        .schema_type(SchemaType::from_iter([Type::Object, Type::Null]))
416                        .description(Some("The JSON schema that the grammar should generate"))
417                        .build(),
418                )),
419            )
420            .property(
421                "lark_grammar",
422                RefOr::T(Schema::Object(
423                    ObjectBuilder::new()
424                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
425                        .description(Some("The Lark grammar that the grammar should generate"))
426                        .build(),
427                )),
428            )
429            .description(Some("Grammar configuration with lexer settings"))
430            .build(),
431    );
432
433    Schema::Object(
434        ObjectBuilder::new()
435            .schema_type(SchemaType::Type(Type::Object))
436            .property(
437                "grammars",
438                RefOr::T(Schema::Array(
439                    ArrayBuilder::new()
440                        .items(RefOr::T(grammar_with_lexer_schema))
441                        .description(Some("List of grammar configurations"))
442                        .build(),
443                )),
444            )
445            .property(
446                "max_tokens",
447                RefOr::T(Schema::Object(
448                    ObjectBuilder::new()
449                        .schema_type(SchemaType::from_iter([Type::Integer, Type::Null]))
450                        .description(Some("Maximum number of tokens to generate"))
451                        .build(),
452                )),
453            )
454            .required("grammars")
455            .description(Some("Top-level grammar configuration for LLGuidance"))
456            .build(),
457    )
458}
459
460/// JSON Schema for structured responses
461#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
462pub struct JsonSchemaResponseFormat {
463    pub name: String,
464    pub schema: serde_json::Value,
465}
466
467/// Response format for model output
468#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
469#[serde(tag = "type")]
470pub enum ResponseFormat {
471    /// Free-form text response
472    #[serde(rename = "text")]
473    Text,
474    /// Structured response following a JSON schema
475    #[serde(rename = "json_schema")]
476    JsonSchema {
477        json_schema: JsonSchemaResponseFormat,
478    },
479}
480
481/// Chat completion request following OpenAI's specification
482#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
483pub struct ChatCompletionRequest {
484    #[schema(
485        schema_with = messages_schema,
486        example = json!(vec![Message{content:Some(MessageContent{0: either::Left(("Why did the crab cross the road?".to_string()))}), role:"user".to_string(), name: None, tool_calls: None}])
487    )]
488    #[serde(with = "either::serde_untagged")]
489    pub messages: Either<Vec<Message>, String>,
490    #[schema(example = "mistral")]
491    #[serde(default = "default_model")]
492    pub model: String,
493    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
494    pub logit_bias: Option<HashMap<u32, f32>>,
495    #[serde(default = "default_false")]
496    #[schema(example = false)]
497    pub logprobs: bool,
498    #[schema(example = json!(Option::None::<usize>))]
499    pub top_logprobs: Option<usize>,
500    #[schema(example = 256)]
501    #[serde(alias = "max_completion_tokens")]
502    pub max_tokens: Option<usize>,
503    #[serde(rename = "n")]
504    #[serde(default = "default_1usize")]
505    #[schema(example = 1)]
506    pub n_choices: usize,
507    #[schema(example = json!(Option::None::<f32>))]
508    pub presence_penalty: Option<f32>,
509    #[schema(example = json!(Option::None::<f32>))]
510    pub frequency_penalty: Option<f32>,
511    #[schema(example = json!(Option::None::<f32>))]
512    pub repetition_penalty: Option<f32>,
513    #[serde(rename = "stop")]
514    #[schema(example = json!(Option::None::<StopTokens>))]
515    pub stop_seqs: Option<StopTokens>,
516    #[schema(example = 0.7)]
517    pub temperature: Option<f64>,
518    #[schema(example = json!(Option::None::<f64>))]
519    pub top_p: Option<f64>,
520    #[schema(example = true)]
521    pub stream: Option<bool>,
522    #[schema(example = json!(Option::None::<Vec<Tool>>))]
523    pub tools: Option<Vec<Tool>>,
524    #[schema(example = json!(Option::None::<ToolChoice>))]
525    pub tool_choice: Option<ToolChoice>,
526    #[schema(example = json!(Option::None::<ResponseFormat>))]
527    pub response_format: Option<ResponseFormat>,
528    #[schema(example = json!(Option::None::<WebSearchOptions>))]
529    pub web_search_options: Option<WebSearchOptions>,
530
531    // mistral.rs additional
532    #[schema(example = json!(Option::None::<usize>))]
533    pub top_k: Option<usize>,
534    #[schema(example = json!(Option::None::<Grammar>))]
535    pub grammar: Option<Grammar>,
536    #[schema(example = json!(Option::None::<f64>))]
537    pub min_p: Option<f64>,
538    #[schema(example = json!(Option::None::<f32>))]
539    pub dry_multiplier: Option<f32>,
540    #[schema(example = json!(Option::None::<f32>))]
541    pub dry_base: Option<f32>,
542    #[schema(example = json!(Option::None::<usize>))]
543    pub dry_allowed_length: Option<usize>,
544    #[schema(example = json!(Option::None::<String>))]
545    pub dry_sequence_breakers: Option<Vec<String>>,
546    #[schema(example = json!(Option::None::<bool>))]
547    pub enable_thinking: Option<bool>,
548    /// Reasoning effort level for Harmony-format models (GPT-OSS).
549    /// Controls the depth of reasoning/analysis: "low", "medium", or "high".
550    #[schema(example = json!(Option::None::<String>))]
551    pub reasoning_effort: Option<String>,
552    #[schema(example = json!(Option::None::<bool>))]
553    #[serde(default)]
554    pub truncate_sequence: Option<bool>,
555}
556
557/// Function for ChatCompletionRequest.messages Schema generation to handle `Either`
558fn messages_schema() -> Schema {
559    Schema::OneOf(
560        OneOfBuilder::new()
561            .item(Schema::Array(
562                ArrayBuilder::new()
563                    .items(RefOr::Ref(utoipa::openapi::Ref::from_schema_name(
564                        "Message",
565                    )))
566                    .build(),
567            ))
568            .item(Schema::Object(
569                ObjectBuilder::new()
570                    .schema_type(SchemaType::Type(Type::String))
571                    .build(),
572            ))
573            .build(),
574    )
575}
576
577/// Model information metadata about an available mode
578#[derive(Debug, Serialize, ToSchema)]
579pub struct ModelObject {
580    pub id: String,
581    pub object: &'static str,
582    pub created: u64,
583    pub owned_by: &'static str,
584    /// Whether tools are available through MCP or tool callbacks
585    #[serde(skip_serializing_if = "Option::is_none")]
586    pub tools_available: Option<bool>,
587    /// Number of tools available from MCP servers
588    #[serde(skip_serializing_if = "Option::is_none")]
589    pub mcp_tools_count: Option<usize>,
590    /// Number of connected MCP servers
591    #[serde(skip_serializing_if = "Option::is_none")]
592    pub mcp_servers_connected: Option<usize>,
593}
594
595/// Collection of available models
596#[derive(Debug, Serialize, ToSchema)]
597pub struct ModelObjects {
598    pub object: &'static str,
599    pub data: Vec<ModelObject>,
600}
601
602/// Legacy OpenAI compatible text completion request
603#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
604pub struct CompletionRequest {
605    #[schema(example = "mistral")]
606    #[serde(default = "default_model")]
607    pub model: String,
608    #[schema(example = "Say this is a test.")]
609    pub prompt: String,
610    #[schema(example = 1)]
611    pub best_of: Option<usize>,
612    #[serde(rename = "echo")]
613    #[serde(default = "default_false")]
614    #[schema(example = false)]
615    pub echo_prompt: bool,
616    #[schema(example = json!(Option::None::<f32>))]
617    pub presence_penalty: Option<f32>,
618    #[schema(example = json!(Option::None::<f32>))]
619    pub frequency_penalty: Option<f32>,
620    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
621    pub logit_bias: Option<HashMap<u32, f32>>,
622    #[schema(example = json!(Option::None::<usize>))]
623    pub logprobs: Option<usize>,
624    #[schema(example = 16)]
625    #[serde(alias = "max_completion_tokens")]
626    pub max_tokens: Option<usize>,
627    #[serde(rename = "n")]
628    #[serde(default = "default_1usize")]
629    #[schema(example = 1)]
630    pub n_choices: usize,
631    #[serde(rename = "stop")]
632    #[schema(example = json!(Option::None::<StopTokens>))]
633    pub stop_seqs: Option<StopTokens>,
634    pub stream: Option<bool>,
635    #[schema(example = 0.7)]
636    pub temperature: Option<f64>,
637    #[schema(example = json!(Option::None::<f64>))]
638    pub top_p: Option<f64>,
639    #[schema(example = json!(Option::None::<String>))]
640    pub suffix: Option<String>,
641    #[serde(rename = "user")]
642    pub _user: Option<String>,
643    #[schema(example = json!(Option::None::<Vec<Tool>>))]
644    pub tools: Option<Vec<Tool>>,
645    #[schema(example = json!(Option::None::<ToolChoice>))]
646    pub tool_choice: Option<ToolChoice>,
647
648    // mistral.rs additional
649    #[schema(example = json!(Option::None::<usize>))]
650    pub top_k: Option<usize>,
651    #[schema(example = json!(Option::None::<Grammar>))]
652    pub grammar: Option<Grammar>,
653    #[schema(example = json!(Option::None::<f64>))]
654    pub min_p: Option<f64>,
655    #[schema(example = json!(Option::None::<f32>))]
656    pub repetition_penalty: Option<f32>,
657    #[schema(example = json!(Option::None::<f32>))]
658    pub dry_multiplier: Option<f32>,
659    #[schema(example = json!(Option::None::<f32>))]
660    pub dry_base: Option<f32>,
661    #[schema(example = json!(Option::None::<usize>))]
662    pub dry_allowed_length: Option<usize>,
663    #[schema(example = json!(Option::None::<String>))]
664    pub dry_sequence_breakers: Option<Vec<String>>,
665    #[schema(example = json!(Option::None::<bool>))]
666    #[serde(default)]
667    pub truncate_sequence: Option<bool>,
668}
669
670#[derive(Debug, Clone, Deserialize, Serialize)]
671#[serde(untagged)]
672pub enum EmbeddingInput {
673    Single(String),
674    Multiple(Vec<String>),
675    Tokens(Vec<u32>),
676    TokensBatch(Vec<Vec<u32>>),
677}
678
679impl PartialSchema for EmbeddingInput {
680    fn schema() -> RefOr<Schema> {
681        RefOr::T(embedding_input_schema())
682    }
683}
684
685impl ToSchema for EmbeddingInput {
686    fn schemas(
687        schemas: &mut Vec<(
688            String,
689            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
690        )>,
691    ) {
692        schemas.push((EmbeddingInput::name().into(), EmbeddingInput::schema()));
693    }
694}
695
696fn embedding_input_schema() -> Schema {
697    Schema::OneOf(
698        OneOfBuilder::new()
699            .item(Schema::Object(
700                ObjectBuilder::new()
701                    .schema_type(SchemaType::Type(Type::String))
702                    .description(Some("Single input string"))
703                    .build(),
704            ))
705            .item(Schema::Array(
706                ArrayBuilder::new()
707                    .items(RefOr::T(Schema::Object(
708                        ObjectBuilder::new()
709                            .schema_type(SchemaType::Type(Type::String))
710                            .build(),
711                    )))
712                    .description(Some("Multiple input strings"))
713                    .build(),
714            ))
715            .item(Schema::Array(
716                ArrayBuilder::new()
717                    .items(RefOr::T(Schema::Object(
718                        ObjectBuilder::new()
719                            .schema_type(SchemaType::Type(Type::Integer))
720                            .build(),
721                    )))
722                    .description(Some("Single token array"))
723                    .build(),
724            ))
725            .item(Schema::Array(
726                ArrayBuilder::new()
727                    .items(RefOr::T(Schema::Array(
728                        ArrayBuilder::new()
729                            .items(RefOr::T(Schema::Object(
730                                ObjectBuilder::new()
731                                    .schema_type(SchemaType::Type(Type::Integer))
732                                    .build(),
733                            )))
734                            .build(),
735                    )))
736                    .description(Some("Multiple token arrays"))
737                    .build(),
738            ))
739            .build(),
740    )
741}
742
743#[derive(Debug, Clone, Deserialize, Serialize, ToSchema, Default)]
744#[serde(rename_all = "snake_case")]
745pub enum EmbeddingEncodingFormat {
746    #[default]
747    Float,
748    Base64,
749}
750
751#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
752pub struct EmbeddingRequest {
753    #[schema(example = "default")]
754    #[serde(default = "default_model")]
755    pub model: String,
756    pub input: EmbeddingInput,
757    #[schema(example = "float")]
758    #[serde(default)]
759    pub encoding_format: Option<EmbeddingEncodingFormat>,
760    #[schema(example = json!(Option::None::<usize>))]
761    pub dimensions: Option<usize>,
762    #[schema(example = json!(Option::None::<String>))]
763    #[serde(rename = "user")]
764    pub _user: Option<String>,
765
766    // mistral.rs additional
767    #[schema(example = json!(Option::None::<bool>))]
768    #[serde(default)]
769    pub truncate_sequence: Option<bool>,
770}
771
772#[derive(Debug, Clone, Serialize, ToSchema)]
773pub struct EmbeddingUsage {
774    pub prompt_tokens: u32,
775    pub total_tokens: u32,
776}
777
778#[derive(Debug, Clone, Serialize)]
779#[serde(untagged)]
780pub enum EmbeddingVector {
781    Float(Vec<f32>),
782    Base64(String),
783}
784
785impl PartialSchema for EmbeddingVector {
786    fn schema() -> RefOr<Schema> {
787        RefOr::T(embedding_vector_schema())
788    }
789}
790
791impl ToSchema for EmbeddingVector {
792    fn schemas(
793        schemas: &mut Vec<(
794            String,
795            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
796        )>,
797    ) {
798        schemas.push((EmbeddingVector::name().into(), EmbeddingVector::schema()));
799    }
800}
801
802fn embedding_vector_schema() -> Schema {
803    Schema::OneOf(
804        OneOfBuilder::new()
805            .item(Schema::Array(
806                ArrayBuilder::new()
807                    .items(RefOr::T(Schema::Object(
808                        ObjectBuilder::new()
809                            .schema_type(SchemaType::Type(Type::Number))
810                            .build(),
811                    )))
812                    .description(Some("Embedding returned as an array of floats"))
813                    .build(),
814            ))
815            .item(Schema::Object(
816                ObjectBuilder::new()
817                    .schema_type(SchemaType::Type(Type::String))
818                    .description(Some("Embedding returned as a base64-encoded string"))
819                    .build(),
820            ))
821            .build(),
822    )
823}
824
825#[derive(Debug, Clone, Serialize, ToSchema)]
826pub struct EmbeddingData {
827    pub object: &'static str,
828    pub embedding: EmbeddingVector,
829    pub index: usize,
830}
831
832#[derive(Debug, Clone, Serialize, ToSchema)]
833pub struct EmbeddingResponse {
834    pub object: &'static str,
835    pub data: Vec<EmbeddingData>,
836    pub model: String,
837    pub usage: EmbeddingUsage,
838}
839
840/// Image generation request
841#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
842pub struct ImageGenerationRequest {
843    #[schema(example = "mistral")]
844    #[serde(default = "default_model")]
845    pub model: String,
846    #[schema(example = "Draw a picture of a majestic, snow-covered mountain.")]
847    pub prompt: String,
848    #[serde(rename = "n")]
849    #[serde(default = "default_1usize")]
850    #[schema(example = 1)]
851    pub n_choices: usize,
852    #[serde(default = "default_response_format")]
853    pub response_format: ImageGenerationResponseFormat,
854    #[serde(default = "default_720usize")]
855    #[schema(example = 720)]
856    pub height: usize,
857    #[serde(default = "default_1280usize")]
858    #[schema(example = 1280)]
859    pub width: usize,
860}
861
862/// Audio format options for speech generation responses.
863#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, ToSchema)]
864#[serde(rename_all = "lowercase")]
865pub enum AudioResponseFormat {
866    /// Widely compatible, lossy compression, good for web streaming
867    #[default]
868    Mp3,
869    /// Good compression efficiency, ideal for real-time communication
870    Opus,
871    /// High-quality lossy compression, commonly used in mobile applications
872    Aac,
873    /// Lossless compression, larger file sizes but good audio quality
874    Flac,
875    /// Uncompressed, largest file sizes but maximum compatibility
876    Wav,
877    ///  Raw audio data, requires additional format specification
878    Pcm,
879}
880
881impl AudioResponseFormat {
882    /// Generate the appropriate MIME content type string for this audio format.
883    pub fn audio_content_type(
884        &self,
885        pcm_rate: usize,
886        pcm_channels: usize,
887        pcm_format: &'static str,
888    ) -> String {
889        let content_type = match &self {
890            AudioResponseFormat::Mp3 => "audio/mpeg".to_string(),
891            AudioResponseFormat::Opus => "audio/ogg; codecs=opus".to_string(),
892            AudioResponseFormat::Aac => "audio/aac".to_string(),
893            AudioResponseFormat::Flac => "audio/flac".to_string(),
894            AudioResponseFormat::Wav => "audio/wav".to_string(),
895            AudioResponseFormat::Pcm => format!("audio/pcm; codecs=1; format={pcm_format}"),
896        };
897
898        format!("{content_type}; rate={pcm_rate}; channels={pcm_channels}")
899    }
900}
901
902/// Speech generation request
903#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
904pub struct SpeechGenerationRequest {
905    /// The TTS model to use for audio generation.
906    #[schema(example = "nari-labs/Dia-1.6B")]
907    #[serde(default = "default_model")]
908    pub model: String,
909    /// The text content to convert to speech.
910    #[schema(
911        example = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face."
912    )]
913    pub input: String,
914    // `voice` and `instructions` are ignored.
915    /// The desired audio format for the generated speech.
916    #[schema(example = "mp3")]
917    pub response_format: AudioResponseFormat,
918}
919
920/// Helper type for messages field in ResponsesCreateRequest
921#[derive(Debug, Clone, Deserialize, Serialize)]
922#[serde(untagged)]
923pub enum ResponsesMessages {
924    Messages(Vec<Message>),
925    String(String),
926}
927
928impl ResponsesMessages {
929    pub fn into_either(self) -> Either<Vec<Message>, String> {
930        match self {
931            ResponsesMessages::Messages(msgs) => Either::Left(msgs),
932            ResponsesMessages::String(s) => Either::Right(s),
933        }
934    }
935}
936
937impl PartialSchema for ResponsesMessages {
938    fn schema() -> RefOr<Schema> {
939        RefOr::T(messages_schema())
940    }
941}
942
943impl ToSchema for ResponsesMessages {
944    fn schemas(
945        schemas: &mut Vec<(
946            String,
947            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
948        )>,
949    ) {
950        schemas.push((
951            ResponsesMessages::name().into(),
952            ResponsesMessages::schema(),
953        ));
954    }
955}
956
957/// Response creation request
958#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
959pub struct ResponsesCreateRequest {
960    #[schema(example = "mistral")]
961    #[serde(default = "default_model")]
962    pub model: String,
963    pub input: ResponsesMessages,
964    #[schema(example = json!(Option::None::<String>))]
965    pub instructions: Option<String>,
966    #[schema(example = json!(Option::None::<Vec<String>>))]
967    pub modalities: Option<Vec<String>>,
968    #[schema(example = json!(Option::None::<String>))]
969    pub previous_response_id: Option<String>,
970    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
971    pub logit_bias: Option<HashMap<u32, f32>>,
972    #[serde(default = "default_false")]
973    #[schema(example = false)]
974    pub logprobs: bool,
975    #[schema(example = json!(Option::None::<usize>))]
976    pub top_logprobs: Option<usize>,
977    #[schema(example = 256)]
978    #[serde(alias = "max_completion_tokens", alias = "max_output_tokens")]
979    pub max_tokens: Option<usize>,
980    #[serde(rename = "n")]
981    #[serde(default = "default_1usize")]
982    #[schema(example = 1)]
983    pub n_choices: usize,
984    #[schema(example = json!(Option::None::<f32>))]
985    pub presence_penalty: Option<f32>,
986    #[schema(example = json!(Option::None::<f32>))]
987    pub frequency_penalty: Option<f32>,
988    #[serde(rename = "stop")]
989    #[schema(example = json!(Option::None::<StopTokens>))]
990    pub stop_seqs: Option<StopTokens>,
991    #[schema(example = 0.7)]
992    pub temperature: Option<f64>,
993    #[schema(example = json!(Option::None::<f64>))]
994    pub top_p: Option<f64>,
995    #[schema(example = false)]
996    pub stream: Option<bool>,
997    #[schema(example = json!(Option::None::<Vec<Tool>>))]
998    pub tools: Option<Vec<Tool>>,
999    #[schema(example = json!(Option::None::<ToolChoice>))]
1000    pub tool_choice: Option<ToolChoice>,
1001    #[schema(example = json!(Option::None::<ResponseFormat>))]
1002    pub response_format: Option<ResponseFormat>,
1003    #[schema(example = json!(Option::None::<WebSearchOptions>))]
1004    pub web_search_options: Option<WebSearchOptions>,
1005    #[schema(example = json!(Option::None::<Value>))]
1006    pub metadata: Option<Value>,
1007    #[schema(example = json!(Option::None::<bool>))]
1008    pub output_token_details: Option<bool>,
1009    #[schema(example = json!(Option::None::<bool>))]
1010    pub parallel_tool_calls: Option<bool>,
1011    #[schema(example = json!(Option::None::<bool>))]
1012    pub store: Option<bool>,
1013    #[schema(example = json!(Option::None::<usize>))]
1014    pub max_tool_calls: Option<usize>,
1015    #[schema(example = json!(Option::None::<bool>))]
1016    pub reasoning_enabled: Option<bool>,
1017    #[schema(example = json!(Option::None::<usize>))]
1018    pub reasoning_max_tokens: Option<usize>,
1019    #[schema(example = json!(Option::None::<usize>))]
1020    pub reasoning_top_logprobs: Option<usize>,
1021    #[schema(example = json!(Option::None::<Vec<String>>))]
1022    pub truncation: Option<HashMap<String, Value>>,
1023
1024    // mistral.rs additional
1025    #[schema(example = json!(Option::None::<usize>))]
1026    pub top_k: Option<usize>,
1027    #[schema(example = json!(Option::None::<Grammar>))]
1028    pub grammar: Option<Grammar>,
1029    #[schema(example = json!(Option::None::<f64>))]
1030    pub min_p: Option<f64>,
1031    #[schema(example = json!(Option::None::<f32>))]
1032    pub repetition_penalty: Option<f32>,
1033    #[schema(example = json!(Option::None::<f32>))]
1034    pub dry_multiplier: Option<f32>,
1035    #[schema(example = json!(Option::None::<f32>))]
1036    pub dry_base: Option<f32>,
1037    #[schema(example = json!(Option::None::<usize>))]
1038    pub dry_allowed_length: Option<usize>,
1039    #[schema(example = json!(Option::None::<String>))]
1040    pub dry_sequence_breakers: Option<Vec<String>>,
1041    #[schema(example = json!(Option::None::<bool>))]
1042    pub enable_thinking: Option<bool>,
1043    #[schema(example = json!(Option::None::<bool>))]
1044    #[serde(default)]
1045    pub truncate_sequence: Option<bool>,
1046    /// Reasoning effort level for models that support extended thinking.
1047    /// Valid values: "low", "medium", "high"
1048    #[schema(example = json!(Option::None::<String>))]
1049    pub reasoning_effort: Option<String>,
1050}
1051
1052/// Response object
1053#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1054pub struct ResponsesObject {
1055    pub id: String,
1056    pub object: &'static str,
1057    pub created_at: f64,
1058    pub model: String,
1059    pub status: String,
1060    pub output: Vec<ResponsesOutput>,
1061    pub output_text: Option<String>,
1062    pub usage: Option<ResponsesUsage>,
1063    pub error: Option<ResponsesError>,
1064    pub metadata: Option<Value>,
1065    pub instructions: Option<String>,
1066    pub incomplete_details: Option<ResponsesIncompleteDetails>,
1067}
1068
1069/// Response usage information
1070#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1071pub struct ResponsesUsage {
1072    pub input_tokens: usize,
1073    pub output_tokens: usize,
1074    pub total_tokens: usize,
1075    pub input_tokens_details: Option<ResponsesInputTokensDetails>,
1076    pub output_tokens_details: Option<ResponsesOutputTokensDetails>,
1077}
1078
1079/// Input tokens details
1080#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1081pub struct ResponsesInputTokensDetails {
1082    pub audio_tokens: Option<usize>,
1083    pub cached_tokens: Option<usize>,
1084    pub image_tokens: Option<usize>,
1085    pub text_tokens: Option<usize>,
1086}
1087
1088/// Output tokens details
1089#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1090pub struct ResponsesOutputTokensDetails {
1091    pub audio_tokens: Option<usize>,
1092    pub text_tokens: Option<usize>,
1093    pub reasoning_tokens: Option<usize>,
1094}
1095
1096/// Response error
1097#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1098pub struct ResponsesError {
1099    #[serde(rename = "type")]
1100    pub error_type: String,
1101    pub message: String,
1102}
1103
1104/// Incomplete details for incomplete responses
1105#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1106pub struct ResponsesIncompleteDetails {
1107    pub reason: String,
1108}
1109
1110/// Response output item
1111#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1112pub struct ResponsesOutput {
1113    pub id: String,
1114    #[serde(rename = "type")]
1115    pub output_type: String,
1116    pub role: String,
1117    pub status: Option<String>,
1118    pub content: Vec<ResponsesContent>,
1119}
1120
1121/// Response content item
1122#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1123pub struct ResponsesContent {
1124    #[serde(rename = "type")]
1125    pub content_type: String,
1126    pub text: Option<String>,
1127    pub annotations: Option<Vec<ResponsesAnnotation>>,
1128}
1129
1130/// Response annotation
1131#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1132pub struct ResponsesAnnotation {
1133    #[serde(rename = "type")]
1134    pub annotation_type: String,
1135    pub text: String,
1136    pub start_index: usize,
1137    pub end_index: usize,
1138}
1139
1140/// Response streaming chunk
1141#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1142pub struct ResponsesChunk {
1143    pub id: String,
1144    pub object: &'static str,
1145    pub created_at: f64,
1146    pub model: String,
1147    pub chunk_type: String,
1148    pub delta: Option<ResponsesDelta>,
1149    pub usage: Option<ResponsesUsage>,
1150    pub metadata: Option<Value>,
1151}
1152
1153/// Response delta for streaming
1154#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1155pub struct ResponsesDelta {
1156    pub output: Option<Vec<ResponsesDeltaOutput>>,
1157    pub status: Option<String>,
1158}
1159
1160/// Response delta output item
1161#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1162pub struct ResponsesDeltaOutput {
1163    pub id: String,
1164    #[serde(rename = "type")]
1165    pub output_type: String,
1166    pub content: Option<Vec<ResponsesDeltaContent>>,
1167}
1168
1169/// Response delta content item
1170#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
1171pub struct ResponsesDeltaContent {
1172    #[serde(rename = "type")]
1173    pub content_type: String,
1174    pub text: Option<String>,
1175}