mistralrs_server_core/
openai.rs

1//! ## OpenAI compatible functionality.
2
3use std::{collections::HashMap, ops::Deref};
4
5use either::Either;
6use mistralrs_core::{
7    ImageGenerationResponseFormat, LlguidanceGrammar, Tool, ToolChoice, ToolType, WebSearchOptions,
8};
9use serde::{Deserialize, Serialize};
10use serde_json::Value;
11use utoipa::{
12    openapi::{schema::SchemaType, ArrayBuilder, ObjectBuilder, OneOfBuilder, RefOr, Schema, Type},
13    PartialSchema, ToSchema,
14};
15
/// Inner content structure for messages that can be either a string or key-value pairs.
///
/// The wrapped [`Either`] is (de)serialized untagged: `Left` is a bare JSON string,
/// `Right` is a JSON object whose values are all strings.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MessageInnerContent(
    #[serde(with = "either::serde_untagged")] pub Either<String, HashMap<String, String>>,
);
21
22// The impl Deref was preventing the Derive ToSchema and #[schema] macros from
23// properly working, so manually impl ToSchema
24impl PartialSchema for MessageInnerContent {
25    fn schema() -> RefOr<Schema> {
26        RefOr::T(message_inner_content_schema())
27    }
28}
29
30impl ToSchema for MessageInnerContent {
31    fn schemas(
32        schemas: &mut Vec<(
33            String,
34            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
35        )>,
36    ) {
37        schemas.push((
38            MessageInnerContent::name().into(),
39            MessageInnerContent::schema(),
40        ));
41    }
42}
43
44impl Deref for MessageInnerContent {
45    type Target = Either<String, HashMap<String, String>>;
46    fn deref(&self) -> &Self::Target {
47        &self.0
48    }
49}
50
51/// Function for MessageInnerContent Schema generation to handle `Either`
52fn message_inner_content_schema() -> Schema {
53    Schema::OneOf(
54        OneOfBuilder::new()
55            // Either::Left - simple string
56            .item(Schema::Object(
57                ObjectBuilder::new()
58                    .schema_type(SchemaType::Type(Type::String))
59                    .build(),
60            ))
61            // Either::Right - object with string values
62            .item(Schema::Object(
63                ObjectBuilder::new()
64                    .schema_type(SchemaType::Type(Type::Object))
65                    .additional_properties(Some(RefOr::T(Schema::Object(
66                        ObjectBuilder::new()
67                            .schema_type(SchemaType::Type(Type::String))
68                            .build(),
69                    ))))
70                    .build(),
71            ))
72            .build(),
73    )
74}
75
/// Message content that can be either simple text or complex structured content.
///
/// The wrapped [`Either`] is (de)serialized untagged: `Left` is a bare JSON string,
/// `Right` is a JSON array of objects whose values are [`MessageInnerContent`].
/// The inner field is private; construct values with [`MessageContent::from_text`]
/// or [`MessageContent::from_parts`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MessageContent(
    #[serde(with = "either::serde_untagged")]
    Either<String, Vec<HashMap<String, MessageInnerContent>>>,
);
82
83// The impl Deref was preventing the Derive ToSchema and #[schema] macros from
84// properly working, so manually impl ToSchema
85impl PartialSchema for MessageContent {
86    fn schema() -> RefOr<Schema> {
87        RefOr::T(message_content_schema())
88    }
89}
90
91impl ToSchema for MessageContent {
92    fn schemas(
93        schemas: &mut Vec<(
94            String,
95            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
96        )>,
97    ) {
98        schemas.push((MessageContent::name().into(), MessageContent::schema()));
99    }
100}
101
102impl MessageContent {
103    /// Create a new MessageContent from a string
104    pub fn from_text(text: String) -> Self {
105        MessageContent(Either::Left(text))
106    }
107
108    /// Create a new MessageContent from multimodal parts
109    pub fn from_parts(parts: Vec<HashMap<String, MessageInnerContent>>) -> Self {
110        MessageContent(Either::Right(parts))
111    }
112
113    /// Create a text content part for multimodal messages
114    pub fn text_part(text: String) -> HashMap<String, MessageInnerContent> {
115        let mut part = HashMap::new();
116        part.insert(
117            "type".to_string(),
118            MessageInnerContent(Either::Left("text".to_string())),
119        );
120        part.insert("text".to_string(), MessageInnerContent(Either::Left(text)));
121        part
122    }
123
124    /// Create an image URL content part for multimodal messages
125    pub fn image_url_part(url: String) -> HashMap<String, MessageInnerContent> {
126        let mut part = HashMap::new();
127        part.insert(
128            "type".to_string(),
129            MessageInnerContent(Either::Left("image_url".to_string())),
130        );
131        let mut image_url_obj = HashMap::new();
132        image_url_obj.insert("url".to_string(), url);
133        part.insert(
134            "image_url".to_string(),
135            MessageInnerContent(Either::Right(image_url_obj)),
136        );
137        part
138    }
139
140    /// Create an image URL content part with detail level
141    pub fn image_url_part_with_detail(
142        url: String,
143        detail: String,
144    ) -> HashMap<String, MessageInnerContent> {
145        let mut part = HashMap::new();
146        part.insert(
147            "type".to_string(),
148            MessageInnerContent(Either::Left("image_url".to_string())),
149        );
150        let mut image_url_obj = HashMap::new();
151        image_url_obj.insert("url".to_string(), url);
152        image_url_obj.insert("detail".to_string(), detail);
153        part.insert(
154            "image_url".to_string(),
155            MessageInnerContent(Either::Right(image_url_obj)),
156        );
157        part
158    }
159
160    /// Extract text from MessageContent
161    pub fn to_text(&self) -> Option<String> {
162        match &self.0 {
163            Either::Left(text) => Some(text.clone()),
164            Either::Right(parts) => {
165                // For complex content, try to extract text from parts
166                let mut text_parts = Vec::new();
167                for part in parts {
168                    for (key, value) in part {
169                        if key == "text" {
170                            if let Either::Left(text) = &**value {
171                                text_parts.push(text.clone());
172                            }
173                        }
174                    }
175                }
176                if text_parts.is_empty() {
177                    None
178                } else {
179                    Some(text_parts.join(" "))
180                }
181            }
182        }
183    }
184}
185
186impl Deref for MessageContent {
187    type Target = Either<String, Vec<HashMap<String, MessageInnerContent>>>;
188    fn deref(&self) -> &Self::Target {
189        &self.0
190    }
191}
192
193/// Function for MessageContent Schema generation to handle `Either`
194fn message_content_schema() -> Schema {
195    Schema::OneOf(
196        OneOfBuilder::new()
197            .item(Schema::Object(
198                ObjectBuilder::new()
199                    .schema_type(SchemaType::Type(Type::String))
200                    .build(),
201            ))
202            .item(Schema::Array(
203                ArrayBuilder::new()
204                    .items(RefOr::T(Schema::Object(
205                        ObjectBuilder::new()
206                            .schema_type(SchemaType::Type(Type::Object))
207                            .additional_properties(Some(RefOr::Ref(
208                                utoipa::openapi::Ref::from_schema_name("MessageInnerContent"),
209                            )))
210                            .build(),
211                    )))
212                    .build(),
213            ))
214            .build(),
215    )
216}
217
/// Represents a function call made by the assistant
///
/// When using tool calling, this structure contains the details of a function
/// that the model has decided to call, including the function name and its parameters.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
pub struct FunctionCalled {
    /// The name of the function to call
    pub name: String,
    /// The function arguments (JSON string).
    ///
    /// On deserialization the key `parameters` is accepted as an alias.
    #[serde(alias = "parameters")]
    pub arguments: String,
}
230
/// Represents a tool call made by the assistant
///
/// This structure wraps a function call with its type information.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
pub struct ToolCall {
    /// Unique identifier for this tool call (`None` when absent from the input)
    #[serde(default)]
    pub id: Option<String>,
    /// The type of tool being called (serialized under the key `type`)
    #[serde(rename = "type")]
    pub tp: ToolType,
    /// The function call details
    pub function: FunctionCalled,
}
245
/// Represents a single message in a conversation
///
/// ### Examples
///
/// ```ignore
/// use mistralrs_server_core::openai::{Message, MessageContent};
///
/// // User message
/// let user_msg = Message {
///     content: Some(MessageContent::from_text("What's 2+2?".to_string())),
///     role: "user".to_string(),
///     name: None,
///     tool_calls: None,
///     tool_call_id: None,
/// };
///
/// // System message
/// let system_msg = Message {
///     content: Some(MessageContent::from_text(
///         "You are a helpful assistant.".to_string(),
///     )),
///     role: "system".to_string(),
///     name: None,
///     tool_calls: None,
///     tool_call_id: None,
/// };
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct Message {
    /// The message content
    pub content: Option<MessageContent>,
    /// The role of the message sender ("user", "assistant", "system", "tool", etc.)
    pub role: String,
    /// Optional name attached to the message
    pub name: Option<String>,
    /// Optional list of tool calls (for assistant messages)
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Tool call ID this message is responding to (for tool messages)
    pub tool_call_id: Option<String>,
}
282
/// Stop token configuration for generation
///
/// Defines when the model should stop generating text, either with a single
/// stop token or multiple possible stop sequences.
///
/// (De)serialized untagged: a JSON array of strings maps to `Multi`, a bare
/// JSON string maps to `Single`.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
#[serde(untagged)]
pub enum StopTokens {
    /// Multiple possible stop sequences
    Multi(Vec<String>),
    /// Single stop sequence
    Single(String),
}
295
/// Serde default for boolean request flags that are off unless supplied
/// (used by `logprobs` and `echo`).
fn default_false() -> bool {
    bool::default()
}
300
/// Serde default for the `n` field: one choice per request.
fn default_1usize() -> usize {
    const DEFAULT_N: usize = 1;
    DEFAULT_N
}
305
/// Serde default for the image generation `height` field.
fn default_720usize() -> usize {
    const DEFAULT_HEIGHT: usize = 720;
    DEFAULT_HEIGHT
}
310
/// Serde default for the image generation `width` field.
fn default_1280usize() -> usize {
    const DEFAULT_WIDTH: usize = 1280;
    DEFAULT_WIDTH
}
315
/// Serde default for the `model` field of the request types in this module.
fn default_model() -> String {
    String::from("default")
}
320
321/// Default value helper
322fn default_response_format() -> ImageGenerationResponseFormat {
323    ImageGenerationResponseFormat::Url
324}
325
/// Grammar specification for structured generation
///
/// Defines different types of grammars that can be used to constrain model output,
/// ensuring it follows specific formats or structures.
///
/// (De)serialized adjacently tagged: `{"type": "<variant>", "value": <payload>}`,
/// where `<variant>` is one of `regex`, `json_schema`, `llguidance` or `lark`.
///
/// ### Examples
///
/// ```ignore
/// use mistralrs_server_core::openai::Grammar;
///
/// // Regex grammar for phone numbers
/// let phone_regex = Grammar::Regex(r"\d{3}-\d{3}-\d{4}".to_string());
///
/// // JSON schema for structured data
/// let json_schema = Grammar::JsonSchema(serde_json::json!({
///     "type": "object",
///     "properties": {
///         "name": {"type": "string"},
///         "age": {"type": "integer"}
///     },
///     "required": ["name", "age"]
/// }));
///
/// // Lark grammar for arithmetic expressions
/// let lark_grammar = Grammar::Lark(r#"
///     ?start: expr
///     expr: term ("+" term | "-" term)*
///     term: factor ("*" factor | "/" factor)*
///     factor: NUMBER | "(" expr ")"
///     %import common.NUMBER
/// "#.to_string());
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", content = "value")]
pub enum Grammar {
    /// Regular expression grammar
    #[serde(rename = "regex")]
    Regex(String),
    /// JSON schema grammar
    #[serde(rename = "json_schema")]
    JsonSchema(serde_json::Value),
    /// LLGuidance grammar
    #[serde(rename = "llguidance")]
    Llguidance(LlguidanceGrammar),
    /// Lark parser grammar
    #[serde(rename = "lark")]
    Lark(String),
}
374
375// Implement ToSchema manually to handle `LlguidanceGrammar`
376impl PartialSchema for Grammar {
377    fn schema() -> RefOr<Schema> {
378        RefOr::T(Schema::OneOf(
379            OneOfBuilder::new()
380                .item(create_grammar_variant_schema(
381                    "regex",
382                    Schema::Object(
383                        ObjectBuilder::new()
384                            .schema_type(SchemaType::Type(Type::String))
385                            .build(),
386                    ),
387                ))
388                .item(create_grammar_variant_schema(
389                    "json_schema",
390                    Schema::Object(
391                        ObjectBuilder::new()
392                            .schema_type(SchemaType::Type(Type::Object))
393                            .build(),
394                    ),
395                ))
396                .item(create_grammar_variant_schema(
397                    "llguidance",
398                    llguidance_schema(),
399                ))
400                .item(create_grammar_variant_schema(
401                    "lark",
402                    Schema::Object(
403                        ObjectBuilder::new()
404                            .schema_type(SchemaType::Type(Type::String))
405                            .build(),
406                    ),
407                ))
408                .build(),
409        ))
410    }
411}
412
413impl ToSchema for Grammar {
414    fn schemas(
415        schemas: &mut Vec<(
416            String,
417            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
418        )>,
419    ) {
420        schemas.push((Grammar::name().into(), Grammar::schema()));
421    }
422}
423
424/// Helper function to create a grammar variant schema
425fn create_grammar_variant_schema(type_value: &str, value_schema: Schema) -> Schema {
426    Schema::Object(
427        ObjectBuilder::new()
428            .schema_type(SchemaType::Type(Type::Object))
429            .property(
430                "type",
431                RefOr::T(Schema::Object(
432                    ObjectBuilder::new()
433                        .schema_type(SchemaType::Type(Type::String))
434                        .enum_values(Some(vec![serde_json::Value::String(
435                            type_value.to_string(),
436                        )]))
437                        .build(),
438                )),
439            )
440            .property("value", RefOr::T(value_schema))
441            .required("type")
442            .required("value")
443            .build(),
444    )
445}
446
447/// Helper function to generate LLGuidance schema
448fn llguidance_schema() -> Schema {
449    let grammar_with_lexer_schema = Schema::Object(
450        ObjectBuilder::new()
451            .schema_type(SchemaType::Type(Type::Object))
452            .property(
453                "name",
454                RefOr::T(Schema::Object(
455                    ObjectBuilder::new()
456                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
457                        .description(Some(
458                            "The name of this grammar, can be used in GenGrammar nodes",
459                        ))
460                        .build(),
461                )),
462            )
463            .property(
464                "json_schema",
465                RefOr::T(Schema::Object(
466                    ObjectBuilder::new()
467                        .schema_type(SchemaType::from_iter([Type::Object, Type::Null]))
468                        .description(Some("The JSON schema that the grammar should generate"))
469                        .build(),
470                )),
471            )
472            .property(
473                "lark_grammar",
474                RefOr::T(Schema::Object(
475                    ObjectBuilder::new()
476                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
477                        .description(Some("The Lark grammar that the grammar should generate"))
478                        .build(),
479                )),
480            )
481            .description(Some("Grammar configuration with lexer settings"))
482            .build(),
483    );
484
485    Schema::Object(
486        ObjectBuilder::new()
487            .schema_type(SchemaType::Type(Type::Object))
488            .property(
489                "grammars",
490                RefOr::T(Schema::Array(
491                    ArrayBuilder::new()
492                        .items(RefOr::T(grammar_with_lexer_schema))
493                        .description(Some("List of grammar configurations"))
494                        .build(),
495                )),
496            )
497            .property(
498                "max_tokens",
499                RefOr::T(Schema::Object(
500                    ObjectBuilder::new()
501                        .schema_type(SchemaType::from_iter([Type::Integer, Type::Null]))
502                        .description(Some("Maximum number of tokens to generate"))
503                        .build(),
504                )),
505            )
506            .required("grammars")
507            .description(Some("Top-level grammar configuration for LLGuidance"))
508            .build(),
509    )
510}
511
/// JSON Schema for structured responses
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct JsonSchemaResponseFormat {
    /// Name given to the schema
    pub name: String,
    /// The schema itself, kept as an arbitrary JSON value
    pub schema: serde_json::Value,
}
518
/// Response format for model output
///
/// (De)serialized internally tagged on the `type` key (`text` or `json_schema`).
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
#[serde(tag = "type")]
pub enum ResponseFormat {
    /// Free-form text response
    #[serde(rename = "text")]
    Text,
    /// Structured response following a JSON schema
    #[serde(rename = "json_schema")]
    JsonSchema {
        /// The named schema the response should follow
        json_schema: JsonSchemaResponseFormat,
    },
}
532
533/// Chat completion request following OpenAI's specification
534#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
535pub struct ChatCompletionRequest {
536    #[schema(
537        schema_with = messages_schema,
538        example = json!(vec![Message{content:Some(MessageContent{0: either::Left(("Why did the crab cross the road?".to_string()))}), role:"user".to_string(), name: None, tool_calls: None}])
539    )]
540    #[serde(with = "either::serde_untagged")]
541    pub messages: Either<Vec<Message>, String>,
542    #[schema(example = "mistral")]
543    #[serde(default = "default_model")]
544    pub model: String,
545    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
546    pub logit_bias: Option<HashMap<u32, f32>>,
547    #[serde(default = "default_false")]
548    #[schema(example = false)]
549    pub logprobs: bool,
550    #[schema(example = json!(Option::None::<usize>))]
551    pub top_logprobs: Option<usize>,
552    #[schema(example = 256)]
553    #[serde(alias = "max_completion_tokens")]
554    pub max_tokens: Option<usize>,
555    #[serde(rename = "n")]
556    #[serde(default = "default_1usize")]
557    #[schema(example = 1)]
558    pub n_choices: usize,
559    #[schema(example = json!(Option::None::<f32>))]
560    pub presence_penalty: Option<f32>,
561    #[schema(example = json!(Option::None::<f32>))]
562    pub frequency_penalty: Option<f32>,
563    #[schema(example = json!(Option::None::<f32>))]
564    pub repetition_penalty: Option<f32>,
565    #[serde(rename = "stop")]
566    #[schema(example = json!(Option::None::<StopTokens>))]
567    pub stop_seqs: Option<StopTokens>,
568    #[schema(example = 0.7)]
569    pub temperature: Option<f64>,
570    #[schema(example = json!(Option::None::<f64>))]
571    pub top_p: Option<f64>,
572    #[schema(example = true)]
573    pub stream: Option<bool>,
574    #[schema(example = json!(Option::None::<Vec<Tool>>))]
575    pub tools: Option<Vec<Tool>>,
576    #[schema(example = json!(Option::None::<ToolChoice>))]
577    pub tool_choice: Option<ToolChoice>,
578    #[schema(example = json!(Option::None::<ResponseFormat>))]
579    pub response_format: Option<ResponseFormat>,
580    #[schema(example = json!(Option::None::<WebSearchOptions>))]
581    pub web_search_options: Option<WebSearchOptions>,
582
583    // mistral.rs additional
584    #[schema(example = json!(Option::None::<usize>))]
585    pub top_k: Option<usize>,
586    #[schema(example = json!(Option::None::<Grammar>))]
587    pub grammar: Option<Grammar>,
588    #[schema(example = json!(Option::None::<f64>))]
589    pub min_p: Option<f64>,
590    #[schema(example = json!(Option::None::<f32>))]
591    pub dry_multiplier: Option<f32>,
592    #[schema(example = json!(Option::None::<f32>))]
593    pub dry_base: Option<f32>,
594    #[schema(example = json!(Option::None::<usize>))]
595    pub dry_allowed_length: Option<usize>,
596    #[schema(example = json!(Option::None::<String>))]
597    pub dry_sequence_breakers: Option<Vec<String>>,
598    #[schema(example = json!(Option::None::<bool>))]
599    pub enable_thinking: Option<bool>,
600    /// Reasoning effort level for Harmony-format models (GPT-OSS).
601    /// Controls the depth of reasoning/analysis: "low", "medium", or "high".
602    #[schema(example = json!(Option::None::<String>))]
603    pub reasoning_effort: Option<String>,
604    #[schema(example = json!(Option::None::<bool>))]
605    #[serde(default)]
606    pub truncate_sequence: Option<bool>,
607}
608
609/// Function for ChatCompletionRequest.messages Schema generation to handle `Either`
610fn messages_schema() -> Schema {
611    Schema::OneOf(
612        OneOfBuilder::new()
613            .item(Schema::Array(
614                ArrayBuilder::new()
615                    .items(RefOr::Ref(utoipa::openapi::Ref::from_schema_name(
616                        "Message",
617                    )))
618                    .build(),
619            ))
620            .item(Schema::Object(
621                ObjectBuilder::new()
622                    .schema_type(SchemaType::Type(Type::String))
623                    .build(),
624            ))
625            .build(),
626    )
627}
628
/// Model information metadata about an available model
///
/// The three optional fields are left out of the serialized output when `None`.
#[derive(Debug, Serialize, ToSchema)]
pub struct ModelObject {
    pub id: String,
    pub object: &'static str,
    pub created: u64,
    pub owned_by: &'static str,
    /// Whether tools are available through MCP or tool callbacks
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools_available: Option<bool>,
    /// Number of tools available from MCP servers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mcp_tools_count: Option<usize>,
    /// Number of connected MCP servers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mcp_servers_connected: Option<usize>,
}
646
/// Collection of available models
#[derive(Debug, Serialize, ToSchema)]
pub struct ModelObjects {
    pub object: &'static str,
    /// One entry per available model
    pub data: Vec<ModelObject>,
}
653
654/// Legacy OpenAI compatible text completion request
655#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
656pub struct CompletionRequest {
657    #[schema(example = "mistral")]
658    #[serde(default = "default_model")]
659    pub model: String,
660    #[schema(example = "Say this is a test.")]
661    pub prompt: String,
662    #[schema(example = 1)]
663    pub best_of: Option<usize>,
664    #[serde(rename = "echo")]
665    #[serde(default = "default_false")]
666    #[schema(example = false)]
667    pub echo_prompt: bool,
668    #[schema(example = json!(Option::None::<f32>))]
669    pub presence_penalty: Option<f32>,
670    #[schema(example = json!(Option::None::<f32>))]
671    pub frequency_penalty: Option<f32>,
672    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
673    pub logit_bias: Option<HashMap<u32, f32>>,
674    #[schema(example = json!(Option::None::<usize>))]
675    pub logprobs: Option<usize>,
676    #[schema(example = 16)]
677    #[serde(alias = "max_completion_tokens")]
678    pub max_tokens: Option<usize>,
679    #[serde(rename = "n")]
680    #[serde(default = "default_1usize")]
681    #[schema(example = 1)]
682    pub n_choices: usize,
683    #[serde(rename = "stop")]
684    #[schema(example = json!(Option::None::<StopTokens>))]
685    pub stop_seqs: Option<StopTokens>,
686    pub stream: Option<bool>,
687    #[schema(example = 0.7)]
688    pub temperature: Option<f64>,
689    #[schema(example = json!(Option::None::<f64>))]
690    pub top_p: Option<f64>,
691    #[schema(example = json!(Option::None::<String>))]
692    pub suffix: Option<String>,
693    #[serde(rename = "user")]
694    pub _user: Option<String>,
695    #[schema(example = json!(Option::None::<Vec<Tool>>))]
696    pub tools: Option<Vec<Tool>>,
697    #[schema(example = json!(Option::None::<ToolChoice>))]
698    pub tool_choice: Option<ToolChoice>,
699
700    // mistral.rs additional
701    #[schema(example = json!(Option::None::<usize>))]
702    pub top_k: Option<usize>,
703    #[schema(example = json!(Option::None::<Grammar>))]
704    pub grammar: Option<Grammar>,
705    #[schema(example = json!(Option::None::<f64>))]
706    pub min_p: Option<f64>,
707    #[schema(example = json!(Option::None::<f32>))]
708    pub repetition_penalty: Option<f32>,
709    #[schema(example = json!(Option::None::<f32>))]
710    pub dry_multiplier: Option<f32>,
711    #[schema(example = json!(Option::None::<f32>))]
712    pub dry_base: Option<f32>,
713    #[schema(example = json!(Option::None::<usize>))]
714    pub dry_allowed_length: Option<usize>,
715    #[schema(example = json!(Option::None::<String>))]
716    pub dry_sequence_breakers: Option<Vec<String>>,
717    #[schema(example = json!(Option::None::<bool>))]
718    #[serde(default)]
719    pub truncate_sequence: Option<bool>,
720}
721
/// Input accepted by the embeddings endpoint.
///
/// (De)serialized untagged, so the JSON shape alone selects the variant and
/// variants are tried in declaration order (an empty array therefore
/// deserializes as `Multiple`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum EmbeddingInput {
    /// A single string
    Single(String),
    /// An array of strings
    Multiple(Vec<String>),
    /// A single array of `u32` tokens
    Tokens(Vec<u32>),
    /// An array of `u32` token arrays
    TokensBatch(Vec<Vec<u32>>),
}
730
731impl PartialSchema for EmbeddingInput {
732    fn schema() -> RefOr<Schema> {
733        RefOr::T(embedding_input_schema())
734    }
735}
736
737impl ToSchema for EmbeddingInput {
738    fn schemas(
739        schemas: &mut Vec<(
740            String,
741            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
742        )>,
743    ) {
744        schemas.push((EmbeddingInput::name().into(), EmbeddingInput::schema()));
745    }
746}
747
748fn embedding_input_schema() -> Schema {
749    Schema::OneOf(
750        OneOfBuilder::new()
751            .item(Schema::Object(
752                ObjectBuilder::new()
753                    .schema_type(SchemaType::Type(Type::String))
754                    .description(Some("Single input string"))
755                    .build(),
756            ))
757            .item(Schema::Array(
758                ArrayBuilder::new()
759                    .items(RefOr::T(Schema::Object(
760                        ObjectBuilder::new()
761                            .schema_type(SchemaType::Type(Type::String))
762                            .build(),
763                    )))
764                    .description(Some("Multiple input strings"))
765                    .build(),
766            ))
767            .item(Schema::Array(
768                ArrayBuilder::new()
769                    .items(RefOr::T(Schema::Object(
770                        ObjectBuilder::new()
771                            .schema_type(SchemaType::Type(Type::Integer))
772                            .build(),
773                    )))
774                    .description(Some("Single token array"))
775                    .build(),
776            ))
777            .item(Schema::Array(
778                ArrayBuilder::new()
779                    .items(RefOr::T(Schema::Array(
780                        ArrayBuilder::new()
781                            .items(RefOr::T(Schema::Object(
782                                ObjectBuilder::new()
783                                    .schema_type(SchemaType::Type(Type::Integer))
784                                    .build(),
785                            )))
786                            .build(),
787                    )))
788                    .description(Some("Multiple token arrays"))
789                    .build(),
790            ))
791            .build(),
792    )
793}
794
/// Encoding requested for returned embeddings.
///
/// Variant names are (de)serialized in snake case (`float`, `base64`);
/// `Float` is the `Default`.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema, Default)]
#[serde(rename_all = "snake_case")]
pub enum EmbeddingEncodingFormat {
    #[default]
    Float,
    Base64,
}
802
/// Request body for the embeddings endpoint.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct EmbeddingRequest {
    /// Model identifier; `"default"` when omitted.
    #[schema(example = "default")]
    #[serde(default = "default_model")]
    pub model: String,
    /// The text or tokens to embed
    pub input: EmbeddingInput,
    /// Requested encoding; `None` when omitted.
    #[schema(example = "float")]
    #[serde(default)]
    pub encoding_format: Option<EmbeddingEncodingFormat>,
    #[schema(example = json!(Option::None::<usize>))]
    pub dimensions: Option<usize>,
    /// Sent on the wire as `user`.
    #[schema(example = json!(Option::None::<String>))]
    #[serde(rename = "user")]
    pub _user: Option<String>,

    // mistral.rs additional
    #[schema(example = json!(Option::None::<bool>))]
    #[serde(default)]
    pub truncate_sequence: Option<bool>,
}
823
/// Token usage figures reported with an embedding response.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct EmbeddingUsage {
    pub prompt_tokens: u32,
    pub total_tokens: u32,
}
829
/// A single embedding, in one of the two supported encodings.
///
/// Serialized untagged: `Float` becomes a JSON array of numbers, `Base64`
/// a JSON string.
#[derive(Debug, Clone, Serialize)]
#[serde(untagged)]
pub enum EmbeddingVector {
    /// Embedding as a list of `f32` values
    Float(Vec<f32>),
    /// Embedding as a base64-encoded string
    Base64(String),
}
836
837impl PartialSchema for EmbeddingVector {
838    fn schema() -> RefOr<Schema> {
839        RefOr::T(embedding_vector_schema())
840    }
841}
842
843impl ToSchema for EmbeddingVector {
844    fn schemas(
845        schemas: &mut Vec<(
846            String,
847            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
848        )>,
849    ) {
850        schemas.push((EmbeddingVector::name().into(), EmbeddingVector::schema()));
851    }
852}
853
854fn embedding_vector_schema() -> Schema {
855    Schema::OneOf(
856        OneOfBuilder::new()
857            .item(Schema::Array(
858                ArrayBuilder::new()
859                    .items(RefOr::T(Schema::Object(
860                        ObjectBuilder::new()
861                            .schema_type(SchemaType::Type(Type::Number))
862                            .build(),
863                    )))
864                    .description(Some("Embedding returned as an array of floats"))
865                    .build(),
866            ))
867            .item(Schema::Object(
868                ObjectBuilder::new()
869                    .schema_type(SchemaType::Type(Type::String))
870                    .description(Some("Embedding returned as a base64-encoded string"))
871                    .build(),
872            ))
873            .build(),
874    )
875}
876
/// One entry of an embedding response: a vector together with its index.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct EmbeddingData {
    pub object: &'static str,
    pub embedding: EmbeddingVector,
    pub index: usize,
}
883
/// Response body for the embeddings endpoint.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct EmbeddingResponse {
    pub object: &'static str,
    /// The embedding entries
    pub data: Vec<EmbeddingData>,
    pub model: String,
    pub usage: EmbeddingUsage,
}
891
/// Image generation request
///
/// Fields carrying `#[serde(default = "...")]` may be omitted from the JSON
/// body; `prompt` has no default and is therefore required.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct ImageGenerationRequest {
    /// Model identifier; filled by `default_model()` when omitted.
    #[schema(example = "mistral")]
    #[serde(default = "default_model")]
    pub model: String,
    /// Text prompt for the image.
    #[schema(example = "Draw a picture of a majestic, snow-covered mountain.")]
    pub prompt: String,
    /// Number of choices, exposed on the wire as `n`; filled by
    /// `default_1usize()` when omitted.
    #[serde(rename = "n")]
    #[serde(default = "default_1usize")]
    #[schema(example = 1)]
    pub n_choices: usize,
    /// Response format; filled by `default_response_format()` when omitted.
    #[serde(default = "default_response_format")]
    pub response_format: ImageGenerationResponseFormat,
    /// Image height; filled by `default_720usize()` when omitted.
    // NOTE(review): units are presumably pixels — confirm.
    #[serde(default = "default_720usize")]
    #[schema(example = 720)]
    pub height: usize,
    /// Image width; filled by `default_1280usize()` when omitted.
    // NOTE(review): units are presumably pixels — confirm.
    #[serde(default = "default_1280usize")]
    #[schema(example = 1280)]
    pub width: usize,
}
913
/// Audio format options for speech generation responses.
///
/// `#[serde(rename_all = "lowercase")]` makes each variant (de)serialize as
/// its lowercase name (for example `"mp3"`). `Mp3` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum AudioResponseFormat {
    /// Widely compatible, lossy compression, good for web streaming
    #[default]
    Mp3,
    /// Good compression efficiency, ideal for real-time communication
    Opus,
    /// High-quality lossy compression, commonly used in mobile applications
    Aac,
    /// Lossless compression, larger file sizes but good audio quality
    Flac,
    /// Uncompressed, largest file sizes but maximum compatibility
    Wav,
    /// Raw audio data, requires additional format specification
    Pcm,
}
932
933impl AudioResponseFormat {
934    /// Generate the appropriate MIME content type string for this audio format.
935    pub fn audio_content_type(
936        &self,
937        pcm_rate: usize,
938        pcm_channels: usize,
939        pcm_format: &'static str,
940    ) -> String {
941        let content_type = match &self {
942            AudioResponseFormat::Mp3 => "audio/mpeg".to_string(),
943            AudioResponseFormat::Opus => "audio/ogg; codecs=opus".to_string(),
944            AudioResponseFormat::Aac => "audio/aac".to_string(),
945            AudioResponseFormat::Flac => "audio/flac".to_string(),
946            AudioResponseFormat::Wav => "audio/wav".to_string(),
947            AudioResponseFormat::Pcm => format!("audio/pcm; codecs=1; format={pcm_format}"),
948        };
949
950        format!("{content_type}; rate={pcm_rate}; channels={pcm_channels}")
951    }
952}
953
/// Speech generation request
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct SpeechGenerationRequest {
    /// The TTS model to use for audio generation. Filled by `default_model()`
    /// when omitted.
    #[schema(example = "nari-labs/Dia-1.6B")]
    #[serde(default = "default_model")]
    pub model: String,
    /// The text content to convert to speech.
    #[schema(
        example = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face."
    )]
    pub input: String,
    // `voice` and `instructions` are ignored (no fields are declared for them).
    /// The desired audio format for the generated speech. No serde default is
    /// declared on this field, so it must be present in the request body.
    #[schema(example = "mp3")]
    pub response_format: AudioResponseFormat,
}
971
/// Helper type for the `input` field of `ResponsesCreateRequest`.
///
/// `#[serde(untagged)]`: on the wire the value is either a list of
/// [`Message`]s or a plain string, with no variant tag.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum ResponsesMessages {
    /// A list of messages.
    Messages(Vec<Message>),
    /// A single string.
    String(String),
}
979
980impl ResponsesMessages {
981    pub fn into_either(self) -> Either<Vec<Message>, String> {
982        match self {
983            ResponsesMessages::Messages(msgs) => Either::Left(msgs),
984            ResponsesMessages::String(s) => Either::Right(s),
985        }
986    }
987}
988
989impl PartialSchema for ResponsesMessages {
990    fn schema() -> RefOr<Schema> {
991        RefOr::T(messages_schema())
992    }
993}
994
995impl ToSchema for ResponsesMessages {
996    fn schemas(
997        schemas: &mut Vec<(
998            String,
999            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
1000        )>,
1001    ) {
1002        schemas.push((
1003            ResponsesMessages::name().into(),
1004            ResponsesMessages::schema(),
1005        ));
1006    }
1007}
1008
1009/// Response creation request
1010#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
1011pub struct ResponsesCreateRequest {
1012    #[schema(example = "mistral")]
1013    #[serde(default = "default_model")]
1014    pub model: String,
1015    pub input: ResponsesMessages,
1016    #[schema(example = json!(Option::None::<String>))]
1017    pub instructions: Option<String>,
1018    #[schema(example = json!(Option::None::<Vec<String>>))]
1019    pub modalities: Option<Vec<String>>,
1020    #[schema(example = json!(Option::None::<String>))]
1021    pub previous_response_id: Option<String>,
1022    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
1023    pub logit_bias: Option<HashMap<u32, f32>>,
1024    #[serde(default = "default_false")]
1025    #[schema(example = false)]
1026    pub logprobs: bool,
1027    #[schema(example = json!(Option::None::<usize>))]
1028    pub top_logprobs: Option<usize>,
1029    #[schema(example = 256)]
1030    #[serde(alias = "max_completion_tokens", alias = "max_output_tokens")]
1031    pub max_tokens: Option<usize>,
1032    #[serde(rename = "n")]
1033    #[serde(default = "default_1usize")]
1034    #[schema(example = 1)]
1035    pub n_choices: usize,
1036    #[schema(example = json!(Option::None::<f32>))]
1037    pub presence_penalty: Option<f32>,
1038    #[schema(example = json!(Option::None::<f32>))]
1039    pub frequency_penalty: Option<f32>,
1040    #[serde(rename = "stop")]
1041    #[schema(example = json!(Option::None::<StopTokens>))]
1042    pub stop_seqs: Option<StopTokens>,
1043    #[schema(example = 0.7)]
1044    pub temperature: Option<f64>,
1045    #[schema(example = json!(Option::None::<f64>))]
1046    pub top_p: Option<f64>,
1047    #[schema(example = false)]
1048    pub stream: Option<bool>,
1049    #[schema(example = json!(Option::None::<Vec<Tool>>))]
1050    pub tools: Option<Vec<Tool>>,
1051    #[schema(example = json!(Option::None::<ToolChoice>))]
1052    pub tool_choice: Option<ToolChoice>,
1053    #[schema(example = json!(Option::None::<ResponseFormat>))]
1054    pub response_format: Option<ResponseFormat>,
1055    #[schema(example = json!(Option::None::<WebSearchOptions>))]
1056    pub web_search_options: Option<WebSearchOptions>,
1057    #[schema(example = json!(Option::None::<Value>))]
1058    pub metadata: Option<Value>,
1059    #[schema(example = json!(Option::None::<bool>))]
1060    pub output_token_details: Option<bool>,
1061    #[schema(example = json!(Option::None::<bool>))]
1062    pub parallel_tool_calls: Option<bool>,
1063    #[schema(example = json!(Option::None::<bool>))]
1064    pub store: Option<bool>,
1065    #[schema(example = json!(Option::None::<usize>))]
1066    pub max_tool_calls: Option<usize>,
1067    #[schema(example = json!(Option::None::<bool>))]
1068    pub reasoning_enabled: Option<bool>,
1069    #[schema(example = json!(Option::None::<usize>))]
1070    pub reasoning_max_tokens: Option<usize>,
1071    #[schema(example = json!(Option::None::<usize>))]
1072    pub reasoning_top_logprobs: Option<usize>,
1073    #[schema(example = json!(Option::None::<Vec<String>>))]
1074    pub truncation: Option<HashMap<String, Value>>,
1075
1076    // mistral.rs additional
1077    #[schema(example = json!(Option::None::<usize>))]
1078    pub top_k: Option<usize>,
1079    #[schema(example = json!(Option::None::<Grammar>))]
1080    pub grammar: Option<Grammar>,
1081    #[schema(example = json!(Option::None::<f64>))]
1082    pub min_p: Option<f64>,
1083    #[schema(example = json!(Option::None::<f32>))]
1084    pub repetition_penalty: Option<f32>,
1085    #[schema(example = json!(Option::None::<f32>))]
1086    pub dry_multiplier: Option<f32>,
1087    #[schema(example = json!(Option::None::<f32>))]
1088    pub dry_base: Option<f32>,
1089    #[schema(example = json!(Option::None::<usize>))]
1090    pub dry_allowed_length: Option<usize>,
1091    #[schema(example = json!(Option::None::<String>))]
1092    pub dry_sequence_breakers: Option<Vec<String>>,
1093    #[schema(example = json!(Option::None::<bool>))]
1094    pub enable_thinking: Option<bool>,
1095    #[schema(example = json!(Option::None::<bool>))]
1096    #[serde(default)]
1097    pub truncate_sequence: Option<bool>,
1098    /// Reasoning effort level for models that support extended thinking.
1099    /// Valid values: "low", "medium", "high"
1100    #[schema(example = json!(Option::None::<String>))]
1101    pub reasoning_effort: Option<String>,
1102}
1103
/// Response object
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesObject {
    /// Identifier of this response.
    pub id: String,
    /// Static string carried under the `object` key.
    pub object: &'static str,
    /// Creation time as an `f64`.
    // NOTE(review): presumably Unix seconds — confirm against the producer.
    pub created_at: f64,
    /// Model name reported with the response.
    pub model: String,
    /// Status string.
    pub status: String,
    /// Output items.
    pub output: Vec<ResponsesOutput>,
    /// Optional `output_text` string.
    pub output_text: Option<String>,
    /// Token usage, when present.
    pub usage: Option<ResponsesUsage>,
    /// Error details, when present.
    pub error: Option<ResponsesError>,
    /// Arbitrary JSON metadata, when present.
    pub metadata: Option<Value>,
    /// Optional `instructions` string.
    pub instructions: Option<String>,
    /// Details for an incomplete response, when present.
    pub incomplete_details: Option<ResponsesIncompleteDetails>,
}
1120
/// Response usage information
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesUsage {
    /// Input token count.
    pub input_tokens: usize,
    /// Output token count.
    pub output_tokens: usize,
    /// Total token count.
    // NOTE(review): presumably input_tokens + output_tokens — the struct does
    // not enforce this; confirm where it is populated.
    pub total_tokens: usize,
    /// Optional breakdown of the input tokens.
    pub input_tokens_details: Option<ResponsesInputTokensDetails>,
    /// Optional breakdown of the output tokens.
    pub output_tokens_details: Option<ResponsesOutputTokensDetails>,
}
1130
/// Input tokens details
///
/// Every count is optional.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesInputTokensDetails {
    /// Audio token count, when reported.
    pub audio_tokens: Option<usize>,
    /// Cached token count, when reported.
    pub cached_tokens: Option<usize>,
    /// Image token count, when reported.
    pub image_tokens: Option<usize>,
    /// Text token count, when reported.
    pub text_tokens: Option<usize>,
}
1139
/// Output tokens details
///
/// Every count is optional.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesOutputTokensDetails {
    /// Audio token count, when reported.
    pub audio_tokens: Option<usize>,
    /// Text token count, when reported.
    pub text_tokens: Option<usize>,
    /// Reasoning token count, when reported.
    pub reasoning_tokens: Option<usize>,
}
1147
/// Response error
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesError {
    /// Error type string; exposed on the wire as `type` (which is a Rust
    /// keyword, hence the rename).
    #[serde(rename = "type")]
    pub error_type: String,
    /// Error message text.
    pub message: String,
}
1155
/// Incomplete details for incomplete responses
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesIncompleteDetails {
    /// Reason string.
    pub reason: String,
}
1161
/// Response output item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesOutput {
    /// Identifier of this output item.
    pub id: String,
    /// Output type string; exposed on the wire as `type`.
    #[serde(rename = "type")]
    pub output_type: String,
    /// Role string.
    pub role: String,
    /// Optional status string.
    pub status: Option<String>,
    /// Content items of this output.
    pub content: Vec<ResponsesContent>,
}
1172
/// Response content item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesContent {
    /// Content type string; exposed on the wire as `type`.
    #[serde(rename = "type")]
    pub content_type: String,
    /// Optional text.
    pub text: Option<String>,
    /// Optional list of annotations.
    pub annotations: Option<Vec<ResponsesAnnotation>>,
}
1181
/// Response annotation
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesAnnotation {
    /// Annotation type string; exposed on the wire as `type`.
    #[serde(rename = "type")]
    pub annotation_type: String,
    /// Annotation text.
    pub text: String,
    /// Start index of the annotated span.
    // NOTE(review): whether indices count bytes or characters, and whether
    // `end_index` is inclusive, is not visible here — confirm at the producer.
    pub start_index: usize,
    /// End index of the annotated span.
    pub end_index: usize,
}
1191
/// Response streaming chunk
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesChunk {
    /// Identifier carried by this chunk.
    pub id: String,
    /// Static string carried under the `object` key.
    pub object: &'static str,
    /// Creation time as an `f64`.
    // NOTE(review): presumably Unix seconds — confirm against the producer.
    pub created_at: f64,
    /// Model name reported with the chunk.
    pub model: String,
    /// Chunk type string. Unlike the other `*_type` fields in this file it
    /// has no serde rename, so it is exposed on the wire as `chunk_type`.
    pub chunk_type: String,
    /// Optional delta payload.
    pub delta: Option<ResponsesDelta>,
    /// Token usage, when present.
    pub usage: Option<ResponsesUsage>,
    /// Arbitrary JSON metadata, when present.
    pub metadata: Option<Value>,
}
1204
/// Response delta for streaming
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDelta {
    /// Optional list of delta output items.
    pub output: Option<Vec<ResponsesDeltaOutput>>,
    /// Optional status string.
    pub status: Option<String>,
}
1211
/// Response delta output item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDeltaOutput {
    /// Identifier of this output item.
    pub id: String,
    /// Output type string; exposed on the wire as `type`.
    #[serde(rename = "type")]
    pub output_type: String,
    /// Optional list of delta content items.
    pub content: Option<Vec<ResponsesDeltaContent>>,
}
1220
/// Response delta content item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDeltaContent {
    /// Content type string; exposed on the wire as `type`.
    #[serde(rename = "type")]
    pub content_type: String,
    /// Optional text.
    pub text: Option<String>,
}