mistralrs_server_core/openai.rs

//! ## OpenAI-compatible functionality.

use std::{collections::HashMap, ops::Deref};

use either::Either;
use mistralrs_core::{
    ImageGenerationResponseFormat, LlguidanceGrammar, Tool, ToolChoice, ToolType, WebSearchOptions,
};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use utoipa::{
    openapi::{schema::SchemaType, ArrayBuilder, ObjectBuilder, OneOfBuilder, RefOr, Schema, Type},
    PartialSchema, ToSchema,
};

/// Inner content structure for messages that can be either a string or key-value pairs
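///
/// ### Example
///
/// A minimal sketch (not from the upstream docs) of the two JSON shapes the
/// untagged `Either` accepts; `serde_json` is assumed to be available:
///
/// ```ignore
/// let text: MessageInnerContent = serde_json::from_str(r#""hello""#).unwrap();
/// assert!(text.is_left());
///
/// let kv: MessageInnerContent =
///     serde_json::from_str(r#"{"url": "https://example.com/cat.png"}"#).unwrap();
/// assert!(kv.is_right());
/// ```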
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MessageInnerContent(
    #[serde(with = "either::serde_untagged")] pub Either<String, HashMap<String, String>>,
);

// The `Deref` impl was preventing the `ToSchema` derive and `#[schema]` macros
// from working properly, so `ToSchema` is implemented manually.
impl PartialSchema for MessageInnerContent {
    fn schema() -> RefOr<Schema> {
        RefOr::T(message_inner_content_schema())
    }
}

impl ToSchema for MessageInnerContent {
    fn schemas(
        schemas: &mut Vec<(
            String,
            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
        )>,
    ) {
        schemas.push((
            MessageInnerContent::name().into(),
            MessageInnerContent::schema(),
        ));
    }
}

impl Deref for MessageInnerContent {
    type Target = Either<String, HashMap<String, String>>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Generates the schema for `MessageInnerContent`, handling the untagged `Either`
fn message_inner_content_schema() -> Schema {
    Schema::OneOf(
        OneOfBuilder::new()
            // Either::Left - simple string
            .item(Schema::Object(
                ObjectBuilder::new()
                    .schema_type(SchemaType::Type(Type::String))
                    .build(),
            ))
            // Either::Right - object with string values
            .item(Schema::Object(
                ObjectBuilder::new()
                    .schema_type(SchemaType::Type(Type::Object))
                    .additional_properties(Some(RefOr::T(Schema::Object(
                        ObjectBuilder::new()
                            .schema_type(SchemaType::Type(Type::String))
                            .build(),
                    ))))
                    .build(),
            ))
            .build(),
    )
}

/// Message content that can be either simple text or complex structured content
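///
/// ### Example
///
/// Illustrative sketch of both accepted shapes, plus text extraction with
/// [`MessageContent::to_text`] (the content-part keys follow the OpenAI style):
///
/// ```ignore
/// let simple = MessageContent::from_text("Hello!".to_string());
/// assert_eq!(simple.to_text().as_deref(), Some("Hello!"));
///
/// let parts: MessageContent =
///     serde_json::from_str(r#"[{"type": "text", "text": "Hi there"}]"#).unwrap();
/// assert_eq!(parts.to_text().as_deref(), Some("Hi there"));
/// ```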
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct MessageContent(
    #[serde(with = "either::serde_untagged")]
    Either<String, Vec<HashMap<String, MessageInnerContent>>>,
);

// The `Deref` impl was preventing the `ToSchema` derive and `#[schema]` macros
// from working properly, so `ToSchema` is implemented manually.
impl PartialSchema for MessageContent {
    fn schema() -> RefOr<Schema> {
        RefOr::T(message_content_schema())
    }
}

impl ToSchema for MessageContent {
    fn schemas(
        schemas: &mut Vec<(
            String,
            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
        )>,
    ) {
        schemas.push((MessageContent::name().into(), MessageContent::schema()));
    }
}

impl MessageContent {
    /// Create a new MessageContent from a string
    pub fn from_text(text: String) -> Self {
        MessageContent(Either::Left(text))
    }

    /// Extract text from MessageContent
    pub fn to_text(&self) -> Option<String> {
        match &self.0 {
            Either::Left(text) => Some(text.clone()),
            Either::Right(parts) => {
                // For complex content, try to extract text from parts
                let mut text_parts = Vec::new();
                for part in parts {
                    for (key, value) in part {
                        if key == "text" {
                            if let Either::Left(text) = &**value {
                                text_parts.push(text.clone());
                            }
                        }
                    }
                }
                if text_parts.is_empty() {
                    None
                } else {
                    Some(text_parts.join(" "))
                }
            }
        }
    }
}

impl Deref for MessageContent {
    type Target = Either<String, Vec<HashMap<String, MessageInnerContent>>>;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

/// Generates the schema for `MessageContent`, handling the untagged `Either`
fn message_content_schema() -> Schema {
    Schema::OneOf(
        OneOfBuilder::new()
            .item(Schema::Object(
                ObjectBuilder::new()
                    .schema_type(SchemaType::Type(Type::String))
                    .build(),
            ))
            .item(Schema::Array(
                ArrayBuilder::new()
                    .items(RefOr::T(Schema::Object(
                        ObjectBuilder::new()
                            .schema_type(SchemaType::Type(Type::Object))
                            .additional_properties(Some(RefOr::Ref(
                                utoipa::openapi::Ref::from_schema_name("MessageInnerContent"),
                            )))
                            .build(),
                    )))
                    .build(),
            ))
            .build(),
    )
}

/// Represents a function call made by the assistant
///
/// When using tool calling, this structure contains the details of a function
/// that the model has decided to call, including the function name and its parameters.
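///
/// ### Example
///
/// A sketch of deserializing the OpenAI wire format, where the field is named
/// `arguments` (accepted here through the serde alias on `parameters`):
///
/// ```ignore
/// let call: FunctionCalled = serde_json::from_str(
///     r#"{"name": "get_weather", "arguments": "{\"city\": \"Boston\"}"}"#,
/// ).unwrap();
/// assert_eq!(call.name, "get_weather");
/// ```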
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
pub struct FunctionCalled {
    /// The name of the function to call
    pub name: String,
    /// The function arguments
    #[serde(alias = "arguments")]
    pub parameters: String,
}

/// Represents a tool call made by the assistant
///
/// This structure wraps a function call with its type information.
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize, ToSchema)]
pub struct ToolCall {
    /// The type of tool being called
    #[serde(rename = "type")]
    pub tp: ToolType,
    /// The function call details
    pub function: FunctionCalled,
}

/// Represents a single message in a conversation
///
/// ### Examples
///
/// ```ignore
/// use either::Either;
/// use mistralrs_server_core::openai::{Message, MessageContent};
///
/// // User message
/// let user_msg = Message {
///     content: Some(MessageContent(Either::Left("What's 2+2?".to_string()))),
///     role: "user".to_string(),
///     name: None,
///     tool_calls: None,
/// };
///
/// // System message
/// let system_msg = Message {
///     content: Some(MessageContent(Either::Left("You are a helpful assistant.".to_string()))),
///     role: "system".to_string(),
///     name: None,
///     tool_calls: None,
/// };
/// ```
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct Message {
    /// The message content
    pub content: Option<MessageContent>,
    /// The role of the message sender ("user", "assistant", "system", "tool", etc.)
    pub role: String,
    /// An optional name for the message sender
    pub name: Option<String>,
    /// Optional list of tool calls
    pub tool_calls: Option<Vec<ToolCall>>,
}

/// Stop token configuration for generation
///
/// Defines when the model should stop generating text, either with a single
/// stop token or multiple possible stop sequences.
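///
/// ### Example
///
/// Both JSON shapes are accepted because the enum is untagged (sketch):
///
/// ```ignore
/// let single: StopTokens = serde_json::from_str(r#""</s>""#).unwrap();
/// let multi: StopTokens = serde_json::from_str(r#"["</s>", "User:"]"#).unwrap();
/// ```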
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
#[serde(untagged)]
pub enum StopTokens {
    /// Multiple possible stop sequences
    Multi(Vec<String>),
    /// Single stop sequence
    Single(String),
}

/// Default value helper
fn default_false() -> bool {
    false
}

/// Default value helper
fn default_1usize() -> usize {
    1
}

/// Default value helper
fn default_720usize() -> usize {
    720
}

/// Default value helper
fn default_1280usize() -> usize {
    1280
}

/// Default value helper
fn default_model() -> String {
    "default".to_string()
}

/// Default value helper
fn default_response_format() -> ImageGenerationResponseFormat {
    ImageGenerationResponseFormat::Url
}

/// Grammar specification for structured generation
///
/// Defines different types of grammars that can be used to constrain model output,
/// ensuring it follows specific formats or structures.
///
/// ### Examples
///
/// ```ignore
/// use mistralrs_server_core::openai::Grammar;
///
/// // Regex grammar for phone numbers
/// let phone_regex = Grammar::Regex(r"\d{3}-\d{3}-\d{4}".to_string());
///
/// // JSON schema for structured data
/// let json_schema = Grammar::JsonSchema(serde_json::json!({
///     "type": "object",
///     "properties": {
///         "name": {"type": "string"},
///         "age": {"type": "integer"}
///     },
///     "required": ["name", "age"]
/// }));
///
/// // Lark grammar for arithmetic expressions
/// let lark_grammar = Grammar::Lark(r#"
///     ?start: expr
///     expr: term ("+" term | "-" term)*
///     term: factor ("*" factor | "/" factor)*
///     factor: NUMBER | "(" expr ")"
///     %import common.NUMBER
/// "#.to_string());
/// ```
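///
/// On the wire, the adjacently tagged serde representation (`type`/`value`) looks
/// like this for the regex variant above (illustrative):
///
/// ```ignore
/// // {"type":"regex","value":"\\d{3}-\\d{3}-\\d{4}"}
/// let json = serde_json::to_string(&phone_regex).unwrap();
/// ```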
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", content = "value")]
pub enum Grammar {
    /// Regular expression grammar
    #[serde(rename = "regex")]
    Regex(String),
    /// JSON schema grammar
    #[serde(rename = "json_schema")]
    JsonSchema(serde_json::Value),
    /// LLGuidance grammar
    #[serde(rename = "llguidance")]
    Llguidance(LlguidanceGrammar),
    /// Lark parser grammar
    #[serde(rename = "lark")]
    Lark(String),
}

// Implement ToSchema manually to handle `LlguidanceGrammar`
impl PartialSchema for Grammar {
    fn schema() -> RefOr<Schema> {
        RefOr::T(Schema::OneOf(
            OneOfBuilder::new()
                .item(create_grammar_variant_schema(
                    "regex",
                    Schema::Object(
                        ObjectBuilder::new()
                            .schema_type(SchemaType::Type(Type::String))
                            .build(),
                    ),
                ))
                .item(create_grammar_variant_schema(
                    "json_schema",
                    Schema::Object(
                        ObjectBuilder::new()
                            .schema_type(SchemaType::Type(Type::Object))
                            .build(),
                    ),
                ))
                .item(create_grammar_variant_schema(
                    "llguidance",
                    llguidance_schema(),
                ))
                .item(create_grammar_variant_schema(
                    "lark",
                    Schema::Object(
                        ObjectBuilder::new()
                            .schema_type(SchemaType::Type(Type::String))
                            .build(),
                    ),
                ))
                .build(),
        ))
    }
}

impl ToSchema for Grammar {
    fn schemas(
        schemas: &mut Vec<(
            String,
            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
        )>,
    ) {
        schemas.push((Grammar::name().into(), Grammar::schema()));
    }
}

/// Helper function to create a grammar variant schema
fn create_grammar_variant_schema(type_value: &str, value_schema: Schema) -> Schema {
    Schema::Object(
        ObjectBuilder::new()
            .schema_type(SchemaType::Type(Type::Object))
            .property(
                "type",
                RefOr::T(Schema::Object(
                    ObjectBuilder::new()
                        .schema_type(SchemaType::Type(Type::String))
                        .enum_values(Some(vec![serde_json::Value::String(
                            type_value.to_string(),
                        )]))
                        .build(),
                )),
            )
            .property("value", RefOr::T(value_schema))
            .required("type")
            .required("value")
            .build(),
    )
}

/// Helper function to generate LLGuidance schema
fn llguidance_schema() -> Schema {
    let grammar_with_lexer_schema = Schema::Object(
        ObjectBuilder::new()
            .schema_type(SchemaType::Type(Type::Object))
            .property(
                "name",
                RefOr::T(Schema::Object(
                    ObjectBuilder::new()
                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
                        .description(Some(
                            "The name of this grammar, can be used in GenGrammar nodes",
                        ))
                        .build(),
                )),
            )
            .property(
                "json_schema",
                RefOr::T(Schema::Object(
                    ObjectBuilder::new()
                        .schema_type(SchemaType::from_iter([Type::Object, Type::Null]))
                        .description(Some("The JSON schema that the grammar should generate"))
                        .build(),
                )),
            )
            .property(
                "lark_grammar",
                RefOr::T(Schema::Object(
                    ObjectBuilder::new()
                        .schema_type(SchemaType::from_iter([Type::String, Type::Null]))
                        .description(Some("The Lark grammar that the grammar should generate"))
                        .build(),
                )),
            )
            .description(Some("Grammar configuration with lexer settings"))
            .build(),
    );

    Schema::Object(
        ObjectBuilder::new()
            .schema_type(SchemaType::Type(Type::Object))
            .property(
                "grammars",
                RefOr::T(Schema::Array(
                    ArrayBuilder::new()
                        .items(RefOr::T(grammar_with_lexer_schema))
                        .description(Some("List of grammar configurations"))
                        .build(),
                )),
            )
            .property(
                "max_tokens",
                RefOr::T(Schema::Object(
                    ObjectBuilder::new()
                        .schema_type(SchemaType::from_iter([Type::Integer, Type::Null]))
                        .description(Some("Maximum number of tokens to generate"))
                        .build(),
                )),
            )
            .required("grammars")
            .description(Some("Top-level grammar configuration for LLGuidance"))
            .build(),
    )
}

/// JSON Schema for structured responses
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct JsonSchemaResponseFormat {
    pub name: String,
    pub schema: serde_json::Value,
}

/// Response format for model output
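///
/// ### Example
///
/// A sketch of the internally tagged request JSON for the `json_schema` variant:
///
/// ```ignore
/// let rf: ResponseFormat = serde_json::from_value(serde_json::json!({
///     "type": "json_schema",
///     "json_schema": { "name": "person", "schema": { "type": "object" } }
/// })).unwrap();
/// ```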
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
#[serde(tag = "type")]
pub enum ResponseFormat {
    /// Free-form text response
    #[serde(rename = "text")]
    Text,
    /// Structured response following a JSON schema
    #[serde(rename = "json_schema")]
    JsonSchema {
        json_schema: JsonSchemaResponseFormat,
    },
}

/// Chat completion request following OpenAI's specification
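///
/// ### Example
///
/// A minimal request body sketch; omitted optional fields deserialize to `None`
/// and defaulted fields (e.g. `model`, `n`) fall back to their serde defaults:
///
/// ```ignore
/// let req: ChatCompletionRequest = serde_json::from_value(serde_json::json!({
///     "model": "mistral",
///     "messages": [{"role": "user", "content": "Why did the crab cross the road?"}],
///     "max_tokens": 256,
///     "temperature": 0.7
/// })).unwrap();
/// ```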
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct ChatCompletionRequest {
    #[schema(
        schema_with = messages_schema,
        example = json!(vec![Message{content:Some(MessageContent{0: either::Left(("Why did the crab cross the road?".to_string()))}), role:"user".to_string(), name: None, tool_calls: None}])
    )]
    #[serde(with = "either::serde_untagged")]
    pub messages: Either<Vec<Message>, String>,
    #[schema(example = "mistral")]
    #[serde(default = "default_model")]
    pub model: String,
    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
    pub logit_bias: Option<HashMap<u32, f32>>,
    #[serde(default = "default_false")]
    #[schema(example = false)]
    pub logprobs: bool,
    #[schema(example = json!(Option::None::<usize>))]
    pub top_logprobs: Option<usize>,
    #[schema(example = 256)]
    #[serde(alias = "max_completion_tokens")]
    pub max_tokens: Option<usize>,
    #[serde(rename = "n")]
    #[serde(default = "default_1usize")]
    #[schema(example = 1)]
    pub n_choices: usize,
    #[schema(example = json!(Option::None::<f32>))]
    pub presence_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub frequency_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub repetition_penalty: Option<f32>,
    #[serde(rename = "stop")]
    #[schema(example = json!(Option::None::<StopTokens>))]
    pub stop_seqs: Option<StopTokens>,
    #[schema(example = 0.7)]
    pub temperature: Option<f64>,
    #[schema(example = json!(Option::None::<f64>))]
    pub top_p: Option<f64>,
    #[schema(example = true)]
    pub stream: Option<bool>,
    #[schema(example = json!(Option::None::<Vec<Tool>>))]
    pub tools: Option<Vec<Tool>>,
    #[schema(example = json!(Option::None::<ToolChoice>))]
    pub tool_choice: Option<ToolChoice>,
    #[schema(example = json!(Option::None::<ResponseFormat>))]
    pub response_format: Option<ResponseFormat>,
    #[schema(example = json!(Option::None::<WebSearchOptions>))]
    pub web_search_options: Option<WebSearchOptions>,

    // mistral.rs additional
    #[schema(example = json!(Option::None::<usize>))]
    pub top_k: Option<usize>,
    #[schema(example = json!(Option::None::<Grammar>))]
    pub grammar: Option<Grammar>,
    #[schema(example = json!(Option::None::<f64>))]
    pub min_p: Option<f64>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_multiplier: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_base: Option<f32>,
    #[schema(example = json!(Option::None::<usize>))]
    pub dry_allowed_length: Option<usize>,
    #[schema(example = json!(Option::None::<String>))]
    pub dry_sequence_breakers: Option<Vec<String>>,
    #[schema(example = json!(Option::None::<bool>))]
    pub enable_thinking: Option<bool>,
}

/// Generates the schema for the `messages` field of `ChatCompletionRequest`, handling the untagged `Either`
fn messages_schema() -> Schema {
    Schema::OneOf(
        OneOfBuilder::new()
            .item(Schema::Array(
                ArrayBuilder::new()
                    .items(RefOr::Ref(utoipa::openapi::Ref::from_schema_name(
                        "Message",
                    )))
                    .build(),
            ))
            .item(Schema::Object(
                ObjectBuilder::new()
                    .schema_type(SchemaType::Type(Type::String))
                    .build(),
            ))
            .build(),
    )
}

/// Metadata about an available model
#[derive(Debug, Serialize, ToSchema)]
pub struct ModelObject {
    pub id: String,
    pub object: &'static str,
    pub created: u64,
    pub owned_by: &'static str,
    /// Whether tools are available through MCP or tool callbacks
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools_available: Option<bool>,
    /// Number of tools available from MCP servers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mcp_tools_count: Option<usize>,
    /// Number of connected MCP servers
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mcp_servers_connected: Option<usize>,
}

/// Collection of available models
#[derive(Debug, Serialize, ToSchema)]
pub struct ModelObjects {
    pub object: &'static str,
    pub data: Vec<ModelObject>,
}

/// Legacy OpenAI-compatible text completion request
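///
/// ### Example
///
/// A minimal request body sketch for the legacy text completion endpoint:
///
/// ```ignore
/// let req: CompletionRequest = serde_json::from_value(serde_json::json!({
///     "model": "mistral",
///     "prompt": "Say this is a test.",
///     "max_tokens": 16
/// })).unwrap();
/// ```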
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct CompletionRequest {
    #[schema(example = "mistral")]
    #[serde(default = "default_model")]
    pub model: String,
    #[schema(example = "Say this is a test.")]
    pub prompt: String,
    #[schema(example = 1)]
    pub best_of: Option<usize>,
    #[serde(rename = "echo")]
    #[serde(default = "default_false")]
    #[schema(example = false)]
    pub echo_prompt: bool,
    #[schema(example = json!(Option::None::<f32>))]
    pub presence_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub frequency_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
    pub logit_bias: Option<HashMap<u32, f32>>,
    #[schema(example = json!(Option::None::<usize>))]
    pub logprobs: Option<usize>,
    #[schema(example = 16)]
    #[serde(alias = "max_completion_tokens")]
    pub max_tokens: Option<usize>,
    #[serde(rename = "n")]
    #[serde(default = "default_1usize")]
    #[schema(example = 1)]
    pub n_choices: usize,
    #[serde(rename = "stop")]
    #[schema(example = json!(Option::None::<StopTokens>))]
    pub stop_seqs: Option<StopTokens>,
    pub stream: Option<bool>,
    #[schema(example = 0.7)]
    pub temperature: Option<f64>,
    #[schema(example = json!(Option::None::<f64>))]
    pub top_p: Option<f64>,
    #[schema(example = json!(Option::None::<String>))]
    pub suffix: Option<String>,
    #[serde(rename = "user")]
    pub _user: Option<String>,
    #[schema(example = json!(Option::None::<Vec<Tool>>))]
    pub tools: Option<Vec<Tool>>,
    #[schema(example = json!(Option::None::<ToolChoice>))]
    pub tool_choice: Option<ToolChoice>,

    // mistral.rs additional
    #[schema(example = json!(Option::None::<usize>))]
    pub top_k: Option<usize>,
    #[schema(example = json!(Option::None::<Grammar>))]
    pub grammar: Option<Grammar>,
    #[schema(example = json!(Option::None::<f64>))]
    pub min_p: Option<f64>,
    #[schema(example = json!(Option::None::<f32>))]
    pub repetition_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_multiplier: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_base: Option<f32>,
    #[schema(example = json!(Option::None::<usize>))]
    pub dry_allowed_length: Option<usize>,
    #[schema(example = json!(Option::None::<String>))]
    pub dry_sequence_breakers: Option<Vec<String>>,
}

/// Image generation request
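///
/// ### Example
///
/// A sketch relying on the serde defaults (`n` = 1, URL response format,
/// 720x1280 output):
///
/// ```ignore
/// let req: ImageGenerationRequest = serde_json::from_value(serde_json::json!({
///     "model": "mistral",
///     "prompt": "Draw a picture of a majestic, snow-covered mountain."
/// })).unwrap();
/// assert_eq!((req.height, req.width), (720, 1280));
/// ```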
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct ImageGenerationRequest {
    #[schema(example = "mistral")]
    #[serde(default = "default_model")]
    pub model: String,
    #[schema(example = "Draw a picture of a majestic, snow-covered mountain.")]
    pub prompt: String,
    #[serde(rename = "n")]
    #[serde(default = "default_1usize")]
    #[schema(example = 1)]
    pub n_choices: usize,
    #[serde(default = "default_response_format")]
    pub response_format: ImageGenerationResponseFormat,
    #[serde(default = "default_720usize")]
    #[schema(example = 720)]
    pub height: usize,
    #[serde(default = "default_1280usize")]
    #[schema(example = 1280)]
    pub width: usize,
}

/// Audio format options for speech generation responses.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum AudioResponseFormat {
    /// Widely compatible, lossy compression, good for web streaming
    #[default]
    Mp3,
    /// Good compression efficiency, ideal for real-time communication
    Opus,
    /// High-quality lossy compression, commonly used in mobile applications
    Aac,
    /// Lossless compression, larger file sizes but good audio quality
    Flac,
    /// Uncompressed, largest file sizes but maximum compatibility
    Wav,
    /// Raw audio data, requires additional format specification
    Pcm,
}

impl AudioResponseFormat {
    /// Generate the appropriate MIME content type string for this audio format.
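    ///
    /// ### Example
    ///
    /// A sketch; the PCM parameter values here are illustrative, not defaults:
    ///
    /// ```ignore
    /// let ct = AudioResponseFormat::Pcm.audio_content_type(24_000, 1, "s16le");
    /// assert_eq!(ct, "audio/pcm; codecs=1; format=s16le; rate=24000; channels=1");
    /// ```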
    pub fn audio_content_type(
        &self,
        pcm_rate: usize,
        pcm_channels: usize,
        pcm_format: &'static str,
    ) -> String {
        let content_type = match &self {
            AudioResponseFormat::Mp3 => "audio/mpeg".to_string(),
            AudioResponseFormat::Opus => "audio/ogg; codecs=opus".to_string(),
            AudioResponseFormat::Aac => "audio/aac".to_string(),
            AudioResponseFormat::Flac => "audio/flac".to_string(),
            AudioResponseFormat::Wav => "audio/wav".to_string(),
            AudioResponseFormat::Pcm => format!("audio/pcm; codecs=1; format={pcm_format}"),
        };

        format!("{content_type}; rate={pcm_rate}; channels={pcm_channels}")
    }
}

/// Speech generation request
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct SpeechGenerationRequest {
    /// The TTS model to use for audio generation.
    #[schema(example = "nari-labs/Dia-1.6B")]
    #[serde(default = "default_model")]
    pub model: String,
    /// The text content to convert to speech.
    #[schema(
        example = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face."
    )]
    pub input: String,
    // `voice` and `instructions` are ignored.
    /// The desired audio format for the generated speech.
    #[schema(example = "mp3")]
    pub response_format: AudioResponseFormat,
}

/// Helper type for the messages-style `input` field in `ResponsesCreateRequest`
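///
/// ### Example
///
/// Because the enum is untagged, `input` may be a plain string or a message list
/// (sketch):
///
/// ```ignore
/// let input: ResponsesMessages = serde_json::from_str(r#""Hello there""#).unwrap();
/// assert!(input.into_either().is_right());
/// ```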
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum ResponsesMessages {
    Messages(Vec<Message>),
    String(String),
}

impl ResponsesMessages {
    pub fn into_either(self) -> Either<Vec<Message>, String> {
        match self {
            ResponsesMessages::Messages(msgs) => Either::Left(msgs),
            ResponsesMessages::String(s) => Either::Right(s),
        }
    }
}

impl PartialSchema for ResponsesMessages {
    fn schema() -> RefOr<Schema> {
        RefOr::T(messages_schema())
    }
}

impl ToSchema for ResponsesMessages {
    fn schemas(
        schemas: &mut Vec<(
            String,
            utoipa::openapi::RefOr<utoipa::openapi::schema::Schema>,
        )>,
    ) {
        schemas.push((
            ResponsesMessages::name().into(),
            ResponsesMessages::schema(),
        ));
    }
}

/// Response creation request
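///
/// ### Example
///
/// A minimal request body sketch; `input` accepts either a string or a list of
/// messages, and omitted optional fields deserialize to `None`:
///
/// ```ignore
/// let req: ResponsesCreateRequest = serde_json::from_value(serde_json::json!({
///     "model": "mistral",
///     "input": "Tell me a joke.",
///     "max_output_tokens": 256
/// })).unwrap();
/// ```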
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct ResponsesCreateRequest {
    #[schema(example = "mistral")]
    #[serde(default = "default_model")]
    pub model: String,
    pub input: ResponsesMessages,
    #[schema(example = json!(Option::None::<String>))]
    pub instructions: Option<String>,
    #[schema(example = json!(Option::None::<Vec<String>>))]
    pub modalities: Option<Vec<String>>,
    #[schema(example = json!(Option::None::<String>))]
    pub previous_response_id: Option<String>,
    #[schema(example = json!(Option::None::<HashMap<u32, f32>>))]
    pub logit_bias: Option<HashMap<u32, f32>>,
    #[serde(default = "default_false")]
    #[schema(example = false)]
    pub logprobs: bool,
    #[schema(example = json!(Option::None::<usize>))]
    pub top_logprobs: Option<usize>,
    #[schema(example = 256)]
    #[serde(alias = "max_completion_tokens", alias = "max_output_tokens")]
    pub max_tokens: Option<usize>,
    #[serde(rename = "n")]
    #[serde(default = "default_1usize")]
    #[schema(example = 1)]
    pub n_choices: usize,
    #[schema(example = json!(Option::None::<f32>))]
    pub presence_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub frequency_penalty: Option<f32>,
    #[serde(rename = "stop")]
    #[schema(example = json!(Option::None::<StopTokens>))]
    pub stop_seqs: Option<StopTokens>,
    #[schema(example = 0.7)]
    pub temperature: Option<f64>,
    #[schema(example = json!(Option::None::<f64>))]
    pub top_p: Option<f64>,
    #[schema(example = false)]
    pub stream: Option<bool>,
    #[schema(example = json!(Option::None::<Vec<Tool>>))]
    pub tools: Option<Vec<Tool>>,
    #[schema(example = json!(Option::None::<ToolChoice>))]
    pub tool_choice: Option<ToolChoice>,
    #[schema(example = json!(Option::None::<ResponseFormat>))]
    pub response_format: Option<ResponseFormat>,
    #[schema(example = json!(Option::None::<WebSearchOptions>))]
    pub web_search_options: Option<WebSearchOptions>,
    #[schema(example = json!(Option::None::<Value>))]
    pub metadata: Option<Value>,
    #[schema(example = json!(Option::None::<bool>))]
    pub output_token_details: Option<bool>,
    #[schema(example = json!(Option::None::<bool>))]
    pub parallel_tool_calls: Option<bool>,
    #[schema(example = json!(Option::None::<bool>))]
    pub store: Option<bool>,
    #[schema(example = json!(Option::None::<usize>))]
    pub max_tool_calls: Option<usize>,
    #[schema(example = json!(Option::None::<bool>))]
    pub reasoning_enabled: Option<bool>,
    #[schema(example = json!(Option::None::<usize>))]
    pub reasoning_max_tokens: Option<usize>,
    #[schema(example = json!(Option::None::<usize>))]
    pub reasoning_top_logprobs: Option<usize>,
    #[schema(example = json!(Option::None::<Vec<String>>))]
    pub truncation: Option<HashMap<String, Value>>,

    // mistral.rs additional
    #[schema(example = json!(Option::None::<usize>))]
    pub top_k: Option<usize>,
    #[schema(example = json!(Option::None::<Grammar>))]
    pub grammar: Option<Grammar>,
    #[schema(example = json!(Option::None::<f64>))]
    pub min_p: Option<f64>,
    #[schema(example = json!(Option::None::<f32>))]
    pub repetition_penalty: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_multiplier: Option<f32>,
    #[schema(example = json!(Option::None::<f32>))]
    pub dry_base: Option<f32>,
    #[schema(example = json!(Option::None::<usize>))]
    pub dry_allowed_length: Option<usize>,
    #[schema(example = json!(Option::None::<String>))]
    pub dry_sequence_breakers: Option<Vec<String>>,
    #[schema(example = json!(Option::None::<bool>))]
    pub enable_thinking: Option<bool>,
}

/// Response object
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesObject {
    pub id: String,
    pub object: &'static str,
    pub created_at: f64,
    pub model: String,
    pub status: String,
    pub output: Vec<ResponsesOutput>,
    pub output_text: Option<String>,
    pub usage: Option<ResponsesUsage>,
    pub error: Option<ResponsesError>,
    pub metadata: Option<Value>,
    pub instructions: Option<String>,
    pub incomplete_details: Option<ResponsesIncompleteDetails>,
}

/// Response usage information
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesUsage {
    pub input_tokens: usize,
    pub output_tokens: usize,
    pub total_tokens: usize,
    pub input_tokens_details: Option<ResponsesInputTokensDetails>,
    pub output_tokens_details: Option<ResponsesOutputTokensDetails>,
}

/// Input tokens details
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesInputTokensDetails {
    pub audio_tokens: Option<usize>,
    pub cached_tokens: Option<usize>,
    pub image_tokens: Option<usize>,
    pub text_tokens: Option<usize>,
}

/// Output tokens details
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesOutputTokensDetails {
    pub audio_tokens: Option<usize>,
    pub text_tokens: Option<usize>,
    pub reasoning_tokens: Option<usize>,
}

/// Response error
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}

/// Incomplete details for incomplete responses
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesIncompleteDetails {
    pub reason: String,
}

/// Response output item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesOutput {
    pub id: String,
    #[serde(rename = "type")]
    pub output_type: String,
    pub role: String,
    pub status: Option<String>,
    pub content: Vec<ResponsesContent>,
}

/// Response content item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesContent {
    #[serde(rename = "type")]
    pub content_type: String,
    pub text: Option<String>,
    pub annotations: Option<Vec<ResponsesAnnotation>>,
}

/// Response annotation
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesAnnotation {
    #[serde(rename = "type")]
    pub annotation_type: String,
    pub text: String,
    pub start_index: usize,
    pub end_index: usize,
}

/// Response streaming chunk
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesChunk {
    pub id: String,
    pub object: &'static str,
    pub created_at: f64,
    pub model: String,
    pub chunk_type: String,
    pub delta: Option<ResponsesDelta>,
    pub usage: Option<ResponsesUsage>,
    pub metadata: Option<Value>,
}

/// Response delta for streaming
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDelta {
    pub output: Option<Vec<ResponsesDeltaOutput>>,
    pub status: Option<String>,
}

/// Response delta output item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDeltaOutput {
    pub id: String,
    #[serde(rename = "type")]
    pub output_type: String,
    pub content: Option<Vec<ResponsesDeltaContent>>,
}

/// Response delta content item
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ResponsesDeltaContent {
    #[serde(rename = "type")]
    pub content_type: String,
    pub text: Option<String>,
}