mistralrs_core/
model_selected.rs

1use std::path::PathBuf;
2
3use clap::Subcommand;
4
5use crate::{
6    pipeline::{AutoDeviceMapParams, IsqOrganization, NormalLoaderType, VisionLoaderType},
7    DiffusionLoaderType, ModelDType, SpeechLoaderType,
8};
9
10// Default value functions for serde deserialization
/// Serde default for `dtype` fields; mirrors clap's
/// `default_value_t = ModelDType::Auto` so deserialized configs may omit it.
fn default_model_dtype() -> ModelDType {
    ModelDType::Auto
}
14
/// Serde default for `max_seq_len` fields; mirrors clap's
/// `default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN`.
fn default_max_seq_len() -> usize {
    AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN
}
18
/// Serde default for `max_batch_size` fields; mirrors clap's
/// `default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE`.
fn default_max_batch_size() -> usize {
    AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE
}
22
23fn parse_arch(x: &str) -> Result<NormalLoaderType, String> {
24    x.parse()
25}
26
27fn parse_vision_arch(x: &str) -> Result<VisionLoaderType, String> {
28    x.parse()
29}
30
31fn parse_diffusion_arch(x: &str) -> Result<DiffusionLoaderType, String> {
32    x.parse()
33}
34
35fn parse_speech_arch(x: &str) -> Result<SpeechLoaderType, String> {
36    x.parse()
37}
38
39fn parse_model_dtype(x: &str) -> Result<ModelDType, String> {
40    x.parse()
41}
42
43#[derive(Debug, Clone, Subcommand, serde::Deserialize)]
44pub enum ModelSelected {
45    /// Select the model from a toml file
46    Toml {
47        /// .toml file containing the selector configuration.
48        #[arg(short, long)]
49        file: String,
50    },
51
52    /// Select a model for running via auto loader
53    Run {
54        /// Model ID to load from. May be a HF hub repo or a local path.
55        #[arg(short, long)]
56        model_id: String,
57
58        /// Path to local tokenizer.json file. If specified, it is used over any remote file.
59        #[arg(short, long)]
60        tokenizer_json: Option<String>,
61
62        /// Model data type. Defaults to `auto`.
63        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
64        dtype: ModelDType,
65
66        /// Path to a topology YAML file.
67        #[arg(long)]
68        topology: Option<String>,
69
70        /// ISQ organization: `default` or `moqe`.
71        #[arg(short, long)]
72        organization: Option<IsqOrganization>,
73
74        /// UQFF path to write to.
75        #[arg(short, long)]
76        write_uqff: Option<PathBuf>,
77
78        /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
79        #[arg(short, long)]
80        from_uqff: Option<String>,
81
82        /// .imatrix file to enhance GGUF quantizations with.
83        #[arg(short, long)]
84        imatrix: Option<PathBuf>,
85
86        /// Generate and utilize an imatrix to enhance GGUF quantizations.
87        #[arg(short, long)]
88        calibration_file: Option<PathBuf>,
89
90        /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
91        /// Only supported on specific vision models.
92        #[arg(short = 'e', long)]
93        max_edge: Option<u32>,
94
95        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
96        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
97        max_seq_len: usize,
98
99        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
100        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
101        max_batch_size: usize,
102
103        /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
104        /// Only supported on specific vision models.
105        #[arg(long)]
106        max_num_images: Option<usize>,
107
108        /// Maximum expected image size will have this edge length on both edges.
109        /// This affects automatic device mapping but is not a hard limit.
110        /// Only supported on specific vision models.
111        #[arg(long)]
112        max_image_length: Option<usize>,
113
114        /// Cache path for Hugging Face models downloaded locally.
115        #[arg(long)]
116        hf_cache_path: Option<PathBuf>,
117    },
118
119    /// Select a plain model, without quantization or adapters
120    Plain {
121        /// Model ID to load from. This may be a HF hub repo or a local path.
122        #[arg(short, long)]
123        model_id: String,
124
125        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
126        #[arg(short, long)]
127        #[serde(default)]
128        tokenizer_json: Option<String>,
129
130        /// The architecture of the model.
131        #[arg(short, long, value_parser = parse_arch)]
132        #[serde(default)]
133        arch: Option<NormalLoaderType>,
134
135        /// Model data type. Defaults to `auto`.
136        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
137        #[serde(default = "default_model_dtype")]
138        dtype: ModelDType,
139
140        /// Path to a topology YAML file.
141        #[arg(long)]
142        #[serde(default)]
143        topology: Option<String>,
144
145        #[allow(rustdoc::bare_urls)]
146        /// ISQ organization: `default` or `moqe` (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).
147        #[arg(short, long)]
148        #[serde(default)]
149        organization: Option<IsqOrganization>,
150
151        /// UQFF path to write to.
152        #[arg(short, long)]
153        #[serde(default)]
154        write_uqff: Option<PathBuf>,
155
156        /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;)
157        #[arg(short, long)]
158        #[serde(default)]
159        from_uqff: Option<String>,
160
161        /// .imatrix file to enhance GGUF quantizations with.
162        /// Incompatible with `--calibration-file/-c`
163        #[arg(short, long)]
164        #[serde(default)]
165        imatrix: Option<PathBuf>,
166
167        /// Generate and utilize an imatrix to enhance GGUF quantizations.
168        /// Incompatible with `--imatrix/-i`
169        #[arg(short, long)]
170        #[serde(default)]
171        calibration_file: Option<PathBuf>,
172
173        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
174        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
175        #[serde(default = "default_max_seq_len")]
176        max_seq_len: usize,
177
178        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
179        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
180        #[serde(default = "default_max_batch_size")]
181        max_batch_size: usize,
182
183        /// Cache path for Hugging Face models downloaded locally
184        #[arg(long)]
185        #[serde(default)]
186        hf_cache_path: Option<PathBuf>,
187    },
188
189    /// Select an X-LoRA architecture
190    XLora {
191        /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
192        #[arg(short, long)]
193        model_id: Option<String>,
194
195        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
196        #[arg(short, long)]
197        tokenizer_json: Option<String>,
198
199        /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
200        #[arg(short, long)]
201        xlora_model_id: String,
202
203        /// Ordering JSON file
204        #[arg(short, long)]
205        order: String,
206
207        /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
208        /// This makes the maximum running sequences 1.
209        #[arg(long)]
210        tgt_non_granular_index: Option<usize>,
211
212        /// The architecture of the model.
213        #[arg(short, long, value_parser = parse_arch)]
214        arch: Option<NormalLoaderType>,
215
216        /// Model data type. Defaults to `auto`.
217        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
218        dtype: ModelDType,
219
220        /// Path to a topology YAML file.
221        #[arg(long)]
222        topology: Option<String>,
223
224        /// UQFF path to write to.
225        #[arg(short, long)]
226        write_uqff: Option<PathBuf>,
227
228        /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
229        #[arg(short, long)]
230        from_uqff: Option<String>,
231
232        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
233        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
234        max_seq_len: usize,
235
236        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
237        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
238        max_batch_size: usize,
239
240        /// Cache path for Hugging Face models downloaded locally
241        #[arg(long)]
242        hf_cache_path: Option<PathBuf>,
243    },
244
245    /// Select a LoRA architecture
246    Lora {
247        /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
248        #[arg(short, long)]
249        model_id: Option<String>,
250
251        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
252        #[arg(short, long)]
253        tokenizer_json: Option<String>,
254
255        /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
256        #[arg(short, long)]
257        adapter_model_id: String,
258
259        /// The architecture of the model.
260        #[arg(long, value_parser = parse_arch)]
261        arch: Option<NormalLoaderType>,
262
263        /// Model data type. Defaults to `auto`.
264        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
265        dtype: ModelDType,
266
267        /// Path to a topology YAML file.
268        #[arg(long)]
269        topology: Option<String>,
270
271        /// UQFF path to write to.
272        #[arg(short, long)]
273        write_uqff: Option<PathBuf>,
274
275        /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
276        #[arg(short, long)]
277        from_uqff: Option<String>,
278
279        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
280        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
281        max_seq_len: usize,
282
283        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
284        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
285        max_batch_size: usize,
286
287        /// Cache path for Hugging Face models downloaded locally
288        #[arg(long)]
289        hf_cache_path: Option<PathBuf>,
290    },
291
292    /// Select a GGUF model.
293    GGUF {
294        /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
295        /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
296        /// removing all remote accesses.
297        #[arg(short, long)]
298        tok_model_id: Option<String>,
299
300        /// Quantized model ID to find the `quantized_filename`.
301        /// This may be a HF hub repo or a local path.
302        #[arg(short = 'm', long)]
303        quantized_model_id: String,
304
305        /// Quantized filename(s).
306        /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
307        #[arg(short = 'f', long)]
308        quantized_filename: String,
309
310        /// Model data type. Defaults to `auto`.
311        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
312        dtype: ModelDType,
313
314        /// Path to a topology YAML file.
315        #[arg(long)]
316        topology: Option<String>,
317
318        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
319        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
320        max_seq_len: usize,
321
322        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
323        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
324        max_batch_size: usize,
325    },
326
327    /// Select a GGUF model with X-LoRA.
328    XLoraGGUF {
329        /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
330        /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
331        /// removing all remote accesses.
332        #[arg(short, long)]
333        tok_model_id: Option<String>,
334
335        /// Quantized model ID to find the `quantized_filename`.
336        /// This may be a HF hub repo or a local path.
337        #[arg(short = 'm', long)]
338        quantized_model_id: String,
339
340        /// Quantized filename(s).
341        /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
342        #[arg(short = 'f', long)]
343        quantized_filename: String,
344
345        /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
346        #[arg(short, long)]
347        xlora_model_id: String,
348
349        /// Ordering JSON file
350        #[arg(short, long)]
351        order: String,
352
353        /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
354        /// This makes the maximum running sequences 1.
355        #[arg(long)]
356        tgt_non_granular_index: Option<usize>,
357
358        /// Model data type. Defaults to `auto`.
359        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
360        dtype: ModelDType,
361
362        /// Path to a topology YAML file.
363        #[arg(long)]
364        topology: Option<String>,
365
366        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
367        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
368        max_seq_len: usize,
369
370        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
371        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
372        max_batch_size: usize,
373    },
374
375    /// Select a GGUF model with LoRA.
376    LoraGGUF {
377        /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
378        /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
379        /// removing all remote accesses.
380        #[arg(short, long)]
381        tok_model_id: Option<String>,
382
383        /// Quantized model ID to find the `quantized_filename`.
384        /// This may be a HF hub repo or a local path.
385        #[arg(short = 'm', long)]
386        quantized_model_id: String,
387
388        /// Quantized filename(s).
389        /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
390        #[arg(short = 'f', long)]
391        quantized_filename: String,
392
393        /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
394        #[arg(short, long)]
395        adapters_model_id: String,
396
397        /// Ordering JSON file
398        #[arg(short, long)]
399        order: String,
400
401        /// Model data type. Defaults to `auto`.
402        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
403        dtype: ModelDType,
404
405        /// Path to a topology YAML file.
406        #[arg(long)]
407        topology: Option<String>,
408
409        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
410        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
411        max_seq_len: usize,
412
413        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
414        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
415        max_batch_size: usize,
416    },
417
418    /// Select a GGML model.
419    GGML {
420        /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
421        #[arg(short, long)]
422        tok_model_id: String,
423
424        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
425        #[arg(long)]
426        tokenizer_json: Option<String>,
427
428        /// Quantized model ID to find the `quantized_filename`.
429        /// This may be a HF hub repo or a local path.
430        #[arg(short = 'm', long)]
431        quantized_model_id: String,
432
433        /// Quantized filename.
434        #[arg(short = 'f', long)]
435        quantized_filename: String,
436
437        /// GQA value
438        #[arg(short, long, default_value_t = 1)]
439        gqa: usize,
440
441        /// Model data type. Defaults to `auto`.
442        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
443        dtype: ModelDType,
444
445        /// Path to a topology YAML file.
446        #[arg(long)]
447        topology: Option<String>,
448
449        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
450        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
451        max_seq_len: usize,
452
453        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
454        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
455        max_batch_size: usize,
456    },
457
458    /// Select a GGML model with X-LoRA.
459    XLoraGGML {
460        /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
461        #[arg(short, long)]
462        tok_model_id: Option<String>,
463
464        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
465        #[arg(long)]
466        tokenizer_json: Option<String>,
467
468        /// Quantized model ID to find the `quantized_filename`.
469        /// This may be a HF hub repo or a local path.
470        #[arg(short = 'm', long)]
471        quantized_model_id: String,
472
473        /// Quantized filename.
474        #[arg(short = 'f', long)]
475        quantized_filename: String,
476
477        /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
478        #[arg(short, long)]
479        xlora_model_id: String,
480
481        /// Ordering JSON file
482        #[arg(short, long)]
483        order: String,
484
485        /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
486        /// This makes the maximum running sequences 1.
487        #[arg(long)]
488        tgt_non_granular_index: Option<usize>,
489
490        /// GQA value
491        #[arg(short, long, default_value_t = 1)]
492        gqa: usize,
493
494        /// Model data type. Defaults to `auto`.
495        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
496        dtype: ModelDType,
497
498        /// Path to a topology YAML file.
499        #[arg(long)]
500        topology: Option<String>,
501
502        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
503        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
504        max_seq_len: usize,
505
506        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
507        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
508        max_batch_size: usize,
509    },
510
511    /// Select a GGML model with LoRA.
512    LoraGGML {
513        /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
514        #[arg(short, long)]
515        tok_model_id: Option<String>,
516
517        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
518        #[arg(long)]
519        tokenizer_json: Option<String>,
520
521        /// Quantized model ID to find the `quantized_filename`.
522        /// This may be a HF hub repo or a local path.
523        #[arg(short = 'm', long)]
524        quantized_model_id: String,
525
526        /// Quantized filename.
527        #[arg(short = 'f', long)]
528        quantized_filename: String,
529
530        /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
531        #[arg(short, long)]
532        adapters_model_id: String,
533
534        /// Ordering JSON file
535        #[arg(short, long)]
536        order: String,
537
538        /// GQA value
539        #[arg(short, long, default_value_t = 1)]
540        gqa: usize,
541
542        /// Model data type. Defaults to `auto`.
543        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
544        dtype: ModelDType,
545
546        /// Path to a topology YAML file.
547        #[arg(long)]
548        topology: Option<String>,
549
550        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
551        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
552        max_seq_len: usize,
553
554        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
555        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
556        max_batch_size: usize,
557    },
558
559    /// Select a vision plain model, without quantization or adapters
560    VisionPlain {
561        /// Model ID to load from. This may be a HF hub repo or a local path.
562        #[arg(short, long)]
563        model_id: String,
564
565        /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
566        #[arg(short, long)]
567        tokenizer_json: Option<String>,
568
569        /// The architecture of the model.
570        #[arg(short, long, value_parser = parse_vision_arch)]
571        arch: Option<VisionLoaderType>,
572
573        /// Model data type. Defaults to `auto`.
574        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
575        dtype: ModelDType,
576
577        /// Path to a topology YAML file.
578        #[arg(long)]
579        topology: Option<String>,
580
581        /// UQFF path to write to.
582        #[arg(short, long)]
583        write_uqff: Option<PathBuf>,
584
585        /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
586        #[arg(short, long)]
587        from_uqff: Option<String>,
588
589        /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
590        /// This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.
591        #[arg(short = 'e', long)]
592        max_edge: Option<u32>,
593
594        /// Generate and utilize an imatrix to enhance GGUF quantizations.
595        #[arg(short, long)]
596        calibration_file: Option<PathBuf>,
597
598        /// .cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.
599        #[arg(short, long)]
600        imatrix: Option<PathBuf>,
601
602        /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
603        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
604        max_seq_len: usize,
605
606        /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
607        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
608        max_batch_size: usize,
609
610        /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
611        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_NUM_IMAGES)]
612        max_num_images: usize,
613
614        /// Maximum expected image size will have this edge length on both edges.
615        /// This affects automatic device mapping but is not a hard limit.
616        #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_IMAGE_LENGTH)]
617        max_image_length: usize,
618
619        /// Cache path for Hugging Face models downloaded locally
620        #[arg(long)]
621        hf_cache_path: Option<PathBuf>,
622    },
623
624    /// Select a diffusion model, without quantization or adapters
625    #[command(name = "diffusion")]
626    DiffusionPlain {
627        /// Model ID to load from. This may be a HF hub repo or a local path.
628        #[arg(short, long)]
629        model_id: String,
630
631        /// The architecture of the model.
632        #[arg(short, long, value_parser = parse_diffusion_arch)]
633        arch: DiffusionLoaderType,
634
635        /// Model data type. Defaults to `auto`.
636        #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
637        dtype: ModelDType,
638    },
639
640    Speech {
641        /// Model ID to load from. This may be a HF hub repo or a local path.
642        #[arg(short, long)]
643        model_id: String,
644
645        /// DAC Model ID to load from. If not provided, this is automatically downloaded from the default path for the model.
646        /// This may be a HF hub repo or a local path.
647        #[arg(short, long)]
648        dac_model_id: Option<String>,
649
650        /// The architecture of the model.
651        #[arg(short, long, value_parser = parse_speech_arch)]
652        arch: SpeechLoaderType,
653
654        /// Model data type. Defaults to `auto`.
655        #[arg(long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
656        dtype: ModelDType,
657    },
658
659    /// Select multi-model mode with configuration file
660    #[command(name = "multi-model")]
661    MultiModel {
662        /// Multi-model configuration file path (JSON format)
663        #[arg(short, long)]
664        config: String,
665
666        /// Default model ID to use when no model is specified in requests
667        #[arg(short, long)]
668        default_model_id: Option<String>,
669    },
670}