// mistralrs_core/model_selected.rs
use std::path::PathBuf;

use clap::Subcommand;

use crate::{
    pipeline::{AutoDeviceMapParams, IsqOrganization, NormalLoaderType, VisionLoaderType},
    DiffusionLoaderType, ModelDType, SpeechLoaderType,
};
9
10// Default value functions for serde deserialization
11fn default_model_dtype() -> ModelDType {
12 ModelDType::Auto
13}
14
15fn default_max_seq_len() -> usize {
16 AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN
17}
18
19fn default_max_batch_size() -> usize {
20 AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE
21}
22
23fn parse_arch(x: &str) -> Result<NormalLoaderType, String> {
24 x.parse()
25}
26
27fn parse_vision_arch(x: &str) -> Result<VisionLoaderType, String> {
28 x.parse()
29}
30
31fn parse_diffusion_arch(x: &str) -> Result<DiffusionLoaderType, String> {
32 x.parse()
33}
34
35fn parse_speech_arch(x: &str) -> Result<SpeechLoaderType, String> {
36 x.parse()
37}
38
39fn parse_model_dtype(x: &str) -> Result<ModelDType, String> {
40 x.parse()
41}
42
43#[derive(Debug, Clone, Subcommand, serde::Deserialize)]
44pub enum ModelSelected {
45 /// Select the model from a toml file
46 Toml {
47 /// .toml file containing the selector configuration.
48 #[arg(short, long)]
49 file: String,
50 },
51
52 /// Select a model for running via auto loader
53 Run {
54 /// Model ID to load from. May be a HF hub repo or a local path.
55 #[arg(short, long)]
56 model_id: String,
57
58 /// Path to local tokenizer.json file. If specified, it is used over any remote file.
59 #[arg(short, long)]
60 tokenizer_json: Option<String>,
61
62 /// Model data type. Defaults to `auto`.
63 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
64 dtype: ModelDType,
65
66 /// Path to a topology YAML file.
67 #[arg(long)]
68 topology: Option<String>,
69
70 /// ISQ organization: `default` or `moqe`.
71 #[arg(short, long)]
72 organization: Option<IsqOrganization>,
73
74 /// UQFF path to write to.
75 #[arg(short, long)]
76 write_uqff: Option<PathBuf>,
77
78 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
79 #[arg(short, long)]
80 from_uqff: Option<String>,
81
82 /// .imatrix file to enhance GGUF quantizations with.
83 #[arg(short, long)]
84 imatrix: Option<PathBuf>,
85
86 /// Generate and utilize an imatrix to enhance GGUF quantizations.
87 #[arg(short, long)]
88 calibration_file: Option<PathBuf>,
89
90 /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
91 /// Only supported on specific vision models.
92 #[arg(short = 'e', long)]
93 max_edge: Option<u32>,
94
95 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
96 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
97 max_seq_len: usize,
98
99 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
100 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
101 max_batch_size: usize,
102
103 /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
104 /// Only supported on specific vision models.
105 #[arg(long)]
106 max_num_images: Option<usize>,
107
108 /// Maximum expected image size will have this edge length on both edges.
109 /// This affects automatic device mapping but is not a hard limit.
110 /// Only supported on specific vision models.
111 #[arg(long)]
112 max_image_length: Option<usize>,
113
114 /// Cache path for Hugging Face models downloaded locally.
115 #[arg(long)]
116 hf_cache_path: Option<PathBuf>,
117 },
118
119 /// Select a plain model, without quantization or adapters
120 Plain {
121 /// Model ID to load from. This may be a HF hub repo or a local path.
122 #[arg(short, long)]
123 model_id: String,
124
125 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
126 #[arg(short, long)]
127 #[serde(default)]
128 tokenizer_json: Option<String>,
129
130 /// The architecture of the model.
131 #[arg(short, long, value_parser = parse_arch)]
132 #[serde(default)]
133 arch: Option<NormalLoaderType>,
134
135 /// Model data type. Defaults to `auto`.
136 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
137 #[serde(default = "default_model_dtype")]
138 dtype: ModelDType,
139
140 /// Path to a topology YAML file.
141 #[arg(long)]
142 #[serde(default)]
143 topology: Option<String>,
144
145 #[allow(rustdoc::bare_urls)]
146 /// ISQ organization: `default` or `moqe` (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).
147 #[arg(short, long)]
148 #[serde(default)]
149 organization: Option<IsqOrganization>,
150
151 /// UQFF path to write to.
152 #[arg(short, long)]
153 #[serde(default)]
154 write_uqff: Option<PathBuf>,
155
156 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;)
157 #[arg(short, long)]
158 #[serde(default)]
159 from_uqff: Option<String>,
160
161 /// .imatrix file to enhance GGUF quantizations with.
162 /// Incompatible with `--calibration-file/-c`
163 #[arg(short, long)]
164 #[serde(default)]
165 imatrix: Option<PathBuf>,
166
167 /// Generate and utilize an imatrix to enhance GGUF quantizations.
168 /// Incompatible with `--imatrix/-i`
169 #[arg(short, long)]
170 #[serde(default)]
171 calibration_file: Option<PathBuf>,
172
173 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
174 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
175 #[serde(default = "default_max_seq_len")]
176 max_seq_len: usize,
177
178 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
179 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
180 #[serde(default = "default_max_batch_size")]
181 max_batch_size: usize,
182
183 /// Cache path for Hugging Face models downloaded locally
184 #[arg(long)]
185 #[serde(default)]
186 hf_cache_path: Option<PathBuf>,
187 },
188
189 /// Select an X-LoRA architecture
190 XLora {
191 /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
192 #[arg(short, long)]
193 model_id: Option<String>,
194
195 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
196 #[arg(short, long)]
197 tokenizer_json: Option<String>,
198
199 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
200 #[arg(short, long)]
201 xlora_model_id: String,
202
203 /// Ordering JSON file
204 #[arg(short, long)]
205 order: String,
206
207 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
208 /// This makes the maximum running sequences 1.
209 #[arg(long)]
210 tgt_non_granular_index: Option<usize>,
211
212 /// The architecture of the model.
213 #[arg(short, long, value_parser = parse_arch)]
214 arch: Option<NormalLoaderType>,
215
216 /// Model data type. Defaults to `auto`.
217 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
218 dtype: ModelDType,
219
220 /// Path to a topology YAML file.
221 #[arg(long)]
222 topology: Option<String>,
223
224 /// UQFF path to write to.
225 #[arg(short, long)]
226 write_uqff: Option<PathBuf>,
227
228 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
229 #[arg(short, long)]
230 from_uqff: Option<String>,
231
232 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
233 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
234 max_seq_len: usize,
235
236 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
237 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
238 max_batch_size: usize,
239
240 /// Cache path for Hugging Face models downloaded locally
241 #[arg(long)]
242 hf_cache_path: Option<PathBuf>,
243 },
244
245 /// Select a LoRA architecture
246 Lora {
247 /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
248 #[arg(short, long)]
249 model_id: Option<String>,
250
251 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
252 #[arg(short, long)]
253 tokenizer_json: Option<String>,
254
255 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
256 #[arg(short, long)]
257 adapter_model_id: String,
258
259 /// The architecture of the model.
260 #[arg(long, value_parser = parse_arch)]
261 arch: Option<NormalLoaderType>,
262
263 /// Model data type. Defaults to `auto`.
264 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
265 dtype: ModelDType,
266
267 /// Path to a topology YAML file.
268 #[arg(long)]
269 topology: Option<String>,
270
271 /// UQFF path to write to.
272 #[arg(short, long)]
273 write_uqff: Option<PathBuf>,
274
275 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
276 #[arg(short, long)]
277 from_uqff: Option<String>,
278
279 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
280 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
281 max_seq_len: usize,
282
283 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
284 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
285 max_batch_size: usize,
286
287 /// Cache path for Hugging Face models downloaded locally
288 #[arg(long)]
289 hf_cache_path: Option<PathBuf>,
290 },
291
292 /// Select a GGUF model.
293 GGUF {
294 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
295 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
296 /// removing all remote accesses.
297 #[arg(short, long)]
298 tok_model_id: Option<String>,
299
300 /// Quantized model ID to find the `quantized_filename`.
301 /// This may be a HF hub repo or a local path.
302 #[arg(short = 'm', long)]
303 quantized_model_id: String,
304
305 /// Quantized filename(s).
306 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
307 #[arg(short = 'f', long)]
308 quantized_filename: String,
309
310 /// Model data type. Defaults to `auto`.
311 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
312 dtype: ModelDType,
313
314 /// Path to a topology YAML file.
315 #[arg(long)]
316 topology: Option<String>,
317
318 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
319 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
320 max_seq_len: usize,
321
322 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
323 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
324 max_batch_size: usize,
325 },
326
327 /// Select a GGUF model with X-LoRA.
328 XLoraGGUF {
329 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
330 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
331 /// removing all remote accesses.
332 #[arg(short, long)]
333 tok_model_id: Option<String>,
334
335 /// Quantized model ID to find the `quantized_filename`.
336 /// This may be a HF hub repo or a local path.
337 #[arg(short = 'm', long)]
338 quantized_model_id: String,
339
340 /// Quantized filename(s).
341 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
342 #[arg(short = 'f', long)]
343 quantized_filename: String,
344
345 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
346 #[arg(short, long)]
347 xlora_model_id: String,
348
349 /// Ordering JSON file
350 #[arg(short, long)]
351 order: String,
352
353 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
354 /// This makes the maximum running sequences 1.
355 #[arg(long)]
356 tgt_non_granular_index: Option<usize>,
357
358 /// Model data type. Defaults to `auto`.
359 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
360 dtype: ModelDType,
361
362 /// Path to a topology YAML file.
363 #[arg(long)]
364 topology: Option<String>,
365
366 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
367 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
368 max_seq_len: usize,
369
370 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
371 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
372 max_batch_size: usize,
373 },
374
375 /// Select a GGUF model with LoRA.
376 LoraGGUF {
377 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
378 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
379 /// removing all remote accesses.
380 #[arg(short, long)]
381 tok_model_id: Option<String>,
382
383 /// Quantized model ID to find the `quantized_filename`.
384 /// This may be a HF hub repo or a local path.
385 #[arg(short = 'm', long)]
386 quantized_model_id: String,
387
388 /// Quantized filename(s).
389 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
390 #[arg(short = 'f', long)]
391 quantized_filename: String,
392
393 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
394 #[arg(short, long)]
395 adapters_model_id: String,
396
397 /// Ordering JSON file
398 #[arg(short, long)]
399 order: String,
400
401 /// Model data type. Defaults to `auto`.
402 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
403 dtype: ModelDType,
404
405 /// Path to a topology YAML file.
406 #[arg(long)]
407 topology: Option<String>,
408
409 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
410 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
411 max_seq_len: usize,
412
413 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
414 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
415 max_batch_size: usize,
416 },
417
418 /// Select a GGML model.
419 GGML {
420 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
421 #[arg(short, long)]
422 tok_model_id: String,
423
424 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
425 #[arg(long)]
426 tokenizer_json: Option<String>,
427
428 /// Quantized model ID to find the `quantized_filename`.
429 /// This may be a HF hub repo or a local path.
430 #[arg(short = 'm', long)]
431 quantized_model_id: String,
432
433 /// Quantized filename.
434 #[arg(short = 'f', long)]
435 quantized_filename: String,
436
437 /// GQA value
438 #[arg(short, long, default_value_t = 1)]
439 gqa: usize,
440
441 /// Model data type. Defaults to `auto`.
442 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
443 dtype: ModelDType,
444
445 /// Path to a topology YAML file.
446 #[arg(long)]
447 topology: Option<String>,
448
449 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
450 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
451 max_seq_len: usize,
452
453 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
454 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
455 max_batch_size: usize,
456 },
457
458 /// Select a GGML model with X-LoRA.
459 XLoraGGML {
460 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
461 #[arg(short, long)]
462 tok_model_id: Option<String>,
463
464 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
465 #[arg(long)]
466 tokenizer_json: Option<String>,
467
468 /// Quantized model ID to find the `quantized_filename`.
469 /// This may be a HF hub repo or a local path.
470 #[arg(short = 'm', long)]
471 quantized_model_id: String,
472
473 /// Quantized filename.
474 #[arg(short = 'f', long)]
475 quantized_filename: String,
476
477 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
478 #[arg(short, long)]
479 xlora_model_id: String,
480
481 /// Ordering JSON file
482 #[arg(short, long)]
483 order: String,
484
485 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
486 /// This makes the maximum running sequences 1.
487 #[arg(long)]
488 tgt_non_granular_index: Option<usize>,
489
490 /// GQA value
491 #[arg(short, long, default_value_t = 1)]
492 gqa: usize,
493
494 /// Model data type. Defaults to `auto`.
495 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
496 dtype: ModelDType,
497
498 /// Path to a topology YAML file.
499 #[arg(long)]
500 topology: Option<String>,
501
502 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
503 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
504 max_seq_len: usize,
505
506 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
507 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
508 max_batch_size: usize,
509 },
510
511 /// Select a GGML model with LoRA.
512 LoraGGML {
513 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
514 #[arg(short, long)]
515 tok_model_id: Option<String>,
516
517 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
518 #[arg(long)]
519 tokenizer_json: Option<String>,
520
521 /// Quantized model ID to find the `quantized_filename`.
522 /// This may be a HF hub repo or a local path.
523 #[arg(short = 'm', long)]
524 quantized_model_id: String,
525
526 /// Quantized filename.
527 #[arg(short = 'f', long)]
528 quantized_filename: String,
529
530 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
531 #[arg(short, long)]
532 adapters_model_id: String,
533
534 /// Ordering JSON file
535 #[arg(short, long)]
536 order: String,
537
538 /// GQA value
539 #[arg(short, long, default_value_t = 1)]
540 gqa: usize,
541
542 /// Model data type. Defaults to `auto`.
543 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
544 dtype: ModelDType,
545
546 /// Path to a topology YAML file.
547 #[arg(long)]
548 topology: Option<String>,
549
550 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
551 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
552 max_seq_len: usize,
553
554 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
555 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
556 max_batch_size: usize,
557 },
558
559 /// Select a vision plain model, without quantization or adapters
560 VisionPlain {
561 /// Model ID to load from. This may be a HF hub repo or a local path.
562 #[arg(short, long)]
563 model_id: String,
564
565 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
566 #[arg(short, long)]
567 tokenizer_json: Option<String>,
568
569 /// The architecture of the model.
570 #[arg(short, long, value_parser = parse_vision_arch)]
571 arch: Option<VisionLoaderType>,
572
573 /// Model data type. Defaults to `auto`.
574 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
575 dtype: ModelDType,
576
577 /// Path to a topology YAML file.
578 #[arg(long)]
579 topology: Option<String>,
580
581 /// UQFF path to write to.
582 #[arg(short, long)]
583 write_uqff: Option<PathBuf>,
584
585 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
586 #[arg(short, long)]
587 from_uqff: Option<String>,
588
589 /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
590 /// This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.
591 #[arg(short = 'e', long)]
592 max_edge: Option<u32>,
593
594 /// Generate and utilize an imatrix to enhance GGUF quantizations.
595 #[arg(short, long)]
596 calibration_file: Option<PathBuf>,
597
598 /// .cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.
599 #[arg(short, long)]
600 imatrix: Option<PathBuf>,
601
602 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
603 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
604 max_seq_len: usize,
605
606 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
607 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
608 max_batch_size: usize,
609
610 /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
611 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_NUM_IMAGES)]
612 max_num_images: usize,
613
614 /// Maximum expected image size will have this edge length on both edges.
615 /// This affects automatic device mapping but is not a hard limit.
616 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_IMAGE_LENGTH)]
617 max_image_length: usize,
618
619 /// Cache path for Hugging Face models downloaded locally
620 #[arg(long)]
621 hf_cache_path: Option<PathBuf>,
622 },
623
624 /// Select a diffusion model, without quantization or adapters
625 #[command(name = "diffusion")]
626 DiffusionPlain {
627 /// Model ID to load from. This may be a HF hub repo or a local path.
628 #[arg(short, long)]
629 model_id: String,
630
631 /// The architecture of the model.
632 #[arg(short, long, value_parser = parse_diffusion_arch)]
633 arch: DiffusionLoaderType,
634
635 /// Model data type. Defaults to `auto`.
636 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
637 dtype: ModelDType,
638 },
639
640 Speech {
641 /// Model ID to load from. This may be a HF hub repo or a local path.
642 #[arg(short, long)]
643 model_id: String,
644
645 /// DAC Model ID to load from. If not provided, this is automatically downloaded from the default path for the model.
646 /// This may be a HF hub repo or a local path.
647 #[arg(short, long)]
648 dac_model_id: Option<String>,
649
650 /// The architecture of the model.
651 #[arg(short, long, value_parser = parse_speech_arch)]
652 arch: SpeechLoaderType,
653
654 /// Model data type. Defaults to `auto`.
655 #[arg(long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
656 dtype: ModelDType,
657 },
658
659 /// Select multi-model mode with configuration file
660 #[command(name = "multi-model")]
661 MultiModel {
662 /// Multi-model configuration file path (JSON format)
663 #[arg(short, long)]
664 config: String,
665
666 /// Default model ID to use when no model is specified in requests
667 #[arg(short, long)]
668 default_model_id: Option<String>,
669 },
670}