// mistralrs_core/model_selected.rs
1use std::path::PathBuf;
2
3use clap::Subcommand;
4
5use crate::{
6 pipeline::{AutoDeviceMapParams, IsqOrganization, NormalLoaderType, VisionLoaderType},
7 DiffusionLoaderType, ModelDType, SpeechLoaderType,
8};
9
// Default value functions for serde deserialization
11fn default_model_dtype() -> ModelDType {
12 ModelDType::Auto
13}
14
15fn default_max_seq_len() -> usize {
16 AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN
17}
18
19fn default_max_batch_size() -> usize {
20 AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE
21}
22
23fn parse_arch(x: &str) -> Result<NormalLoaderType, String> {
24 x.parse()
25}
26
27fn parse_vision_arch(x: &str) -> Result<VisionLoaderType, String> {
28 x.parse()
29}
30
31fn parse_diffusion_arch(x: &str) -> Result<DiffusionLoaderType, String> {
32 x.parse()
33}
34
35fn parse_speech_arch(x: &str) -> Result<SpeechLoaderType, String> {
36 x.parse()
37}
38
39fn parse_model_dtype(x: &str) -> Result<ModelDType, String> {
40 x.parse()
41}
42
43#[derive(Debug, Clone, Subcommand, serde::Deserialize)]
44pub enum ModelSelected {
45 /// Select the model from a toml file
46 Toml {
47 /// .toml file containing the selector configuration.
48 #[arg(short, long)]
49 file: String,
50 },
51
52 /// Select a model for running via auto loader
53 Run {
54 /// Model ID to load from. May be a HF hub repo or a local path.
55 #[arg(short, long)]
56 model_id: String,
57
58 /// Path to local tokenizer.json file. If specified, it is used over any remote file.
59 #[arg(short, long)]
60 tokenizer_json: Option<String>,
61
62 /// Model data type. Defaults to `auto`.
63 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
64 dtype: ModelDType,
65
66 /// Path to a topology YAML file.
67 #[arg(long)]
68 topology: Option<String>,
69
70 /// ISQ organization: `default` or `moqe`.
71 #[arg(short, long)]
72 organization: Option<IsqOrganization>,
73
74 /// UQFF path to write to.
75 #[arg(short, long)]
76 write_uqff: Option<PathBuf>,
77
78 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
79 #[arg(short, long)]
80 from_uqff: Option<String>,
81
82 /// .imatrix file to enhance GGUF quantizations with.
83 #[arg(short, long)]
84 imatrix: Option<PathBuf>,
85
86 /// Generate and utilize an imatrix to enhance GGUF quantizations.
87 #[arg(short, long)]
88 calibration_file: Option<PathBuf>,
89
90 /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
91 /// Only supported on specific vision models.
92 #[arg(short = 'e', long)]
93 max_edge: Option<u32>,
94
95 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
96 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
97 max_seq_len: usize,
98
99 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
100 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
101 max_batch_size: usize,
102
103 /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
104 /// Only supported on specific vision models.
105 #[arg(long)]
106 max_num_images: Option<usize>,
107
108 /// Maximum expected image size will have this edge length on both edges.
109 /// This affects automatic device mapping but is not a hard limit.
110 /// Only supported on specific vision models.
111 #[arg(long)]
112 max_image_length: Option<usize>,
113
114 /// Cache path for Hugging Face models downloaded locally.
115 #[arg(long)]
116 hf_cache_path: Option<PathBuf>,
117
118 /// Path to local Matryoshka Transformer configuration CSV file
119 #[arg(long)]
120 matformer_config_path: Option<PathBuf>,
121
122 /// Name of the Matryoshka Transformer slice to use
123 #[arg(long)]
124 matformer_slice_name: Option<String>,
125 },
126
127 /// Select a plain model, without quantization or adapters
128 Plain {
129 /// Model ID to load from. This may be a HF hub repo or a local path.
130 #[arg(short, long)]
131 model_id: String,
132
133 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
134 #[arg(short, long)]
135 #[serde(default)]
136 tokenizer_json: Option<String>,
137
138 /// The architecture of the model.
139 #[arg(short, long, value_parser = parse_arch)]
140 #[serde(default)]
141 arch: Option<NormalLoaderType>,
142
143 /// Model data type. Defaults to `auto`.
144 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
145 #[serde(default = "default_model_dtype")]
146 dtype: ModelDType,
147
148 /// Path to a topology YAML file.
149 #[arg(long)]
150 #[serde(default)]
151 topology: Option<String>,
152
153 #[allow(rustdoc::bare_urls)]
154 /// ISQ organization: `default` or `moqe` (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).
155 #[arg(short, long)]
156 #[serde(default)]
157 organization: Option<IsqOrganization>,
158
159 /// UQFF path to write to.
160 #[arg(short, long)]
161 #[serde(default)]
162 write_uqff: Option<PathBuf>,
163
164 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;)
165 #[arg(short, long)]
166 #[serde(default)]
167 from_uqff: Option<String>,
168
169 /// .imatrix file to enhance GGUF quantizations with.
170 /// Incompatible with `--calibration-file/-c`
171 #[arg(short, long)]
172 #[serde(default)]
173 imatrix: Option<PathBuf>,
174
175 /// Generate and utilize an imatrix to enhance GGUF quantizations.
176 /// Incompatible with `--imatrix/-i`
177 #[arg(short, long)]
178 #[serde(default)]
179 calibration_file: Option<PathBuf>,
180
181 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
182 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
183 #[serde(default = "default_max_seq_len")]
184 max_seq_len: usize,
185
186 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
187 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
188 #[serde(default = "default_max_batch_size")]
189 max_batch_size: usize,
190
191 /// Cache path for Hugging Face models downloaded locally
192 #[arg(long)]
193 #[serde(default)]
194 hf_cache_path: Option<PathBuf>,
195
196 /// Path to local Matryoshka Transformer configuration CSV file
197 #[arg(long)]
198 #[serde(default)]
199 matformer_config_path: Option<PathBuf>,
200
201 /// Name of the Matryoshka Transformer slice to use
202 #[arg(long)]
203 #[serde(default)]
204 matformer_slice_name: Option<String>,
205 },
206
207 /// Select an X-LoRA architecture
208 XLora {
209 /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
210 #[arg(short, long)]
211 model_id: Option<String>,
212
213 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
214 #[arg(short, long)]
215 tokenizer_json: Option<String>,
216
217 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
218 #[arg(short, long)]
219 xlora_model_id: String,
220
221 /// Ordering JSON file
222 #[arg(short, long)]
223 order: String,
224
225 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
226 /// This makes the maximum running sequences 1.
227 #[arg(long)]
228 tgt_non_granular_index: Option<usize>,
229
230 /// The architecture of the model.
231 #[arg(short, long, value_parser = parse_arch)]
232 arch: Option<NormalLoaderType>,
233
234 /// Model data type. Defaults to `auto`.
235 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
236 dtype: ModelDType,
237
238 /// Path to a topology YAML file.
239 #[arg(long)]
240 topology: Option<String>,
241
242 /// UQFF path to write to.
243 #[arg(short, long)]
244 write_uqff: Option<PathBuf>,
245
246 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
247 #[arg(short, long)]
248 from_uqff: Option<String>,
249
250 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
251 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
252 max_seq_len: usize,
253
254 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
255 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
256 max_batch_size: usize,
257
258 /// Cache path for Hugging Face models downloaded locally
259 #[arg(long)]
260 hf_cache_path: Option<PathBuf>,
261 },
262
263 /// Select a LoRA architecture
264 Lora {
265 /// Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
266 #[arg(short, long)]
267 model_id: Option<String>,
268
269 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
270 #[arg(short, long)]
271 tokenizer_json: Option<String>,
272
273 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
274 #[arg(short, long)]
275 adapter_model_id: String,
276
277 /// The architecture of the model.
278 #[arg(long, value_parser = parse_arch)]
279 arch: Option<NormalLoaderType>,
280
281 /// Model data type. Defaults to `auto`.
282 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
283 dtype: ModelDType,
284
285 /// Path to a topology YAML file.
286 #[arg(long)]
287 topology: Option<String>,
288
289 /// UQFF path to write to.
290 #[arg(short, long)]
291 write_uqff: Option<PathBuf>,
292
293 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
294 #[arg(short, long)]
295 from_uqff: Option<String>,
296
297 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
298 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
299 max_seq_len: usize,
300
301 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
302 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
303 max_batch_size: usize,
304
305 /// Cache path for Hugging Face models downloaded locally
306 #[arg(long)]
307 hf_cache_path: Option<PathBuf>,
308 },
309
310 /// Select a GGUF model.
311 GGUF {
312 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
313 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
314 /// removing all remote accesses.
315 #[arg(short, long)]
316 tok_model_id: Option<String>,
317
318 /// Quantized model ID to find the `quantized_filename`.
319 /// This may be a HF hub repo or a local path.
320 #[arg(short = 'm', long)]
321 quantized_model_id: String,
322
323 /// Quantized filename(s).
324 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
325 #[arg(short = 'f', long)]
326 quantized_filename: String,
327
328 /// Model data type. Defaults to `auto`.
329 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
330 dtype: ModelDType,
331
332 /// Path to a topology YAML file.
333 #[arg(long)]
334 topology: Option<String>,
335
336 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
337 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
338 max_seq_len: usize,
339
340 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
341 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
342 max_batch_size: usize,
343 },
344
345 /// Select a GGUF model with X-LoRA.
346 XLoraGGUF {
347 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
348 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
349 /// removing all remote accesses.
350 #[arg(short, long)]
351 tok_model_id: Option<String>,
352
353 /// Quantized model ID to find the `quantized_filename`.
354 /// This may be a HF hub repo or a local path.
355 #[arg(short = 'm', long)]
356 quantized_model_id: String,
357
358 /// Quantized filename(s).
359 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
360 #[arg(short = 'f', long)]
361 quantized_filename: String,
362
363 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
364 #[arg(short, long)]
365 xlora_model_id: String,
366
367 /// Ordering JSON file
368 #[arg(short, long)]
369 order: String,
370
371 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
372 /// This makes the maximum running sequences 1.
373 #[arg(long)]
374 tgt_non_granular_index: Option<usize>,
375
376 /// Model data type. Defaults to `auto`.
377 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
378 dtype: ModelDType,
379
380 /// Path to a topology YAML file.
381 #[arg(long)]
382 topology: Option<String>,
383
384 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
385 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
386 max_seq_len: usize,
387
388 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
389 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
390 max_batch_size: usize,
391 },
392
393 /// Select a GGUF model with LoRA.
394 LoraGGUF {
395 /// `tok_model_id` is the local or remote model ID where you can find a `tokenizer_config.json` file.
396 /// If the `chat_template` is specified, then it will be treated as a path and used over remote files,
397 /// removing all remote accesses.
398 #[arg(short, long)]
399 tok_model_id: Option<String>,
400
401 /// Quantized model ID to find the `quantized_filename`.
402 /// This may be a HF hub repo or a local path.
403 #[arg(short = 'm', long)]
404 quantized_model_id: String,
405
406 /// Quantized filename(s).
407 /// May be a single filename, or use a delimiter of " " (a single space) for multiple files.
408 #[arg(short = 'f', long)]
409 quantized_filename: String,
410
411 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
412 #[arg(short, long)]
413 adapters_model_id: String,
414
415 /// Ordering JSON file
416 #[arg(short, long)]
417 order: String,
418
419 /// Model data type. Defaults to `auto`.
420 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
421 dtype: ModelDType,
422
423 /// Path to a topology YAML file.
424 #[arg(long)]
425 topology: Option<String>,
426
427 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
428 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
429 max_seq_len: usize,
430
431 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
432 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
433 max_batch_size: usize,
434 },
435
436 /// Select a GGML model.
437 GGML {
438 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
439 #[arg(short, long)]
440 tok_model_id: String,
441
442 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
443 #[arg(long)]
444 tokenizer_json: Option<String>,
445
446 /// Quantized model ID to find the `quantized_filename`.
447 /// This may be a HF hub repo or a local path.
448 #[arg(short = 'm', long)]
449 quantized_model_id: String,
450
451 /// Quantized filename.
452 #[arg(short = 'f', long)]
453 quantized_filename: String,
454
455 /// GQA value
456 #[arg(short, long, default_value_t = 1)]
457 gqa: usize,
458
459 /// Model data type. Defaults to `auto`.
460 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
461 dtype: ModelDType,
462
463 /// Path to a topology YAML file.
464 #[arg(long)]
465 topology: Option<String>,
466
467 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
468 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
469 max_seq_len: usize,
470
471 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
472 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
473 max_batch_size: usize,
474 },
475
476 /// Select a GGML model with X-LoRA.
477 XLoraGGML {
478 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
479 #[arg(short, long)]
480 tok_model_id: Option<String>,
481
482 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
483 #[arg(long)]
484 tokenizer_json: Option<String>,
485
486 /// Quantized model ID to find the `quantized_filename`.
487 /// This may be a HF hub repo or a local path.
488 #[arg(short = 'm', long)]
489 quantized_model_id: String,
490
491 /// Quantized filename.
492 #[arg(short = 'f', long)]
493 quantized_filename: String,
494
495 /// Model ID to load X-LoRA from. This may be a HF hub repo or a local path.
496 #[arg(short, long)]
497 xlora_model_id: String,
498
499 /// Ordering JSON file
500 #[arg(short, long)]
501 order: String,
502
503 /// Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached.
504 /// This makes the maximum running sequences 1.
505 #[arg(long)]
506 tgt_non_granular_index: Option<usize>,
507
508 /// GQA value
509 #[arg(short, long, default_value_t = 1)]
510 gqa: usize,
511
512 /// Model data type. Defaults to `auto`.
513 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
514 dtype: ModelDType,
515
516 /// Path to a topology YAML file.
517 #[arg(long)]
518 topology: Option<String>,
519
520 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
521 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
522 max_seq_len: usize,
523
524 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
525 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
526 max_batch_size: usize,
527 },
528
529 /// Select a GGML model with LoRA.
530 LoraGGML {
531 /// Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
532 #[arg(short, long)]
533 tok_model_id: Option<String>,
534
535 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
536 #[arg(long)]
537 tokenizer_json: Option<String>,
538
539 /// Quantized model ID to find the `quantized_filename`.
540 /// This may be a HF hub repo or a local path.
541 #[arg(short = 'm', long)]
542 quantized_model_id: String,
543
544 /// Quantized filename.
545 #[arg(short = 'f', long)]
546 quantized_filename: String,
547
548 /// Model ID to load LoRA from. This may be a HF hub repo or a local path.
549 #[arg(short, long)]
550 adapters_model_id: String,
551
552 /// Ordering JSON file
553 #[arg(short, long)]
554 order: String,
555
556 /// GQA value
557 #[arg(short, long, default_value_t = 1)]
558 gqa: usize,
559
560 /// Model data type. Defaults to `auto`.
561 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
562 dtype: ModelDType,
563
564 /// Path to a topology YAML file.
565 #[arg(long)]
566 topology: Option<String>,
567
568 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
569 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
570 max_seq_len: usize,
571
572 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
573 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
574 max_batch_size: usize,
575 },
576
577 /// Select a vision plain model, without quantization or adapters
578 VisionPlain {
579 /// Model ID to load from. This may be a HF hub repo or a local path.
580 #[arg(short, long)]
581 model_id: String,
582
583 /// Path to local tokenizer.json file. If this is specified it is used over any remote file.
584 #[arg(short, long)]
585 tokenizer_json: Option<String>,
586
587 /// The architecture of the model.
588 #[arg(short, long, value_parser = parse_vision_arch)]
589 arch: Option<VisionLoaderType>,
590
591 /// Model data type. Defaults to `auto`.
592 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
593 dtype: ModelDType,
594
595 /// Path to a topology YAML file.
596 #[arg(long)]
597 topology: Option<String>,
598
599 /// UQFF path to write to.
600 #[arg(short, long)]
601 write_uqff: Option<PathBuf>,
602
603 /// UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
604 #[arg(short, long)]
605 from_uqff: Option<String>,
606
607 /// Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved.
608 /// This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.
609 #[arg(short = 'e', long)]
610 max_edge: Option<u32>,
611
612 /// Generate and utilize an imatrix to enhance GGUF quantizations.
613 #[arg(short, long)]
614 calibration_file: Option<PathBuf>,
615
616 /// .cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.
617 #[arg(short, long)]
618 imatrix: Option<PathBuf>,
619
620 /// Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
621 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_SEQ_LEN)]
622 max_seq_len: usize,
623
624 /// Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
625 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_BATCH_SIZE)]
626 max_batch_size: usize,
627
628 /// Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
629 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_NUM_IMAGES)]
630 max_num_images: usize,
631
632 /// Maximum expected image size will have this edge length on both edges.
633 /// This affects automatic device mapping but is not a hard limit.
634 #[arg(long, default_value_t = AutoDeviceMapParams::DEFAULT_MAX_IMAGE_LENGTH)]
635 max_image_length: usize,
636
637 /// Cache path for Hugging Face models downloaded locally
638 #[arg(long)]
639 hf_cache_path: Option<PathBuf>,
640
641 /// Path to local Matryoshka Transformer configuration CSV file
642 #[arg(long)]
643 matformer_config_path: Option<PathBuf>,
644
645 /// Name of the Matryoshka Transformer slice to use
646 #[arg(long)]
647 matformer_slice_name: Option<String>,
648 },
649
650 /// Select a diffusion model, without quantization or adapters
651 #[command(name = "diffusion")]
652 DiffusionPlain {
653 /// Model ID to load from. This may be a HF hub repo or a local path.
654 #[arg(short, long)]
655 model_id: String,
656
657 /// The architecture of the model.
658 #[arg(short, long, value_parser = parse_diffusion_arch)]
659 arch: DiffusionLoaderType,
660
661 /// Model data type. Defaults to `auto`.
662 #[arg(short, long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
663 dtype: ModelDType,
664 },
665
666 Speech {
667 /// Model ID to load from. This may be a HF hub repo or a local path.
668 #[arg(short, long)]
669 model_id: String,
670
671 /// DAC Model ID to load from. If not provided, this is automatically downloaded from the default path for the model.
672 /// This may be a HF hub repo or a local path.
673 #[arg(short, long)]
674 dac_model_id: Option<String>,
675
676 /// The architecture of the model.
677 #[arg(short, long, value_parser = parse_speech_arch)]
678 arch: SpeechLoaderType,
679
680 /// Model data type. Defaults to `auto`.
681 #[arg(long, default_value_t = ModelDType::Auto, value_parser = parse_model_dtype)]
682 dtype: ModelDType,
683 },
684
685 /// Select multi-model mode with configuration file
686 #[command(name = "multi-model")]
687 MultiModel {
688 /// Multi-model configuration file path (JSON format)
689 #[arg(short, long)]
690 config: String,
691
692 /// Default model ID to use when no model is specified in requests
693 #[arg(short, long)]
694 default_model_id: Option<String>,
695 },
696}