pub enum ModelSelected {
Toml {
file: String,
},
Run {
model_id: String,
tokenizer_json: Option<String>,
dtype: ModelDType,
topology: Option<String>,
organization: Option<IsqOrganization>,
write_uqff: Option<PathBuf>,
from_uqff: Option<String>,
imatrix: Option<PathBuf>,
calibration_file: Option<PathBuf>,
max_edge: Option<u32>,
max_seq_len: usize,
max_batch_size: usize,
max_num_images: Option<usize>,
max_image_length: Option<usize>,
hf_cache_path: Option<PathBuf>,
matformer_config_path: Option<PathBuf>,
matformer_slice_name: Option<String>,
},
Plain {
model_id: String,
tokenizer_json: Option<String>,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
organization: Option<IsqOrganization>,
write_uqff: Option<PathBuf>,
from_uqff: Option<String>,
imatrix: Option<PathBuf>,
calibration_file: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
hf_cache_path: Option<PathBuf>,
matformer_config_path: Option<PathBuf>,
matformer_slice_name: Option<String>,
},
XLora {
model_id: Option<String>,
tokenizer_json: Option<String>,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
hf_cache_path: Option<PathBuf>,
},
Lora {
model_id: Option<String>,
tokenizer_json: Option<String>,
adapter_model_id: String,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
hf_cache_path: Option<PathBuf>,
},
GGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
XLoraGGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
LoraGGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
adapters_model_id: String,
order: String,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
GGML {
tok_model_id: String,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
XLoraGGML {
tok_model_id: Option<String>,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
LoraGGML {
tok_model_id: Option<String>,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
adapters_model_id: String,
order: String,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
VisionPlain {
model_id: String,
tokenizer_json: Option<String>,
arch: Option<VisionLoaderType>,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<String>,
max_edge: Option<u32>,
calibration_file: Option<PathBuf>,
imatrix: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
max_num_images: usize,
max_image_length: usize,
hf_cache_path: Option<PathBuf>,
matformer_config_path: Option<PathBuf>,
matformer_slice_name: Option<String>,
},
DiffusionPlain {
model_id: String,
arch: DiffusionLoaderType,
dtype: ModelDType,
},
Speech {
model_id: String,
dac_model_id: Option<String>,
arch: SpeechLoaderType,
dtype: ModelDType,
},
MultiModel {
config: String,
default_model_id: Option<String>,
},
}

Variants§
Toml
Select the model from a toml file
Run
Select a model for running via auto loader
Fields
tokenizer_json: Option<String> — Path to local tokenizer.json file. If specified, it is used over any remote file.
dtype: ModelDType — Model data type. Defaults to auto.
organization: Option<IsqOrganization> — ISQ organization: default or moqe.
from_uqff: Option<String> — UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
max_edge: Option<u32> — Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved. Only supported on specific vision models.
max_seq_len: usize — Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
max_batch_size: usize — Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
max_num_images: Option<usize> — Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit. Only supported on specific vision models.
Plain
Select a plain model, without quantization or adapters
Fields
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: Option<NormalLoaderType> — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
organization: Option<IsqOrganization> — ISQ organization: default or moqe (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).
from_uqff: Option<String> — UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;)
imatrix: Option<PathBuf> — .imatrix file to enhance GGUF quantizations with.
Incompatible with --calibration-file/-c
calibration_file: Option<PathBuf> — Generate and utilize an imatrix to enhance GGUF quantizations.
Incompatible with --imatrix/-i
max_seq_len: usize — Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
XLora
Select an X-LoRA architecture
Fields
model_id: Option<String> — Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
tgt_non_granular_index: Option<usize> — Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
arch: Option<NormalLoaderType> — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
from_uqff: Option<String> — UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
max_seq_len: usize — Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
Lora
Select a LoRA architecture
Fields
model_id: Option<String> — Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: Option<NormalLoaderType> — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
from_uqff: Option<String> — UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
max_seq_len: usize — Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
GGUF
Select a GGUF model.
Fields
tok_model_id: Option<String> — tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file.
If the chat_template is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
quantized_filename: String — Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
dtype: ModelDType — Model data type. Defaults to auto.
XLoraGGUF
Select a GGUF model with X-LoRA.
Fields
tok_model_id: Option<String> — tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file.
If the chat_template is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
quantized_filename: String — Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
tgt_non_granular_index: Option<usize> — Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
dtype: ModelDType — Model data type. Defaults to auto.
LoraGGUF
Select a GGUF model with LoRA.
Fields
tok_model_id: Option<String> — tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file.
If the chat_template is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
quantized_filename: String — Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
dtype: ModelDType — Model data type. Defaults to auto.
GGML
Select a GGML model.
Fields
tok_model_id: String — Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
dtype: ModelDType — Model data type. Defaults to auto.
XLoraGGML
Select a GGML model with X-LoRA.
Fields
tok_model_id: Option<String> — Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
tgt_non_granular_index: Option<usize> — Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
dtype: ModelDType — Model data type. Defaults to auto.
LoraGGML
Select a GGML model with LoRA.
Fields
tok_model_id: Option<String> — Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String — Quantized model ID to find the quantized_filename.
This may be a HF hub repo or a local path.
dtype: ModelDType — Model data type. Defaults to auto.
VisionPlain
Select a vision plain model, without quantization or adapters
Fields
tokenizer_json: Option<String> — Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: Option<VisionLoaderType> — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
from_uqff: Option<String> — UQFF path to load from. If provided, this takes precedence over applying ISQ. Specify multiple files using a semicolon delimiter (;).
max_edge: Option<u32> — Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved. This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.
imatrix: Option<PathBuf> — .cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.
max_seq_len: usize — Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
max_batch_size: usize — Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
max_num_images: usize — Maximum prompt number of images to expect for this model. This affects automatic device mapping but is not a hard limit.
DiffusionPlain
Select a diffusion model, without quantization or adapters
Fields
arch: DiffusionLoaderType — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
Speech
Fields
dac_model_id: Option<String> — DAC Model ID to load from. If not provided, this is automatically downloaded from the default path for the model. This may be a HF hub repo or a local path.
arch: SpeechLoaderType — The architecture of the model.
dtype: ModelDType — Model data type. Defaults to auto.
MultiModel
Select multi-model mode with configuration file
Trait Implementations§
Source§impl Clone for ModelSelected
impl Clone for ModelSelected
Source§fn clone(&self) -> ModelSelected
fn clone(&self) -> ModelSelected
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
source. Read more
Source§impl Debug for ModelSelected
impl Debug for ModelSelected
Source§impl<'de> Deserialize<'de> for ModelSelected
impl<'de> Deserialize<'de> for ModelSelected
Source§fn deserialize<__D>(
__deserializer: __D,
) -> Result<ModelSelected, <__D as Deserializer<'de>>::Error>where
__D: Deserializer<'de>,
fn deserialize<__D>(
__deserializer: __D,
) -> Result<ModelSelected, <__D as Deserializer<'de>>::Error>where
__D: Deserializer<'de>,
Source§impl FromArgMatches for ModelSelected
impl FromArgMatches for ModelSelected
Source§fn from_arg_matches(
__clap_arg_matches: &ArgMatches,
) -> Result<ModelSelected, Error>
fn from_arg_matches( __clap_arg_matches: &ArgMatches, ) -> Result<ModelSelected, Error>
Source§fn from_arg_matches_mut(
__clap_arg_matches: &mut ArgMatches,
) -> Result<ModelSelected, Error>
fn from_arg_matches_mut( __clap_arg_matches: &mut ArgMatches, ) -> Result<ModelSelected, Error>
Source§fn update_from_arg_matches(
&mut self,
__clap_arg_matches: &ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches( &mut self, __clap_arg_matches: &ArgMatches, ) -> Result<(), Error>
ArgMatches to self.
Source§fn update_from_arg_matches_mut<'b>(
&mut self,
__clap_arg_matches: &mut ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches_mut<'b>( &mut self, __clap_arg_matches: &mut ArgMatches, ) -> Result<(), Error>
ArgMatches to self.
Source§impl Subcommand for ModelSelected
impl Subcommand for ModelSelected
Source§fn augment_subcommands<'b>(__clap_app: Command) -> Command
fn augment_subcommands<'b>(__clap_app: Command) -> Command
Source§fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command
fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command
Command] so it can instantiate self via
[FromArgMatches::update_from_arg_matches_mut] Read more
Source§fn has_subcommand(__clap_name: &str) -> bool
fn has_subcommand(__clap_name: &str) -> bool
Self can parse a specific subcommand
Auto Trait Implementations§
impl Freeze for ModelSelected
impl RefUnwindSafe for ModelSelected
impl Send for ModelSelected
impl Sync for ModelSelected
impl Unpin for ModelSelected
impl UnwindSafe for ModelSelected
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for Twhere
T: Clone,
impl<T> CloneToUninit for Twhere
T: Clone,
§impl<T> Downcast for Twhere
T: AsAny + ?Sized,
impl<T> Downcast for Twhere
T: AsAny + ?Sized,
§fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
Any.
§fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
Any.
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
§impl<F, T> IntoSample<T> for Fwhere
T: FromSample<F>,
impl<F, T> IntoSample<T> for Fwhere
T: FromSample<F>,
fn into_sample(self) -> T
§impl<T> Pointable for T
impl<T> Pointable for T
§impl<T> PolicyExt for Twhere
T: ?Sized,
impl<T> PolicyExt for Twhere
T: ?Sized,
§impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SPwhere
SS: SubsetOf<SP>,
§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
self from the equivalent element of its
superset. Read more
§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
self is actually part of its subset T (and can be converted to it).
§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
self.to_subset but without any property checks. Always succeeds.
§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
self to the equivalent element of its superset.