pub enum ModelSelected {
Toml {
file: String,
},
Plain {
model_id: String,
tokenizer_json: Option<String>,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
organization: Option<IsqOrganization>,
write_uqff: Option<PathBuf>,
from_uqff: Option<PathBuf>,
imatrix: Option<PathBuf>,
calibration_file: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
},
XLora {
model_id: Option<String>,
tokenizer_json: Option<String>,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
},
Lora {
model_id: Option<String>,
tokenizer_json: Option<String>,
adapters_model_id: String,
order: String,
arch: Option<NormalLoaderType>,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
},
GGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
XLoraGGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
LoraGGUF {
tok_model_id: Option<String>,
quantized_model_id: String,
quantized_filename: String,
adapters_model_id: String,
order: String,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
GGML {
tok_model_id: String,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
XLoraGGML {
tok_model_id: Option<String>,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
xlora_model_id: String,
order: String,
tgt_non_granular_index: Option<usize>,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
LoraGGML {
tok_model_id: Option<String>,
tokenizer_json: Option<String>,
quantized_model_id: String,
quantized_filename: String,
adapters_model_id: String,
order: String,
gqa: usize,
dtype: ModelDType,
topology: Option<String>,
max_seq_len: usize,
max_batch_size: usize,
},
VisionPlain {Show 14 fields
model_id: String,
tokenizer_json: Option<String>,
arch: VisionLoaderType,
dtype: ModelDType,
topology: Option<String>,
write_uqff: Option<PathBuf>,
from_uqff: Option<PathBuf>,
max_edge: Option<u32>,
calibration_file: Option<PathBuf>,
imatrix: Option<PathBuf>,
max_seq_len: usize,
max_batch_size: usize,
max_num_images: usize,
max_image_length: usize,
},
DiffusionPlain {
model_id: String,
arch: DiffusionLoaderType,
dtype: ModelDType,
},
}
Variants§
Toml
Select the model from a toml file
Plain
Select a plain model, without quantization or adapters
Fields
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: Option<NormalLoaderType>
The architecture of the model.
dtype: ModelDType
Model data type. Defaults to auto
.
organization: Option<IsqOrganization>
ISQ organization: default
or moqe
(Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).
from_uqff: Option<PathBuf>
UQFF path to load from. If provided, this takes precedence over applying ISQ.
imatrix: Option<PathBuf>
.imatrix file to enhance GGUF quantizations with.
Incompatible with --calibration-file/-c
calibration_file: Option<PathBuf>
Generate and utilize an imatrix to enhance GGUF quantizations.
Incompatible with --imatrix/-i
XLora
Select an X-LoRA architecture
Fields
model_id: Option<String>
Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
tgt_non_granular_index: Option<usize>
Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
arch: Option<NormalLoaderType>
The architecture of the model.
dtype: ModelDType
Model data type. Defaults to auto
.
from_uqff: Option<PathBuf>
UQFF path to load from. If provided, this takes precedence over applying ISQ.
Lora
Select a LoRA architecture
Fields
model_id: Option<String>
Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: Option<NormalLoaderType>
The architecture of the model.
dtype: ModelDType
Model data type. Defaults to auto
.
from_uqff: Option<PathBuf>
UQFF path to load from. If provided, this takes precedence over applying ISQ.
GGUF
Select a GGUF model.
Fields
tok_model_id: Option<String>
tok_model_id
is the local or remote model ID where you can find a tokenizer_config.json
file.
If the chat_template
is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
quantized_filename: String
Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
dtype: ModelDType
Model data type. Defaults to auto
.
XLoraGGUF
Select a GGUF model with X-LoRA.
Fields
tok_model_id: Option<String>
tok_model_id
is the local or remote model ID where you can find a tokenizer_config.json
file.
If the chat_template
is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
quantized_filename: String
Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
tgt_non_granular_index: Option<usize>
Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
dtype: ModelDType
Model data type. Defaults to auto
.
LoraGGUF
Select a GGUF model with LoRA.
Fields
tok_model_id: Option<String>
tok_model_id
is the local or remote model ID where you can find a tokenizer_config.json
file.
If the chat_template
is specified, then it will be treated as a path and used over remote files,
removing all remote accesses.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
quantized_filename: String
Quantized filename(s). May be a single filename, or use a delimiter of “ “ (a single space) for multiple files.
dtype: ModelDType
Model data type. Defaults to auto
.
GGML
Select a GGML model.
Fields
tok_model_id: String
Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
dtype: ModelDType
Model data type. Defaults to auto
.
XLoraGGML
Select a GGML model with X-LoRA.
Fields
tok_model_id: Option<String>
Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
tgt_non_granular_index: Option<usize>
Index of completion tokens to generate scalings up until. If this is 1, then there will be one completion token generated before it is cached. This makes the maximum running sequences 1.
dtype: ModelDType
Model data type. Defaults to auto
.
LoraGGML
Select a GGML model with LoRA.
Fields
tok_model_id: Option<String>
Model ID to load the tokenizer from. This may be a HF hub repo or a local path.
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
quantized_model_id: String
Quantized model ID to find the quantized_filename
.
This may be a HF hub repo or a local path.
dtype: ModelDType
Model data type. Defaults to auto
.
VisionPlain
Select a vision plain model, without quantization or adapters
Fields
tokenizer_json: Option<String>
Path to local tokenizer.json file. If this is specified it is used over any remote file.
arch: VisionLoaderType
The architecture of the model.
dtype: ModelDType
Model data type. Defaults to auto
.
from_uqff: Option<PathBuf>
UQFF path to load from. If provided, this takes precedence over applying ISQ.
max_edge: Option<u32>
Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved. This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.
imatrix: Option<PathBuf>
.cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.
max_seq_len: usize
Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.
max_batch_size: usize
Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.
DiffusionPlain
Select a diffusion plain model, without quantization or adapters
Fields
arch: DiffusionLoaderType
The architecture of the model.
dtype: ModelDType
Model data type. Defaults to auto
.
Trait Implementations§
Source§impl Debug for ModelSelected
impl Debug for ModelSelected
Source§impl FromArgMatches for ModelSelected
impl FromArgMatches for ModelSelected
Source§fn from_arg_matches(
__clap_arg_matches: &ArgMatches,
) -> Result<ModelSelected, Error>
fn from_arg_matches( __clap_arg_matches: &ArgMatches, ) -> Result<ModelSelected, Error>
Source§fn from_arg_matches_mut(
__clap_arg_matches: &mut ArgMatches,
) -> Result<ModelSelected, Error>
fn from_arg_matches_mut( __clap_arg_matches: &mut ArgMatches, ) -> Result<ModelSelected, Error>
Source§fn update_from_arg_matches(
&mut self,
__clap_arg_matches: &ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches( &mut self, __clap_arg_matches: &ArgMatches, ) -> Result<(), Error>
Assign values from ArgMatches to self.
Source§fn update_from_arg_matches_mut<'b>(
&mut self,
__clap_arg_matches: &mut ArgMatches,
) -> Result<(), Error>
fn update_from_arg_matches_mut<'b>( &mut self, __clap_arg_matches: &mut ArgMatches, ) -> Result<(), Error>
Assign values from ArgMatches to self.
Source§impl Subcommand for ModelSelected
impl Subcommand for ModelSelected
Source§fn augment_subcommands<'b>(__clap_app: Command) -> Command
fn augment_subcommands<'b>(__clap_app: Command) -> Command
Source§fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command
fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command
Append to [Command] so it can instantiate self via
[FromArgMatches::update_from_arg_matches_mut]. Read more
Source§fn has_subcommand(__clap_name: &str) -> bool
fn has_subcommand(__clap_name: &str) -> bool
Test whether Self can parse a specific subcommand.
Auto Trait Implementations§
impl Freeze for ModelSelected
impl RefUnwindSafe for ModelSelected
impl Send for ModelSelected
impl Sync for ModelSelected
impl Unpin for ModelSelected
impl UnwindSafe for ModelSelected
Blanket Implementations§
Source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
§impl<T> Downcast for Twhere
T: AsAny + ?Sized,
impl<T> Downcast for Twhere
T: AsAny + ?Sized,
§fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
Any
.§fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
Any
.§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more