
Enum ModelSelected

pub enum ModelSelected {
    Toml {
        file: String,
    },
    Plain {
        model_id: String,
        tokenizer_json: Option<String>,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        organization: Option<IsqOrganization>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        imatrix: Option<PathBuf>,
        calibration_file: Option<PathBuf>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    XLora {
        model_id: Option<String>,
        tokenizer_json: Option<String>,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    Lora {
        model_id: Option<String>,
        tokenizer_json: Option<String>,
        adapters_model_id: String,
        order: String,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    GGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    XLoraGGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    LoraGGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        adapters_model_id: String,
        order: String,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    GGML {
        tok_model_id: String,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        gqa: usize,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    XLoraGGML {
        tok_model_id: Option<String>,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        gqa: usize,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    LoraGGML {
        tok_model_id: Option<String>,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        adapters_model_id: String,
        order: String,
        gqa: usize,
        dtype: ModelDType,
        topology: Option<String>,
        max_seq_len: usize,
        max_batch_size: usize,
    },
    VisionPlain {
        model_id: String,
        tokenizer_json: Option<String>,
        arch: VisionLoaderType,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        max_edge: Option<u32>,
        calibration_file: Option<PathBuf>,
        imatrix: Option<PathBuf>,
        max_seq_len: usize,
        max_batch_size: usize,
        max_num_images: usize,
        max_image_length: usize,
    },
    DiffusionPlain {
        model_id: String,
        arch: DiffusionLoaderType,
        dtype: ModelDType,
    },
}




Select the model from a toml file


§file: String

.toml file containing the selector configuration.



Select a plain model, without quantization or adapters


§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§organization: Option<IsqOrganization>

ISQ organization: default or moqe (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§imatrix: Option<PathBuf>

.imatrix file to enhance GGUF quantizations with. Incompatible with --calibration-file/-c.

§calibration_file: Option<PathBuf>

Generate and utilize an imatrix to enhance GGUF quantizations. Incompatible with --imatrix/-i.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select an X-LoRA architecture


§model_id: Option<String>

Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. This limits the maximum number of running sequences to 1.

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a LoRA architecture


§model_id: Option<String>

Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGUF model.


§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or use a delimiter of " " (a single space) for multiple files.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGUF model with X-LoRA.


§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or use a delimiter of " " (a single space) for multiple files.

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. This limits the maximum number of running sequences to 1.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGUF model with LoRA.


§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or use a delimiter of " " (a single space) for multiple files.

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGML model.


§tok_model_id: String

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§gqa: usize

GQA value

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGML model with X-LoRA.


§tok_model_id: Option<String>

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. This limits the maximum number of running sequences to 1.

§gqa: usize

GQA value

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a GGML model with LoRA.


§tok_model_id: Option<String>

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§gqa: usize

GQA value

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.



Select a vision plain model, without quantization or adapters


§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§arch: VisionLoaderType

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§max_edge: Option<u32>

Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved. This is only supported on the Qwen2-VL and Idefics models. Others handle this internally.

§calibration_file: Option<PathBuf>

Generate and utilize an imatrix to enhance GGUF quantizations.

§imatrix: Option<PathBuf>

.cimatrix file to enhance GGUF quantizations with. This must be a .cimatrix file.

§max_seq_len: usize

Maximum prompt sequence length to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_batch_size: usize

Maximum prompt batch size to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_num_images: usize

Maximum number of images per prompt to expect for this model. This affects automatic device mapping but is not a hard limit.

§max_image_length: usize

The largest expected image is assumed to have this edge length on both edges. This affects automatic device mapping but is not a hard limit.



Select a diffusion plain model, without quantization or adapters


§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§arch: DiffusionLoaderType

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

Trait Implementations§


impl Debug for ModelSelected


fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

impl FromArgMatches for ModelSelected


fn from_arg_matches( __clap_arg_matches: &ArgMatches, ) -> Result<ModelSelected, Error>

Instantiate Self from [ArgMatches], parsing the arguments as needed. Read more

fn from_arg_matches_mut( __clap_arg_matches: &mut ArgMatches, ) -> Result<ModelSelected, Error>

Instantiate Self from [ArgMatches], parsing the arguments as needed. Read more

fn update_from_arg_matches( &mut self, __clap_arg_matches: &ArgMatches, ) -> Result<(), Error>

Assign values from ArgMatches to self.

fn update_from_arg_matches_mut<'b>( &mut self, __clap_arg_matches: &mut ArgMatches, ) -> Result<(), Error>

Assign values from ArgMatches to self.

impl Subcommand for ModelSelected


fn augment_subcommands<'b>(__clap_app: Command) -> Command

Append to [Command] so it can instantiate Self via [FromArgMatches::from_arg_matches_mut] Read more

fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command

Append to [Command] so it can instantiate self via [FromArgMatches::update_from_arg_matches_mut] Read more

fn has_subcommand(__clap_name: &str) -> bool

Test whether Self can parse a specific subcommand

Auto Trait Implementations§

Blanket Implementations§


impl<T> Any for T
where T: 'static + ?Sized,


fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

impl<T> AsAny for T
where T: Any,


fn as_any(&self) -> &(dyn Any + 'static)


fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)


fn type_name(&self) -> &'static str

Gets the type name of self

impl<T> Borrow<T> for T
where T: ?Sized,


fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T
where T: ?Sized,


fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

impl<T> Downcast for T
where T: AsAny + ?Sized,


fn is<T>(&self) -> bool
where T: AsAny,

Returns true if the boxed type is the same as T. Read more

fn downcast_ref<T>(&self) -> Option<&T>
where T: AsAny,

Forward to the method defined on the type Any.

fn downcast_mut<T>(&mut self) -> Option<&mut T>
where T: AsAny,

Forward to the method defined on the type Any.

impl<T> From<T> for T


fn from(t: T) -> T

Returns the argument unchanged.


impl<T> Instrument for T


fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

impl<T, U> Into<U> for T
where U: From<T>,


fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.


impl<T> IntoEither for T


fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

impl<T> Pointable for T


const ALIGN: usize

The alignment of pointer.

type Init = T

The type for initializers.

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more

impl<T, U> TryFrom<U> for T
where U: Into<T>,


type Error = Infallible

The type returned in the event of a conversion error.

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,


type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

impl<V, T> VZip<V> for T
where V: MultiLane<T>,


fn vzip(self) -> V


impl<T> WithSubscriber for T


fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more

impl<T> ErasedDestructor for T
where T: 'static,


impl<T> MaybeSendSync for T


impl<T> Ungil for T
where T: Send,