mistralrs

Enum ModelSelected

source
pub enum ModelSelected {
    Toml {
        file: String,
    },
    Plain {
        model_id: String,
        tokenizer_json: Option<String>,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        organization: Option<IsqOrganization>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        imatrix: Option<PathBuf>,
        calibration_file: Option<PathBuf>,
    },
    XLora {
        model_id: Option<String>,
        tokenizer_json: Option<String>,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
    },
    Lora {
        model_id: Option<String>,
        tokenizer_json: Option<String>,
        adapters_model_id: String,
        order: String,
        arch: Option<NormalLoaderType>,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
    },
    GGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        topology: Option<String>,
    },
    XLoraGGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        topology: Option<String>,
    },
    LoraGGUF {
        tok_model_id: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        adapters_model_id: String,
        order: String,
        topology: Option<String>,
    },
    GGML {
        tok_model_id: String,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        gqa: usize,
        topology: Option<String>,
    },
    XLoraGGML {
        tok_model_id: Option<String>,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        xlora_model_id: String,
        order: String,
        tgt_non_granular_index: Option<usize>,
        gqa: usize,
        topology: Option<String>,
    },
    LoraGGML {
        tok_model_id: Option<String>,
        tokenizer_json: Option<String>,
        quantized_model_id: String,
        quantized_filename: String,
        adapters_model_id: String,
        order: String,
        gqa: usize,
        topology: Option<String>,
    },
    VisionPlain {
        model_id: String,
        tokenizer_json: Option<String>,
        arch: VisionLoaderType,
        dtype: ModelDType,
        topology: Option<String>,
        write_uqff: Option<PathBuf>,
        from_uqff: Option<PathBuf>,
        max_edge: Option<u32>,
        calibration_file: Option<PathBuf>,
    },
    DiffusionPlain {
        model_id: String,
        arch: DiffusionLoaderType,
        dtype: ModelDType,
    },
}

Variants§

§

Toml

Select the model from a toml file

Fields

§file: String

.toml file containing the selector configuration.

§

Plain

Select a plain model, without quantization or adapters

Fields

§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§organization: Option<IsqOrganization>

ISQ organization: default or moqe (Mixture of Quantized Experts: https://arxiv.org/abs/2310.02410).

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§imatrix: Option<PathBuf>

.imatrix file used to enhance GGUF quantizations. Incompatible with --calibration-file/-c.

§calibration_file: Option<PathBuf>

Generate and utilize an imatrix to enhance GGUF quantizations. Incompatible with --imatrix/-i.

§

XLora

Select an X-LoRA architecture

Fields

§model_id: Option<String>

Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. Setting this limits the maximum number of running sequences to 1.

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§

Lora

Select a LoRA architecture

Fields

§model_id: Option<String>

Force a base model ID to load from instead of using the ordering file. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§arch: Option<NormalLoaderType>

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§

GGUF

Select a GGUF model.

Fields

§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or multiple filenames separated by " " (a single space).

§topology: Option<String>

Path to a topology YAML file.

§

XLoraGGUF

Select a GGUF model with X-LoRA.

Fields

§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or multiple filenames separated by " " (a single space).

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. Setting this limits the maximum number of running sequences to 1.

§topology: Option<String>

Path to a topology YAML file.

§

LoraGGUF

Select a GGUF model with LoRA.

Fields

§tok_model_id: Option<String>

tok_model_id is the local or remote model ID where you can find a tokenizer_config.json file. If the chat_template is specified, then it will be treated as a path and used over remote files, removing all remote accesses.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename(s). May be a single filename, or multiple filenames separated by " " (a single space).

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§topology: Option<String>

Path to a topology YAML file.

§

GGML

Select a GGML model.

Fields

§tok_model_id: String

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§gqa: usize

GQA value

§topology: Option<String>

Path to a topology YAML file.

§

XLoraGGML

Select a GGML model with X-LoRA.

Fields

§tok_model_id: Option<String>

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§xlora_model_id: String

Model ID to load X-LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§tgt_non_granular_index: Option<usize>

Index of the completion token up to which scalings are generated. If this is 1, then one completion token is generated before the scalings are cached. Setting this limits the maximum number of running sequences to 1.

§gqa: usize

GQA value

§topology: Option<String>

Path to a topology YAML file.

§

LoraGGML

Select a GGML model with LoRA.

Fields

§tok_model_id: Option<String>

Model ID to load the tokenizer from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§quantized_model_id: String

Quantized model ID to find the quantized_filename. This may be a HF hub repo or a local path.

§quantized_filename: String

Quantized filename.

§adapters_model_id: String

Model ID to load LoRA from. This may be a HF hub repo or a local path.

§order: String

Ordering JSON file

§gqa: usize

GQA value

§topology: Option<String>

Path to a topology YAML file.

§

VisionPlain

Select a vision plain model, without quantization or adapters

Fields

§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§tokenizer_json: Option<String>

Path to local tokenizer.json file. If this is specified it is used over any remote file.

§arch: VisionLoaderType

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

§topology: Option<String>

Path to a topology YAML file.

§write_uqff: Option<PathBuf>

UQFF path to write to.

§from_uqff: Option<PathBuf>

UQFF path to load from. If provided, this takes precedence over applying ISQ.

§max_edge: Option<u32>

Automatically resize and pad images to this maximum edge length. Aspect ratio is preserved. This is only supported on the Qwen2-VL and Idefics 2 models. Others handle this internally.

§calibration_file: Option<PathBuf>

Generate and utilize an imatrix to enhance GGUF quantizations.

§

DiffusionPlain

Select a diffusion plain model, without quantization or adapters

Fields

§model_id: String

Model ID to load from. This may be a HF hub repo or a local path.

§arch: DiffusionLoaderType

The architecture of the model.

§dtype: ModelDType

Model data type. Defaults to auto.

Trait Implementations§

source§

impl Debug for ModelSelected

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more
source§

impl FromArgMatches for ModelSelected

source§

fn from_arg_matches( __clap_arg_matches: &ArgMatches, ) -> Result<ModelSelected, Error>

Instantiate Self from [ArgMatches], parsing the arguments as needed. Read more
source§

fn from_arg_matches_mut( __clap_arg_matches: &mut ArgMatches, ) -> Result<ModelSelected, Error>

Instantiate Self from [ArgMatches], parsing the arguments as needed. Read more
source§

fn update_from_arg_matches( &mut self, __clap_arg_matches: &ArgMatches, ) -> Result<(), Error>

Assign values from ArgMatches to self.
source§

fn update_from_arg_matches_mut<'b>( &mut self, __clap_arg_matches: &mut ArgMatches, ) -> Result<(), Error>

Assign values from ArgMatches to self.
source§

impl Subcommand for ModelSelected

source§

fn augment_subcommands<'b>(__clap_app: Command) -> Command

Append to [Command] so it can instantiate Self via [FromArgMatches::from_arg_matches_mut] Read more
source§

fn augment_subcommands_for_update<'b>(__clap_app: Command) -> Command

Append to [Command] so it can instantiate self via [FromArgMatches::update_from_arg_matches_mut] Read more
source§

fn has_subcommand(__clap_name: &str) -> bool

Test whether Self can parse a specific subcommand

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> AsAny for T
where T: Any,

§

fn as_any(&self) -> &(dyn Any + 'static)

§

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

§

fn type_name(&self) -> &'static str

Gets the type name of self
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
§

impl<T> Downcast for T
where T: AsAny + ?Sized,

§

fn is<T>(&self) -> bool
where T: AsAny,

Returns true if the boxed type is the same as T. Read more
§

fn downcast_ref<T>(&self) -> Option<&T>
where T: AsAny,

Forward to the method defined on the type Any.
§

fn downcast_mut<T>(&mut self) -> Option<&mut T>
where T: AsAny,

Forward to the method defined on the type Any.
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§

impl<T> Pointable for T

§

const ALIGN: usize = _

The alignment of the pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

source§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

impl<T> ErasedDestructor for T
where T: 'static,

§

impl<T> MaybeSendSync for T

§

impl<T> Ungil for T
where T: Send,