mistralrs_quant

Trait QuantMethod

source
pub trait QuantMethod:
    Send
    + Sync
    + Debug
    + QuantizedSerde {
    // Required methods
    fn new(method: QuantMethodConfig) -> Result<Self> where Self: Sized;
    fn dequantize_w(&self) -> Result<Tensor>;
    fn forward(&self, a: &Tensor) -> Result<Tensor>;
    fn quantized_act_type(&self) -> Option<DType>;
    fn dtype_and_device(&self) -> (DType, Device);
    fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>;
    fn apply_isq(
        self: Arc<Self>,
        dtype: Option<IsqType>,
        device: Device,
        n_quantized: &AtomicUsize,
        imatrix_weight: Option<Vec<f32>>,
    ) -> Result<Arc<dyn QuantMethod>>;
    fn maybe_to_gguf_quant(self: Arc<Self>) -> Result<Arc<dyn QuantMethod>>;
    fn get_bias_mut(&mut self) -> Option<&mut Tensor>;
    fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>;

    // Provided methods
    fn forward_autocast(&self, a: &Tensor) -> Result<Tensor> { ... }
    fn forward_via_half(&self, a: &Tensor) -> Result<Tensor> { ... }
    fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)> { ... }
    fn begin_track_stats(&mut self) -> Result<()> { ... }
    fn end_track_stats(&self) -> Result<Tensor> { ... }
}
Expand description

Quantized method for a quantized matmul.

Required Methods§

source

fn new(method: QuantMethodConfig) -> Result<Self>
where Self: Sized,

source

fn dequantize_w(&self) -> Result<Tensor>

source

fn forward(&self, a: &Tensor) -> Result<Tensor>

Compute matmul of self and a. self should contain the weights.

source

fn quantized_act_type(&self) -> Option<DType>

If a quantized method, return the activation dtype.

source

fn dtype_and_device(&self) -> (DType, Device)

Weight dtype and device

source

fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>

Add a delta weight from LoRA to the weights. This should be prescaled with alpha.

source

fn apply_isq( self: Arc<Self>, dtype: Option<IsqType>, device: Device, n_quantized: &AtomicUsize, imatrix_weight: Option<Vec<f32>>, ) -> Result<Arc<dyn QuantMethod>>

Apply in-situ quantization (ISQ) of the given `dtype` to the weights on the given `device`, incrementing `n_quantized` as layers are processed. An optional importance-matrix weighting (`imatrix_weight`) may be supplied to guide quantization. (NOTE: the original text "If the quant is backed by a qmatmul." appears copy-pasted from another method and does not describe this signature.)

source

fn maybe_to_gguf_quant(self: Arc<Self>) -> Result<Arc<dyn QuantMethod>>

Convert to an equivalent gguf quantization, if applicable.

source

fn get_bias_mut(&mut self) -> Option<&mut Tensor>

Returns a mutable reference to the bias tensor, if this quantized method has one. (NOTE: the original text "If the quant is backed by a qmatmul." appears copy-pasted from another method and does not match this signature.)

source

fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>

Provided Methods§

source

fn forward_autocast(&self, a: &Tensor) -> Result<Tensor>

Compute matmul of self and a. self should contain the weights. Automatically casts to the required quantization activation type and back.

source

fn forward_via_half(&self, a: &Tensor) -> Result<Tensor>

Compute matmul of self and a. self should contain the weights. This may go via half precision if it is supported.

source

fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>

source

fn begin_track_stats(&mut self) -> Result<()>

Begin tracking stats into an ImatrixLayerStats

source

fn end_track_stats(&self) -> Result<Tensor>

End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.

Trait Implementations§

source§

impl Module for dyn QuantMethod

source§

fn forward(&self, xs: &Tensor) -> Result<Tensor>

Implementors§