pub trait QuantMethod: Send + Sync + Debug + QuantizedSerde {
    // Required methods
    fn new(method: QuantMethodConfig) -> Result<Self>
    where
        Self: Sized;
    fn dequantize_w(&self) -> Result<Tensor>;
    fn forward(&self, a: &Tensor) -> Result<Tensor>;
    fn quantized_act_type(&self) -> Option<DType>;
    fn dtype_and_device(&self) -> (DType, Device);
    fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>;
    fn apply_isq(
        self: Arc<Self>,
        dtype: Option<IsqType>,
        device: Device,
        n_quantized: &AtomicUsize,
        imatrix_weight: Option<Vec<f32>>,
    ) -> Result<Arc<dyn QuantMethod>>;
    fn maybe_to_gguf_quant(self: Arc<Self>) -> Result<Arc<dyn QuantMethod>>;
    fn get_bias_mut(&mut self) -> Option<&mut Tensor>;
    fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>;

    // Provided methods
    fn forward_autocast(&self, a: &Tensor) -> Result<Tensor> { ... }
    fn forward_via_half(&self, a: &Tensor) -> Result<Tensor> { ... }
    fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)> { ... }
    fn begin_track_stats(&mut self) -> Result<()> { ... }
    fn end_track_stats(&self) -> Result<Tensor> { ... }
}
Quantized method for a quantized matmul.
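Example (a sketch, not from the crate's documentation): assuming the candle_core tensor types the signatures above suggest, a QuantMethod trait object can serve as the projection step of a model layer. The helper name is hypothetical.

use std::sync::Arc;
use candle_core::{Result, Tensor};

// Run an activation through any quantized linear layer.
// The layer itself holds the (possibly quantized) weight matrix.
fn project(layer: &Arc<dyn QuantMethod>, hidden: &Tensor) -> Result<Tensor> {
    // forward_autocast handles any required activation-dtype conversion.
    layer.forward_autocast(hidden)
}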
Required Methods
fn new(method: QuantMethodConfig) -> Result<Self>
where
    Self: Sized
fn dequantize_w(&self) -> Result<Tensor>
Dequantize the weights into a full-precision tensor.
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of self and a. self should contain the weights.
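Example (a sketch): the weights-in-self convention means forward(a) should agree, up to quantization error, with an explicit matmul against the dequantized weights. The (out_features, in_features) weight layout and 2-D activation are assumptions.

use candle_core::{Result, Tensor};

// Compare forward() with a reference matmul on the dequantized weights.
fn check_forward(layer: &dyn QuantMethod, a: &Tensor) -> Result<()> {
    let quant_out = layer.forward(a)?;
    let w = layer.dequantize_w()?;            // full-precision copy of the weights
    let a_full = a.to_dtype(w.dtype())?;      // match the weight dtype
    let ref_out = a_full.matmul(&w.t()?)?;    // assumes an (out, in) weight layout
    let err = (quant_out.to_dtype(w.dtype())? - ref_out)?.abs()?.mean_all()?;
    println!("mean abs quantization error: {err}");
    Ok(())
}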
fn quantized_act_type(&self) -> Option<DType>
If a quantized method, return the activation dtype.
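A sketch of how a caller might honor this, mirroring what forward_autocast is documented to do below:

use candle_core::{Result, Tensor};

// Cast the input to the required activation dtype, if the layer names one.
fn cast_for_layer(layer: &dyn QuantMethod, a: &Tensor) -> Result<Tensor> {
    match layer.quantized_act_type() {
        Some(act) if a.dtype() != act => a.to_dtype(act),
        _ => Ok(a.clone()),
    }
}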
fn dtype_and_device(&self) -> (DType, Device)
Weight dtype and device.
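For example (a sketch; the shape is arbitrary), this is enough to allocate an input the layer can consume directly:

use candle_core::{Result, Tensor};

// Allocate a zero activation matching the layer's weight dtype and device.
fn zeros_input(layer: &dyn QuantMethod, in_features: usize) -> Result<Tensor> {
    let (dtype, device) = layer.dtype_and_device();
    Tensor::zeros((1, in_features), dtype, &device)
}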
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. This should be prescaled with alpha.
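A sketch of merging a LoRA adapter. The alpha / rank scaling rule and the (out, rank) x (rank, in) shapes are conventional LoRA assumptions, not from this crate's docs.

use std::sync::Arc;
use candle_core::{Result, Tensor};

// Merge a LoRA adapter into the base weights, prescaling the delta by alpha.
fn merge_lora(
    layer: &Arc<dyn QuantMethod>,
    lora_a: &Tensor, // assumed (rank, in_features)
    lora_b: &Tensor, // assumed (out_features, rank)
    alpha: f64,
    rank: usize,
) -> Result<Arc<dyn QuantMethod>> {
    let scale = alpha / rank as f64;
    let delta = lora_b.matmul(lora_a)?.affine(scale, 0.0)?;
    layer.add_delta_w(&delta)
}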
fn apply_isq(
    self: Arc<Self>,
    dtype: Option<IsqType>,
    device: Device,
    n_quantized: &AtomicUsize,
    imatrix_weight: Option<Vec<f32>>,
) -> Result<Arc<dyn QuantMethod>>
Apply in-situ quantization (ISQ) to this layer, returning the quantized layer on the given device. n_quantized counts the layers quantized so far; imatrix_weight optionally provides importance-matrix weights to guide quantization.
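A sketch of quantizing a single layer. IsqType::Q4K is only an example variant, and the shared counter would normally be reused across all layers for progress reporting.

use std::sync::{atomic::AtomicUsize, Arc};
use candle_core::{Device, Result};

// Requantize one layer to an example 4-bit type on the CPU, without an imatrix.
fn quantize_layer(layer: Arc<dyn QuantMethod>) -> Result<Arc<dyn QuantMethod>> {
    let n_quantized = AtomicUsize::new(0);
    layer.apply_isq(Some(IsqType::Q4K), Device::Cpu, &n_quantized, None)
}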
fn maybe_to_gguf_quant(self: Arc<Self>) -> Result<Arc<dyn QuantMethod>>
Convert to an equivalent GGUF quantization, if applicable.
fn get_bias_mut(&mut self) -> Option<&mut Tensor>
Get a mutable reference to the bias, if there is one.
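For instance (a sketch), resetting the bias in place:

use candle_core::Result;

// Zero the bias in place, if the layer has one.
fn zero_bias(layer: &mut dyn QuantMethod) -> Result<()> {
    if let Some(bias) = layer.get_bias_mut() {
        *bias = bias.zeros_like()?;
    }
    Ok(())
}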
fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>
If applicable, the maximum number of CPU threads to use when applying ISQ with the given dtype.
Provided Methods
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of self and a. self should contain the weights. Automatically casts a to the required quantization activation type and back.
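A sketch (the 4096-wide input and CPU device are arbitrary): an f32 input works even when the quantized kernels want a half-precision activation, and per the doc above the result comes back in the input's dtype.

use candle_core::{DType, Device, Result, Tensor};

// Feed an f32 activation through a layer whose kernels may want f16.
fn run_autocast(layer: &dyn QuantMethod) -> Result<Tensor> {
    let x = Tensor::randn(0f32, 1f32, (1, 4096), &Device::Cpu)?;
    let y = layer.forward_autocast(&x)?;
    assert_eq!(y.dtype(), DType::F32); // cast back after the matmul
    Ok(y)
}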
fn forward_via_half(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of self and a. self should contain the weights. This may go via half precision if it is supported.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
If available, return the unquantized weight and optional bias.
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an ImatrixLayerStats.
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.
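A sketch of a calibration pass tying these together with apply_isq, assuming forward records activation statistics while tracking is enabled and that the returned imatrix is one-dimensional:

use candle_core::{Result, Tensor};

// Collect an importance matrix (imatrix) from calibration activations.
fn collect_imatrix(
    layer: &mut dyn QuantMethod,
    calibration: &[Tensor],
) -> Result<Vec<f32>> {
    layer.begin_track_stats()?;
    for batch in calibration {
        let _ = layer.forward(batch)?; // stats accumulate as a side effect
    }
    let imatrix = layer.end_track_stats()?;
    // Assumed to be shaped for apply_isq's imatrix_weight parameter.
    imatrix.to_vec1::<f32>()
}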