pub trait QuantMethod:
Send
+ Sync
+ Debug
+ QuantizedSerde {
// Required methods
fn new(method: QuantMethodConfig) -> Result<Self>
where Self: Sized;
fn dequantize_w(&self) -> Result<Tensor>;
fn forward(&self, a: &Tensor) -> Result<Tensor>;
fn quantized_act_type(&self) -> Option<DType>;
fn dtype_and_device(&self) -> (DType, Device);
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>;
fn apply_isq(
self: Arc<Self>,
dtype: Option<IsqType>,
device: Device,
n_quantized: &AtomicUsize,
imatrix_weight: Option<Vec<f32>>,
) -> Result<Arc<dyn QuantMethod>>;
fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>;
// Provided methods
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor> { ... }
fn forward_via_half(&self, a: &Tensor) -> Result<Tensor> { ... }
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)> { ... }
fn begin_track_stats(&mut self) -> Result<()> { ... }
fn end_track_stats(&self) -> Result<Tensor> { ... }
}
Quantized method for a quantized matmul.
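The trait is object-safe (its own methods return Arc<dyn QuantMethod>), so models typically hold layers behind Arc. A minimal usage sketch, assuming the mistralrs_quant and candle_core crates; QLinear is a hypothetical wrapper, not part of this crate:

use std::sync::Arc;

use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hypothetical wrapper: a linear layer whose weights live behind some
// QuantMethod implementation (GGUF, GPTQ, unquantized, ...).
struct QLinear {
    inner: Arc<dyn QuantMethod>,
}

impl QLinear {
    fn forward(&self, x: &Tensor) -> Result<Tensor> {
        // forward_autocast casts x to the method's activation dtype
        // (if quantized) before the matmul and casts the result back.
        self.inner.forward_autocast(x)
    }
}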
Required Methods
fn new(method: QuantMethodConfig) -> Result<Self>
where
    Self: Sized
fn dequantize_w(&self) -> Result<Tensor>
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of self and a. self should contain the weights.
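For example, a hedged sketch of a call; the input dimension and zero-filled input are arbitrary assumptions:

use std::sync::Arc;

use candle_core::{DType, Device, Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hedged sketch: `layer` holds a weight of shape
// (out_features, in_features); 4096 is an assumed input dimension.
fn project(layer: &Arc<dyn QuantMethod>, batch: usize) -> Result<Tensor> {
    let a = Tensor::zeros((batch, 4096), DType::F32, &Device::Cpu)?;
    layer.forward(&a) // expected output shape: (batch, out_features)
}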
fn quantized_act_type(&self) -> Option<DType>
If a quantized method, return the activation dtype.
fn dtype_and_device(&self) -> (DType, Device)
Weight dtype and device.
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. This should be prescaled with alpha.
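A hedged sketch of merging a LoRA adapter under the common delta = (alpha / rank) * B A convention; lora_a, lora_b, and the scaling details are assumptions beyond what the docs state (the trait only requires that the delta already be scaled by alpha):

use std::sync::Arc;

use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hedged sketch: merge a prescaled LoRA delta into the weights.
// Assumed shapes: lora_a (rank, in_features), lora_b (out_features, rank).
fn merge_lora(
    base: Arc<dyn QuantMethod>,
    lora_a: &Tensor,
    lora_b: &Tensor,
    alpha: f64,
    rank: usize,
) -> Result<Arc<dyn QuantMethod>> {
    // delta = (alpha / rank) * B A, prescaled as the docs require.
    let delta = lora_b.matmul(lora_a)?.affine(alpha / rank as f64, 0.0)?;
    base.add_delta_w(&delta)
}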
fn apply_isq(
    self: Arc<Self>,
    dtype: Option<IsqType>,
    device: Device,
    n_quantized: &AtomicUsize,
    imatrix_weight: Option<Vec<f32>>,
) -> Result<Arc<dyn QuantMethod>>
If the quant is backed by a qmatmul.
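A hedged sketch of driving in-situ quantization (ISQ) over a set of layers; IsqType::Q4K is an assumed variant name and the loop is illustrative:

use std::sync::atomic::AtomicUsize;
use std::sync::Arc;

use candle_core::{Device, Result};
use mistralrs_quant::{IsqType, QuantMethod};

// Hedged sketch: requantize every layer in place, counting progress
// through the shared AtomicUsize. No imatrix weights are supplied here.
fn quantize_all(
    layers: Vec<Arc<dyn QuantMethod>>,
    device: Device,
) -> Result<Vec<Arc<dyn QuantMethod>>> {
    let n_quantized = AtomicUsize::new(0);
    layers
        .into_iter()
        .map(|l| l.apply_isq(Some(IsqType::Q4K), device.clone(), &n_quantized, None))
        .collect()
}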
fn get_max_isq_cpu_threads(&self, dtype: IsqType) -> Option<NonZeroUsize>
Provided Methods
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of self and a. self should contain the weights. Automatically cast to the required quantization activation type and back.
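Judging from quantized_act_type, the provided method plausibly behaves like the following sketch; this is an inference, not the crate's actual body:

use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Sketch of the likely autocast behavior (an assumption).
fn forward_autocast_sketch(m: &dyn QuantMethod, a: &Tensor) -> Result<Tensor> {
    match m.quantized_act_type() {
        // Cast activations to the quantized activation dtype, run the
        // matmul, then cast the result back to the original dtype.
        Some(dt) => m.forward(&a.to_dtype(dt)?)?.to_dtype(a.dtype()),
        None => m.forward(a),
    }
}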
fn forward_via_half(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of self and a. self should contain the weights. This may go via half precision if it is supported.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an ImatrixLayerStats.
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.
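A hedged sketch of an imatrix calibration pass combining the two hooks; the batches input is an assumption:

use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hedged sketch: collect an importance matrix for one layer over a set
// of calibration activations (`batches` is an assumed input).
fn collect_imatrix(layer: &mut dyn QuantMethod, batches: &[Tensor]) -> Result<Tensor> {
    layer.begin_track_stats()?;
    for a in batches {
        // Each forward contributes activation statistics to the
        // ImatrixLayerStats started above.
        let _ = layer.forward_autocast(a)?;
    }
    // Returns the computed imatrix and stops tracking.
    layer.end_track_stats()
}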