pub trait QuantMethod:
Send
+ Sync
+ Debug {
// Required methods
fn new(method: QuantMethodConfig) -> Result<Self>
where Self: Sized;
fn dequantize_w(&self, out_ty: DType) -> Result<Tensor>;
fn forward(&self, a: &Tensor) -> Result<Tensor>;
fn quantized_act_type(&self) -> Option<DType>;
fn to_device(&self, dev: &Device) -> Result<Arc<dyn QuantMethod>>;
fn device(&self) -> Device;
fn size_in_bytes(&self) -> Result<usize>;
// Provided methods
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor> { ... }
fn forward_via_half(&self, a: &Tensor) -> Result<Tensor> { ... }
}
Expand description
Quantized method for a quantized matmul.
Required Methods§
fn new(method: QuantMethodConfig) -> Result<Self>
where
    Self: Sized,
fn dequantize_w(&self, out_ty: DType) -> Result<Tensor>
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of `self` and `a`. `self` should contain the weights.
fn quantized_act_type(&self) -> Option<DType>
If this is a quantized method, return the activation dtype.
fn to_device(&self, dev: &Device) -> Result<Arc<dyn QuantMethod>>
Cast this layer to the given device.
fn device(&self) -> Device
fn size_in_bytes(&self) -> Result<usize>
Provided Methods§
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of `self` and `a`. `self` should contain the weights.
Automatically casts to the required quantization activation type and back.
fn forward_via_half(&self, a: &Tensor) -> Result<Tensor>
Compute the matmul of `self` and `a`. `self` should contain the weights.
This may go via half precision if it is supported.