pub struct AfqLayer { /* private fields */ }
Implementations
impl AfqLayer
pub fn get_isq_type_from_uqff(data: Cow<'_, [u8]>) -> Result<IsqType>
pub fn afq_linear_b(in_dim: usize, out_dim: usize, config: &QuantizedConfig, bias: bool, vb: ShardedVarBuilder) -> Result<Arc<dyn QuantMethod>>
pub fn afq_packed_linear_b(num_local_experts: usize, in_dim: usize, out_dim: usize, config: &QuantizedConfig, bias: bool, vb: ShardedVarBuilder) -> Result<Arc<dyn QuantMethod>>
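For context, a minimal sketch of wiring these constructors into a model, assuming a `QuantizedConfig` describing the AFQ settings and a `ShardedVarBuilder` positioned at the layer prefix are already in hand; the helper name, prefixes, and import paths below are illustrative, not part of this API:

```rust
use std::sync::Arc;

use candle_core::Result;
use mistralrs_quant::{AfqLayer, QuantMethod, QuantizedConfig, ShardedVarBuilder};

// Hypothetical helper: build one dense AFQ projection and one packed-experts
// AFQ layer from an already-parsed quantization config and var builder.
fn build_afq_projections(
    hidden: usize,
    intermediate: usize,
    num_local_experts: usize,
    cfg: &QuantizedConfig,
    vb: ShardedVarBuilder,
) -> Result<(Arc<dyn QuantMethod>, Arc<dyn QuantMethod>)> {
    // Dense projection; `false` because this checkpoint stores no bias tensor.
    let proj = AfqLayer::afq_linear_b(hidden, intermediate, cfg, false, vb.pp("proj"))?;
    // All local experts packed into one layer, to be used with gather_forward.
    let experts = AfqLayer::afq_packed_linear_b(
        num_local_experts,
        hidden,
        intermediate,
        cfg,
        false,
        vb.pp("experts"),
    )?;
    Ok((proj, experts))
}
```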
Trait Implementations
impl QuantMethod for AfqLayer
fn new(method: QuantMethodConfig) -> Result<Self>
where
    Self: Sized,
fn dequantize_w(&self) -> Result<Tensor>
fn forward(&self, x: &Tensor) -> Result<Tensor>
Compute matmul of `self` and `a`. `self` should contain the weights.
fn gather_forward(&self, x: &Tensor, indices: &Tensor) -> Result<Tensor>
fn quantized_act_type(&self) -> Option<DType>
If a quantized method, return the activation dtype.
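As an illustration, a minimal sketch of how a caller could use this to cast activations around forward; this is essentially what forward_autocast (below) does automatically, and the function name here is hypothetical:

```rust
use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hypothetical call site: cast to the quantized activation dtype, matmul, cast back.
fn quantized_matmul(layer: &dyn QuantMethod, x: &Tensor) -> Result<Tensor> {
    match layer.quantized_act_type() {
        Some(act_dtype) => {
            let y = layer.forward(&x.to_dtype(act_dtype)?)?;
            y.to_dtype(x.dtype())
        }
        // No activation dtype constraint: call forward directly.
        None => layer.forward(x),
    }
}
```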
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. This should be prescaled with alpha.
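For example, merging a prescaled LoRA delta might look like the following sketch; the adapter shapes and scaling convention are assumptions about the caller, not part of this API:

```rust
use std::sync::Arc;

use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hypothetical LoRA merge: delta = (alpha / rank) * (B @ A), prescaled before the call.
fn merge_lora(
    layer: Arc<dyn QuantMethod>,
    lora_a: &Tensor, // (rank, in_dim)
    lora_b: &Tensor, // (out_dim, rank)
    alpha: f64,
    rank: usize,
) -> Result<Arc<dyn QuantMethod>> {
    let delta = lora_b.matmul(lora_a)?.affine(alpha / rank as f64, 0.0)?;
    layer.add_delta_w(&delta)
}
```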
fn dtype_and_device(&self) -> (DType, Device)
Weight dtype and device
fn apply_isq(
    self: Arc<Self>,
    _dtype: Option<IsqType>,
    _device: Device,
    _n_quantized: &AtomicUsize,
    _imatrix_weight: Option<Vec<f32>>,
    _guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>
If the quant is backed by a qmatmul.
fn forward_autocast(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of `self` and `a`. `self` should contain the weights. Automatically cast to required quantization activation type and back.
fn gather_forward_autocast(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
Compute matmul of `self` and `a`. `self` should contain the weights. Automatically cast to required quantization activation type and back.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
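A sketch of how the gather variant pairs with a packed-experts layer built by afq_packed_linear_b, routing each token through its selected expert; the router output and names here are assumptions about the caller's setup:

```rust
use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hypothetical MoE call site: `experts` packs all local experts into one layer,
// `expert_indices` holds the router's per-token choices.
fn route_through_experts(
    experts: &dyn QuantMethod,
    x: &Tensor,
    expert_indices: &Tensor,
) -> Result<Tensor> {
    // Gathers the selected expert weights and matmuls in one call, casting to
    // the quantized activation dtype and back automatically.
    experts.gather_forward_autocast(x, expert_indices)
}
```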
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an ImatrixLayerStats
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.
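A sketch of collecting an imatrix over calibration batches with these two calls, assuming that forward passes made while tracking is active feed the stats; the loop and names are placeholders:

```rust
use candle_core::{Result, Tensor};
use mistralrs_quant::QuantMethod;

// Hypothetical calibration pass: track activation stats, then read out the imatrix.
fn collect_imatrix(
    layer: &mut dyn QuantMethod,
    calibration_batches: &[Tensor],
) -> Result<Tensor> {
    layer.begin_track_stats()?;
    for batch in calibration_batches {
        // Run the layer over calibration data while tracking is active.
        let _ = layer.forward_autocast(batch)?;
    }
    // Stop tracking and return the computed importance matrix.
    layer.end_track_stats()
}
```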
fn is_distributed(&self) -> Option<DistributedKind>
impl QuantizedSerde for AfqLayer
fn name(&self) -> &'static str
fn isq_serde_supported(&self) -> bool
fn serialize_with_bias(&self, bias: Option<Tensor>) -> Result<Cow<'_, [u8]>>
NOT meant for external calling
fn deserialize(
data: Cow<'_, [u8]>,
device: &Device,
_comm: &Arc<Comm>,
guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>
where
    Self: Sized,
fn deserialize_ext_bias(
data: Cow<'_, [u8]>,
device: &Device,
guard: QuantizeOntoGuard,
) -> Result<(Arc<dyn QuantMethod>, Option<Tensor>)>
where
    Self: Sized,
fn serialize(&self) -> Result<Cow<'_, [u8]>>
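A sketch of a round trip through these methods, with the device, communicator, and guard supplied by the caller; the import paths and function name are illustrative:

```rust
use std::borrow::Cow;
use std::sync::Arc;

use candle_core::{Device, Result};
use mistralrs_quant::{AfqLayer, Comm, QuantMethod, QuantizeOntoGuard, QuantizedSerde};

// Hypothetical round trip: serialize an AFQ layer, then rebuild it from the bytes.
fn roundtrip(
    layer: &AfqLayer,
    device: &Device,
    comm: &Arc<Comm>,
    guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>> {
    let bytes = layer.serialize()?;
    // Optionally inspect the stored ISQ type before reconstructing.
    let _isq = AfqLayer::get_isq_type_from_uqff(Cow::Borrowed(bytes.as_ref()))?;
    AfqLayer::deserialize(bytes, device, comm, guard)
}
```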
Auto Trait Implementations
impl Freeze for AfqLayer
impl !RefUnwindSafe for AfqLayer
impl Send for AfqLayer
impl Sync for AfqLayer
impl Unpin for AfqLayer
impl !UnwindSafe for AfqLayer
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> Instrument for T
fn instrument(self, span: Span) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise.