pub struct SpeculativePipeline { /* private fields */ }
Expand description
Speculative decoding pipeline: https://arxiv.org/pdf/2211.17192
§Algorithm
Given draft model q and target model p with probability distributions
q_i(x) and p_i(x) for each token
- Keep the sample for token i if q_i(x) <= p_i(x)
- This means the target model agrees
- Else (q_i(x) > p_i(x)) accept that token with prob p_i(x)/q_i(x)
- If rejected, sample a token from p'_i(x) = norm(max(0, p_i(x) − q_i(x))) and do not take any more.
Implementations§
source§impl SpeculativePipeline
impl SpeculativePipeline
pub fn new( target: Arc<Mutex<dyn Pipeline>>, draft: Arc<Mutex<dyn Pipeline>>, config: SpeculativeConfig, ) -> Result<SpeculativePipeline, Error>
Trait Implementations§
source§impl Pipeline for SpeculativePipeline
impl Pipeline for SpeculativePipeline
fn forward_inputs( &mut self, _inputs: Box<dyn Any>, _return_raw_logits: bool, ) -> Result<ForwardInputsResult, Error>
fn sample_causal_gen<'life0, 'life1, 'life2, 'life3, 'async_trait>(
&'life0 self,
_seqs: &'life1 mut [&'life2 mut Sequence],
_logits: Vec<Tensor>,
_prefix_cacher: &'life3 mut PrefixCacheManagerV2,
_disable_eos_stop: bool,
_rng: Arc<Mutex<Isaac64Rng>>,
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
SpeculativePipeline: 'async_trait,
source§fn step<'life0, 'life1, 'life2, 'life3, 'life4, 'async_trait>(
&'life0 mut self,
input_seqs: &'life1 mut [&'life2 mut Sequence],
is_prompt: bool,
_return_raw_logits: bool,
prefix_cacher: &'life3 mut PrefixCacheManagerV2,
disable_eos_stop: bool,
rng: Arc<Mutex<Isaac64Rng>>,
backend_metadata: CacheBackendMetadata<'life4>,
) -> Pin<Box<dyn Future<Output = Result<Duration, Error>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
'life4: 'async_trait,
SpeculativePipeline: 'async_trait,
fn step<'life0, 'life1, 'life2, 'life3, 'life4, 'async_trait>(
&'life0 mut self,
input_seqs: &'life1 mut [&'life2 mut Sequence],
is_prompt: bool,
_return_raw_logits: bool,
prefix_cacher: &'life3 mut PrefixCacheManagerV2,
disable_eos_stop: bool,
rng: Arc<Mutex<Isaac64Rng>>,
backend_metadata: CacheBackendMetadata<'life4>,
) -> Pin<Box<dyn Future<Output = Result<Duration, Error>> + Send + 'async_trait>>where
'life0: 'async_trait,
'life1: 'async_trait,
'life2: 'async_trait,
'life3: 'async_trait,
'life4: 'async_trait,
SpeculativePipeline: 'async_trait,
Returns the total model execution time.
fn category(&self) -> ModelCategory
Auto Trait Implementations§
impl Freeze for SpeculativePipeline
impl !RefUnwindSafe for SpeculativePipeline
impl Send for SpeculativePipeline
impl Sync for SpeculativePipeline
impl Unpin for SpeculativePipeline
impl !UnwindSafe for SpeculativePipeline
Blanket Implementations§
source§impl<T> BorrowMut<T> for Twhere
T: ?Sized,
impl<T> BorrowMut<T> for Twhere
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
§impl<T> Downcast for Twhere
T: AsAny + ?Sized,
impl<T> Downcast for Twhere
T: AsAny + ?Sized,
§fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
fn downcast_ref<T>(&self) -> Option<&T>where
T: AsAny,
Forward to the method defined on the type Any.
§fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
fn downcast_mut<T>(&mut self) -> Option<&mut T>where
T: AsAny,
Forward to the method defined on the type Any.
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
source§impl<T> IntoEither for T
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more