mistralrs_core/scheduler/
mod.rs1mod default_scheduler;
2
3use std::sync::Arc;
4
5pub use default_scheduler::{DefaultScheduler, DefaultSchedulerMethod, DefaultSchedulerOutput};
6use tokio::sync::Mutex;
7
8use crate::{
9 paged_attention::{
10 BlockEngine, BlockTables, CacheConfig, PagedAttentionScheduler,
11 PagedAttentionSchedulerConfig, PagedAttentionSchedulerOutput,
12 },
13 sequence::Sequence,
14};
15
16#[derive(Clone)]
17pub enum SchedulerConfig {
18 DefaultScheduler {
19 method: DefaultSchedulerMethod,
20 },
21 PagedAttentionMeta {
22 max_num_seqs: usize,
23 config: CacheConfig,
24 },
25}
26
27impl SchedulerConfig {
28 pub fn into_scheduler(self) -> Arc<Mutex<dyn Scheduler>> {
29 match self {
30 Self::DefaultScheduler { method } => {
31 Arc::new(Mutex::new(DefaultScheduler::new(method)))
32 }
33 Self::PagedAttentionMeta {
34 max_num_seqs,
35 config,
36 } => Arc::new(Mutex::new(PagedAttentionScheduler::new(
37 PagedAttentionSchedulerConfig { max_num_seqs },
38 config,
39 ))),
40 }
41 }
42}
43
44pub enum SchedulerOutput<'a> {
45 DefaultScheduler {
46 output: DefaultSchedulerOutput<'a>,
47 },
48 PagedAttention {
49 output: PagedAttentionSchedulerOutput,
50 },
51}
52
53pub trait Scheduler: Send + Sync {
54 fn schedule(&mut self) -> SchedulerOutput<'_>;
55 fn waiting_len(&self) -> usize;
56 fn running_len(&self) -> usize;
57 fn add_seq(&mut self, seq: Sequence);
58 fn free_finished_sequence_groups(&mut self);
60
61 fn block_tables(&self) -> Option<&BlockTables>;
63 fn block_size(&self) -> Option<usize>;
64 fn block_engine(&mut self) -> Option<&mut BlockEngine>;
65}