mistralrs_core/scheduler/
mod.rs

1mod default_scheduler;
2
3use std::sync::Arc;
4
5pub use default_scheduler::{DefaultScheduler, DefaultSchedulerMethod, DefaultSchedulerOutput};
6use tokio::sync::Mutex;
7
8use crate::{
9    paged_attention::{
10        BlockEngine, BlockTables, CacheConfig, PagedAttentionScheduler,
11        PagedAttentionSchedulerConfig, PagedAttentionSchedulerOutput,
12    },
13    sequence::Sequence,
14};
15
16#[derive(Clone)]
17pub enum SchedulerConfig {
18    DefaultScheduler {
19        method: DefaultSchedulerMethod,
20    },
21    PagedAttentionMeta {
22        max_num_seqs: usize,
23        config: CacheConfig,
24    },
25}
26
27impl SchedulerConfig {
28    pub fn into_scheduler(self) -> Arc<Mutex<dyn Scheduler>> {
29        match self {
30            Self::DefaultScheduler { method } => {
31                Arc::new(Mutex::new(DefaultScheduler::new(method)))
32            }
33            Self::PagedAttentionMeta {
34                max_num_seqs,
35                config,
36            } => Arc::new(Mutex::new(PagedAttentionScheduler::new(
37                PagedAttentionSchedulerConfig { max_num_seqs },
38                config,
39            ))),
40        }
41    }
42}
43
44pub enum SchedulerOutput<'a> {
45    DefaultScheduler {
46        output: DefaultSchedulerOutput<'a>,
47    },
48    PagedAttention {
49        output: PagedAttentionSchedulerOutput,
50    },
51}
52
53pub trait Scheduler: Send + Sync {
54    fn schedule(&mut self) -> SchedulerOutput<'_>;
55    fn waiting_len(&self) -> usize;
56    fn running_len(&self) -> usize;
57    fn add_seq(&mut self, seq: Sequence);
58    /// This may do nothing. It depends on the implementation
59    fn free_finished_sequence_groups(&mut self);
60
61    // PagedAttention metadata
62    fn block_tables(&self) -> Option<&BlockTables>;
63    fn block_size(&self) -> Option<usize>;
64    fn block_engine(&mut self) -> Option<&mut BlockEngine>;
65}