mistralrs_core/scheduler/
mod.rs

1mod default_scheduler;
2
3use std::sync::Arc;
4
5pub use default_scheduler::{DefaultScheduler, DefaultSchedulerMethod, DefaultSchedulerOutput};
6use tokio::sync::Mutex;
7
8use crate::{
9    engine::IntervalLogger,
10    paged_attention::{
11        BlockEngine, BlockTables, CacheConfig, PagedAttentionScheduler,
12        PagedAttentionSchedulerConfig, PagedAttentionSchedulerOutput,
13    },
14    sequence::Sequence,
15};
16
17#[derive(Clone)]
18pub enum SchedulerConfig {
19    DefaultScheduler {
20        method: DefaultSchedulerMethod,
21    },
22    PagedAttentionMeta {
23        max_num_seqs: usize,
24        config: CacheConfig,
25    },
26}
27
28impl SchedulerConfig {
29    pub fn into_scheduler(self) -> Arc<Mutex<dyn Scheduler>> {
30        match self {
31            Self::DefaultScheduler { method } => {
32                Arc::new(Mutex::new(DefaultScheduler::new(method)))
33            }
34            Self::PagedAttentionMeta {
35                max_num_seqs,
36                config,
37            } => Arc::new(Mutex::new(PagedAttentionScheduler::new(
38                PagedAttentionSchedulerConfig { max_num_seqs },
39                config,
40            ))),
41        }
42    }
43}
44
45pub enum SchedulerOutput<'a> {
46    DefaultScheduler {
47        output: DefaultSchedulerOutput<'a>,
48    },
49    PagedAttention {
50        output: PagedAttentionSchedulerOutput,
51    },
52}
53
54pub trait Scheduler: Send + Sync {
55    fn schedule(&mut self, logger: &IntervalLogger) -> SchedulerOutput<'_>;
56    fn waiting_len(&self) -> usize;
57    fn running_len(&self) -> usize;
58    fn add_seq(&mut self, seq: Sequence);
59    /// This may do nothing. It depends on the implementation
60    fn free_finished_sequence_groups(&mut self);
61
62    // PagedAttention metadata
63    fn block_tables(&self) -> Option<BlockTables>;
64    fn block_size(&self) -> Option<usize>;
65    fn block_engine(&self) -> Option<Arc<Mutex<BlockEngine>>>;
66}