mistralrs_core/scheduler/
mod.rs1mod default_scheduler;
2
3use std::sync::Arc;
4
5pub use default_scheduler::{DefaultScheduler, DefaultSchedulerMethod, DefaultSchedulerOutput};
6use tokio::sync::Mutex;
7
8use crate::{
9 engine::IntervalLogger,
10 paged_attention::{
11 BlockEngine, BlockTables, CacheConfig, PagedAttentionScheduler,
12 PagedAttentionSchedulerConfig, PagedAttentionSchedulerOutput,
13 },
14 sequence::Sequence,
15};
16
17#[derive(Clone)]
18pub enum SchedulerConfig {
19 DefaultScheduler {
20 method: DefaultSchedulerMethod,
21 },
22 PagedAttentionMeta {
23 max_num_seqs: usize,
24 config: CacheConfig,
25 },
26}
27
28impl SchedulerConfig {
29 pub fn into_scheduler(self) -> Arc<Mutex<dyn Scheduler>> {
30 match self {
31 Self::DefaultScheduler { method } => {
32 Arc::new(Mutex::new(DefaultScheduler::new(method)))
33 }
34 Self::PagedAttentionMeta {
35 max_num_seqs,
36 config,
37 } => Arc::new(Mutex::new(PagedAttentionScheduler::new(
38 PagedAttentionSchedulerConfig { max_num_seqs },
39 config,
40 ))),
41 }
42 }
43}
44
45pub enum SchedulerOutput<'a> {
46 DefaultScheduler {
47 output: DefaultSchedulerOutput<'a>,
48 },
49 PagedAttention {
50 output: PagedAttentionSchedulerOutput,
51 },
52}
53
54pub trait Scheduler: Send + Sync {
55 fn schedule(&mut self, logger: &IntervalLogger) -> SchedulerOutput<'_>;
56 fn waiting_len(&self) -> usize;
57 fn running_len(&self) -> usize;
58 fn add_seq(&mut self, seq: Sequence);
59 fn free_finished_sequence_groups(&mut self);
61
62 fn block_tables(&self) -> Option<BlockTables>;
64 fn block_size(&self) -> Option<usize>;
65 fn block_engine(&self) -> Option<Arc<Mutex<BlockEngine>>>;
66}