mistralrs_core/dummy_paged_attention/
cache_engine.rs

1use std::{
2    collections::HashMap,
3    sync::{Arc, Mutex, MutexGuard},
4};
5
6use candle_core::{DType, Device, Result, Tensor};
7
8use super::config::ModelConfigLike;
9
/// Sizing parameters handed to [`CacheEngine::new`].
///
/// Nothing in this module reads these fields; the constructor here ignores the
/// whole config. The per-field notes below are therefore inferred from the
/// field names only.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CacheConfig {
    /// Size of one cache block (presumably in tokens — TODO confirm against
    /// the code that consumes this config).
    pub block_size: usize,
    /// Number of cache blocks on the GPU side (presumably the count to
    /// allocate — TODO confirm).
    pub num_gpu_blocks: usize,
    /// Number of cache blocks on the CPU side (presumably the swap space
    /// — TODO confirm).
    pub num_cpu_blocks: usize,
}
16
/// One cache entry: a pair of tensors (presumably the key tensor followed by
/// the value tensor, going by the `KV` in the name — TODO confirm).
pub type KVCache = (Tensor, Tensor);
18
/// Cache engine whose only state is a shared, mutex-protected list of
/// [`KVCache`] entries.
pub struct CacheEngine {
    /// The cache list. `new` creates it empty and `get_kv_cache` hands out a
    /// guard over it; nothing else in this file touches it.
    dummy_cache: Arc<Mutex<Vec<KVCache>>>,
}
22
23impl CacheEngine {
24    pub fn new(
25        _model_config: &dyn ModelConfigLike,
26        _cache_config: &CacheConfig,
27        _dtype: DType,
28        _device: &Device,
29        _layer_devices: Vec<Option<Device>>,
30    ) -> Result<Self> {
31        Ok(Self {
32            dummy_cache: Arc::new(Mutex::new(Vec::new())),
33        })
34    }
35
36    pub fn get_kv_cache(&self) -> MutexGuard<'_, Vec<KVCache>> {
37        loop {
38            if let Ok(v) = self.dummy_cache.try_lock() {
39                return v;
40            }
41        }
42    }
43}
44
45impl CacheEngine {
46    pub fn execute_scheduler_ops(
47        &self,
48        blocks_to_swap_in: HashMap<usize, usize>,
49        blocks_to_swap_out: HashMap<usize, usize>,
50        blocks_to_copy: HashMap<usize, Vec<usize>>,
51    ) -> Result<()> {
52        if !blocks_to_swap_in.is_empty() {
53            self.swap_in(blocks_to_swap_in)?;
54        }
55        if !blocks_to_swap_out.is_empty() {
56            self.swap_out(blocks_to_swap_out)?;
57        }
58        if !blocks_to_copy.is_empty() {
59            self.copy(blocks_to_copy)?;
60        }
61        Ok(())
62    }
63
64    pub fn swap_in(&self, _src_to_dst: HashMap<usize, usize>) -> Result<()> {
65        Ok(())
66    }
67
68    pub fn swap_out(&self, _src_to_dst: HashMap<usize, usize>) -> Result<()> {
69        Ok(())
70    }
71
72    pub fn copy(&self, _src_to_dst: HashMap<usize, Vec<usize>>) -> Result<()> {
73        Ok(())
74    }
75}
mistralrs_core/dummy_paged_attention/cache_engine.rs

mistralrs_core/dummy_paged_attention/
cache_engine.rs