// mistralrs_core/dummy_paged_attention/cache_engine.rs
use std::{
    collections::HashMap,
    sync::{Arc, Mutex, MutexGuard, PoisonError},
};

use candle_core::{DType, Device, Result, Tensor};

use super::config::ModelConfigLike;
9
/// Sizing parameters for a block-based KV cache.
///
/// Nothing in the visible part of this file reads these fields; the
/// descriptions below are taken from the field names and should be confirmed
/// against the code that consumes them.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Size of one cache block (presumably in tokens — TODO confirm).
    pub block_size: usize,
    /// Number of cache blocks on the GPU side, per the name.
    pub num_gpu_blocks: usize,
    /// Number of cache blocks on the CPU side, per the name.
    pub num_cpu_blocks: usize,
}
16
17pub type KVCache = (Tensor, Tensor);
18
19pub struct CacheEngine {
20 dummy_cache: Arc<Mutex<Vec<KVCache>>>,
21}
22
23impl CacheEngine {
24 pub fn new(
25 _model_config: &dyn ModelConfigLike,
26 _cache_config: &CacheConfig,
27 _dtype: DType,
28 _device: &Device,
29 _layer_devices: Vec<Option<Device>>,
30 ) -> Result<Self> {
31 Ok(Self {
32 dummy_cache: Arc::new(Mutex::new(Vec::new())),
33 })
34 }
35
36 pub fn get_kv_cache(&self) -> MutexGuard<'_, Vec<KVCache>> {
37 loop {
38 if let Ok(v) = self.dummy_cache.try_lock() {
39 return v;
40 }
41 }
42 }
43}
44
45impl CacheEngine {
46 pub fn execute_scheduler_ops(
47 &self,
48 blocks_to_swap_in: HashMap<usize, usize>,
49 blocks_to_swap_out: HashMap<usize, usize>,
50 blocks_to_copy: HashMap<usize, Vec<usize>>,
51 ) -> Result<()> {
52 if !blocks_to_swap_in.is_empty() {
53 self.swap_in(blocks_to_swap_in)?;
54 }
55 if !blocks_to_swap_out.is_empty() {
56 self.swap_out(blocks_to_swap_out)?;
57 }
58 if !blocks_to_copy.is_empty() {
59 self.copy(blocks_to_copy)?;
60 }
61 Ok(())
62 }
63
64 pub fn swap_in(&self, _src_to_dst: HashMap<usize, usize>) -> Result<()> {
65 Ok(())
66 }
67
68 pub fn swap_out(&self, _src_to_dst: HashMap<usize, usize>) -> Result<()> {
69 Ok(())
70 }
71
72 pub fn copy(&self, _src_to_dst: HashMap<usize, Vec<usize>>) -> Result<()> {
73 Ok(())
74 }
75}