mistralrs_core/vision_models/
preprocessor_config.rs

1use std::collections::HashMap;
2
3use candle_core::Result;
4use image::imageops::FilterType;
5use serde::Deserialize;
6
7#[derive(Deserialize, Debug, Clone, Default)]
8#[allow(dead_code)]
9pub struct PreProcessorConfig {
10    pub(crate) do_convert_rgb: Option<bool>,
11    pub(crate) do_image_splitting: Option<bool>,
12    pub(crate) do_normalize: Option<bool>,
13    pub(crate) do_pad: Option<bool>,
14    pub(crate) do_rescale: Option<bool>,
15    pub(crate) do_resize: Option<bool>,
16    pub(crate) do_center_crop: Option<bool>,
17    #[serde(alias = "norm_mean")]
18    pub(crate) image_mean: Option<[f64; 3]>,
19    #[serde(alias = "norm_std")]
20    pub(crate) image_std: Option<[f64; 3]>,
21    pub(crate) rescale_factor: Option<f64>,
22    #[serde(alias = "resample")]
23    pub(crate) resampling: Option<usize>,
24    pub(crate) max_image_size: Option<HashMap<String, u32>>,
25    pub(crate) size: Option<HashMap<String, u32>>,
26    pub(crate) crop_size: Option<HashMap<String, u32>>,
27    pub(crate) num_img_tokens: Option<usize>,
28    pub(crate) num_crops: Option<usize>,
29    pub(crate) max_image_tiles: Option<usize>,
30    pub(crate) min_pixels: Option<usize>,
31    pub(crate) max_pixels: Option<usize>,
32    pub(crate) patch_size: Option<usize>,
33    pub(crate) merge_size: Option<usize>,
34    pub(crate) temporal_patch_size: Option<usize>,
35    pub(crate) max_slice_nums: Option<usize>,
36    pub(crate) scale_resolution: Option<usize>,
37    pub(crate) image_feature_size: Option<usize>,
38    pub(crate) use_image_id: Option<bool>,
39    pub(crate) slice_mode: Option<bool>,
40    pub(crate) im_start_token: Option<String>,
41    pub(crate) slice_start_token: Option<String>,
42    pub(crate) unk_token: Option<String>,
43    pub(crate) im_end_token: Option<String>,
44    pub(crate) slice_end_token: Option<String>,
45    pub(crate) im_id_start: Option<String>,
46    pub(crate) im_id_end: Option<String>,
47    pub(crate) dynamic_hd: Option<usize>,
48    #[serde(alias = "image_seq_length")]
49    pub(crate) image_seq_len: Option<usize>,
50    pub(crate) pan_and_scan_min_crop_size: Option<usize>,
51    pub(crate) pan_and_scan_max_num_crops: Option<usize>,
52    pub(crate) pan_and_scan_min_ratio_to_activate: Option<f64>,
53    pub(crate) do_pan_and_scan: Option<bool>,
54    pub(crate) default_to_square: Option<bool>,
55}
56
57#[allow(dead_code)]
58pub(crate) trait ToFilter {
59    fn to_filter(self) -> Result<FilterType>;
60}
61
62impl ToFilter for Option<usize> {
63    // https://github.com/python-pillow/Pillow/blob/4b68563e8a818fb9c528fa159ddf3f4eaefa35e6/src/PIL/Image.py#L164-L170
64    // Default: https://github.com/huggingface/transformers/blob/0df888ffb72ea370555efdef45985378d3cc7b2b/src/transformers/models/idefics2/image_processing_idefics2.py#L226
65    fn to_filter(self) -> Result<FilterType> {
66        match self {
67            Some(0) => Ok(FilterType::Nearest),
68            Some(1) => Ok(FilterType::Lanczos3),
69            Some(2) | None => Ok(FilterType::Triangle), // BiLinear
70            Some(3) => Ok(FilterType::CatmullRom),      // BiCubic
71            Some(4) => Ok(FilterType::Nearest),
72            Some(x) => candle_core::bail!("Filter number {x} not supported"),
73        }
74    }
75}