mistralrs_core/vision_models/
image_processor.rs1#![allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
2
3use candle_core::{Device, Result, Tensor};
4use image::DynamicImage;
5
6use crate::pipeline::InputsProcessor;
7
8use super::preprocessor_config::PreProcessorConfig;
9
10#[allow(dead_code)]
11pub(crate) struct PreprocessedImages {
12 pub(crate) pixel_values: Tensor,
15 pub(crate) pixel_attention_mask: Option<Tensor>,
17 pub(crate) image_sizes: Option<(usize, usize)>,
19 pub(crate) num_img_tokens: Option<Vec<usize>>,
20 pub(crate) aspect_ratio_ids: Option<Tensor>,
22 pub(crate) aspect_ratio_mask: Option<Tensor>,
24 pub(crate) num_tiles: Option<Vec<usize>>,
26 pub(crate) image_grid_thw: Option<Tensor>,
28 pub(crate) video_grid_thw: Option<Tensor>,
30 pub(crate) rows: Option<Vec<usize>>,
32 pub(crate) cols: Option<Vec<usize>>,
34 pub(crate) pixel_values_list: Option<Vec<Tensor>>,
36 pub(crate) tgt_sizes: Option<Tensor>,
38 pub(crate) image_sizes_all: Option<Vec<(u32, u32)>>,
40 pub(crate) num_crops: Option<Vec<usize>>,
42}
43
44pub trait ImagePreProcessor: InputsProcessor {
46 const DEFAULT_MEAN: [f64; 3];
47 const DEFAULT_STD: [f64; 3];
48
49 #[allow(clippy::too_many_arguments)]
53 fn preprocess(
54 &self,
55 images: Vec<DynamicImage>,
56 videos: Vec<Vec<DynamicImage>>,
57 config: &PreProcessorConfig,
58 device: &Device,
59 batch_info: (usize, usize),
60 ) -> Result<PreprocessedImages>;
61}