// mistralrs_core/vision_models/mod.rs

use std::any::Any;

use candle_core::Tensor;

// Submodules of this module, plus a handful of re-exports. Every item below
// is declared `pub(crate)`, so these declarations expose nothing outside the
// crate.
pub(crate) mod clip;
pub(crate) mod idefics2;
// Re-export: lets crate code name this item without the `idefics2::` segment.
pub(crate) use idefics2::idefics2_input_processor;
pub(crate) mod image_processor;
pub(crate) mod llava;
pub(crate) mod mllama;
pub(crate) mod phi3;
// Re-export: lets crate code name this item without the `phi3::` segment.
pub(crate) use phi3::phi3_inputs_processor;
pub(crate) mod preprocessor_config;
pub(crate) mod processor_config;
pub(crate) mod qwen2vl;
// Re-exports from `llava` (declared above): two items named `llava15` /
// `llava_next` and their matching `*_inputs_processor` items.
pub(crate) use llava::llava15;
pub(crate) use llava::llava_inputs_processor;
pub(crate) use llava::llava_next;
pub(crate) use llava::llava_next_inputs_processor;
pub(crate) mod idefics3;

use crate::pipeline::text_models_inputs_processor::{FlashParams, PagedAttentionInputMetadata};

/// Inputs for a vision model, grouped into a single struct.
///
/// NOTE(review): the per-field descriptions below are inferred from field
/// names and types only; tensor shapes, dtypes and the exact meaning of each
/// index are not visible in this file — confirm against the code that builds
/// and consumes this struct.
pub struct ModelInputs {
    /// Input ids as a tensor (presumably token ids — TODO confirm shape/dtype).
    pub input_ids: Tensor,
    /// Sequence-length offsets as plain `usize` values
    /// (presumably one per sequence in the batch — TODO confirm).
    pub seqlen_offsets: Vec<usize>,
    /// Tensor-typed companion to `seqlen_offsets`
    /// (presumably the same offsets in tensor form for kernels — TODO confirm).
    pub seqlen_offsets_kernel: Tensor,
    /// Context lengths as `(usize, usize)` pairs; what each tuple element
    /// means is not shown here — TODO confirm with the producer.
    pub context_lens: Vec<(usize, usize)>,
    /// Position ids as plain `usize` values (indexing scheme not shown here).
    pub position_ids: Vec<usize>,
    /// Optional pixel-value tensor; `None` when no pixel values are supplied
    /// (presumably for inputs without images — TODO confirm).
    pub pixel_values: Option<Tensor>,
    /// Type-erased extra arguments. Because this is a `Box<dyn Any>`, the
    /// consumer has to downcast it to the concrete type it expects.
    pub model_specific_args: Box<dyn Any>,
    /// Optional paged-attention input metadata; `None` when absent.
    pub paged_attn_meta: Option<PagedAttentionInputMetadata>,
    /// Flash parameters (see `FlashParams` in
    /// `crate::pipeline::text_models_inputs_processor`).
    pub flash_meta: FlashParams,
}