// mistralrs_vision/ops.rs

use candle_core::{Result, Tensor};

/// Zero-pad an image of shape (c, h, w) on its right and bottom edges up to (c, max_h, max_w).
///
/// A zero tensor with the input's dtype and device is allocated at the target shape, and the
/// input is written into its top-left (c, h, w) region.
///
/// # Errors
///
/// Propagates any error returned by `Tensor::dims3`, `Tensor::zeros` or `Tensor::slice_assign`.
pub fn pad(image: &Tensor, max_h: usize, max_w: usize) -> Result<Tensor> {
    let (channels, height, width) = image.dims3()?;
    let canvas_shape = (channels, max_h, max_w);
    let canvas = Tensor::zeros(canvas_shape, image.dtype(), image.device())?;
    // Overwrite the top-left region of the zero canvas with the original pixels.
    canvas.slice_assign(&[&(..channels), &(..height), &(..width)], image)
}

/// Generate a 2-D pixel mask of shape (max_h, max_w). 1 indicates valid pixel, 0 indicates
/// padding.
///
/// The input tensor is of shape (c, max_h, max_w); only its two trailing dimensions, its dtype
/// and its device are used. The channel dimension is dropped, so the returned mask is
/// (max_h, max_w) rather than the same shape as the input. The valid region is the top-left
/// corner of the mask, whose size is passed as `h` and `w`.
///
/// # Errors
///
/// Propagates any error returned by `Tensor::dims3`, `Tensor::ones`, `Tensor::zeros` or
/// `Tensor::slice_assign`.
pub fn make_pixel_mask(image: &Tensor, h: usize, w: usize) -> Result<Tensor> {
    let (_c, max_h, max_w) = image.dims3()?;
    // Block of ones covering the valid (unpadded) pixels.
    let valid = Tensor::ones((h, w), image.dtype(), image.device())?;
    // Full-size mask, initially marking every position as padding.
    let mask = Tensor::zeros((max_h, max_w), image.dtype(), image.device())?;
    // TODO(EricLBuehler): https://github.com/huggingface/candle/pull/2223 will make this nicer
    mask.slice_assign(&[&(..h), &(..w)], &valid)
}

/// Given the image size (h, w) and the minimum and maximum edge lengths, calculate the
/// (height, width) to resize to, preserving the aspect ratio while respecting the maximum length.
///
/// If the longer edge (the width on ties) exceeds `max_len`, it is set to `max_len` and the
/// other edge is scaled by the aspect ratio, truncating toward zero. Otherwise the size is left
/// as is. Finally both edges are raised to at least `min_len`; this last clamp is applied per
/// edge and can therefore change the aspect ratio.
pub fn get_resize_image_size(
    (h, w): (usize, usize),
    (min_len, max_len): (usize, usize),
) -> (usize, usize) {
    let ratio = w as f64 / h as f64;
    let limit = max_len as f64;

    // `true` arm: width is the longer (or equal) edge; `false` arm: height is strictly longer.
    let (fit_h, fit_w) = match w >= h {
        true if w > max_len => ((limit / ratio) as usize, max_len),
        false if h > max_len => (max_len, (limit * ratio) as usize),
        _ => (h, w),
    };

    (usize::max(fit_h, min_len), usize::max(fit_w, min_len))
}