Load a pre-quantized UQFF multimodal model

Load a pre-quantized UQFF multimodal model.

Run with: `cargo run --release --example uqff_multimodal -p mistralrs`

//! Load a pre-quantized UQFF multimodal model.
//!
//! Run with: `cargo run --release --example uqff_multimodal -p mistralrs`

use anyhow::Result;
use mistralrs::{IsqBits, MultimodalMessages, TextMessageRole, UqffMultimodalModelBuilder};

/// Builds the UQFF multimodal model, downloads a sample image, asks the model
/// to describe it, and prints the reply followed by throughput statistics.
///
/// # Errors
///
/// Returns an error if the model fails to build, the image cannot be
/// downloaded (including a non-success HTTP status) or decoded, the chat
/// request fails, or the response carries no message content.
#[tokio::main]
async fn main() -> Result<()> {
    const IMAGE_URL: &str = "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg";

    let model = UqffMultimodalModelBuilder::new(
        "EricB/Phi-3.5-vision-instruct-UQFF",
        vec!["phi3.5-vision-instruct-q8_0.uqff".into()],
    )
    .into_inner()
    .with_auto_isq(IsqBits::Four)
    .with_logging()
    .build()
    .await?;

    // Use the async reqwest API: `reqwest::blocking` must not be called from
    // inside an async runtime such as the one `#[tokio::main]` sets up.
    // `error_for_status` turns an HTTP error response into an error here
    // instead of handing an error page to the image decoder.
    let bytes = reqwest::get(IMAGE_URL)
        .await?
        .error_for_status()?
        .bytes()
        .await?;
    let image = image::load_from_memory(&bytes)?;

    let messages = MultimodalMessages::new().add_image_message(
        TextMessageRole::User,
        "What is depicted here? Please describe the scene in detail.",
        vec![image],
    );

    let response = model.send_chat_request(messages).await?;

    // Report a missing choice or missing content as an error rather than
    // panicking on an out-of-bounds index or `unwrap`.
    let reply = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_ref())
        .ok_or_else(|| anyhow::anyhow!("model response contained no message content"))?;

    println!("{reply}");
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Source: mistralrs/examples/quantization/uqff_multimodal/main.rs