Skip to content

Load a pre-quantized UQFF multimodal model

Load a pre-quantized UQFF multimodal model.

Run with: `cargo run --release --example uqff_multimodal -p mistralrs`

//! Load a pre-quantized UQFF multimodal model.
//!
//! Run with: `cargo run --release --example uqff_multimodal -p mistralrs`
use anyhow::Result;
use mistralrs::{IsqBits, MultimodalMessages, TextMessageRole, UqffMultimodalModelBuilder};
/// Loads a pre-quantized UQFF multimodal model, sends it one image together with a
/// text prompt, and prints the model's reply followed by throughput statistics.
///
/// # Errors
///
/// Returns an error if the model cannot be built, the image cannot be downloaded
/// (including a non-success HTTP status) or decoded, the chat request fails, or the
/// response carries no text content.
#[tokio::main]
async fn main() -> Result<()> {
    let model = UqffMultimodalModelBuilder::new(
        "EricB/Phi-3.5-vision-instruct-UQFF",
        vec!["phi3.5-vision-instruct-q8_0.uqff".into()],
    )
    .into_inner()
    .with_auto_isq(IsqBits::Four)
    .with_logging()
    .build()
    .await?;

    // Use reqwest's async API here: the `reqwest::blocking` functions must not be
    // called from inside an async runtime (they may panic, and otherwise stall the
    // executor thread while the download is in flight).
    let bytes = reqwest::get(
        "https://cdn.britannica.com/45/5645-050-B9EC0205/head-treasure-flower-disk-flowers-inflorescence-ray.jpg",
    )
    .await?
    // Surface HTTP failures directly instead of as a confusing image-decode error.
    .error_for_status()?
    .bytes()
    .await?;
    let image = image::load_from_memory(&bytes)?;

    let messages = MultimodalMessages::new().add_image_message(
        TextMessageRole::User,
        "What is depicted here? Please describe the scene in detail.",
        vec![image],
    );

    let response = model.send_chat_request(messages).await?;

    // Report an empty choice list or missing content as an error rather than panicking.
    let reply = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_ref())
        .ok_or_else(|| anyhow::anyhow!("model response contained no text content"))?;
    println!("{}", reply);

    // Throughput figures reported by the response's usage block.
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    Ok(())
}

Source: mistralrs/examples/quantization/uqff_multimodal/main.rs