Skip to content

Load and run a GGUF model from a local file path

Load and run a GGUF model from a local file path.

Run with: cargo run --release --example gguf_locally -p mistralrs

//! Load and run a GGUF model from a local file path.
//!
//! Run with: `cargo run --release --example gguf_locally -p mistralrs`
use anyhow::Result;
use mistralrs::{
GgufModelBuilder, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages,
};
#[tokio::main]
async fn main() -> Result<()> {
// We do not use any files from remote servers here, and instead load the
// chat template from the specified file, and the tokenizer and model from a
// local GGUF file at the path specified.
let model = GgufModelBuilder::new(
"gguf_models/mistral_v0.1/",
vec!["mistral-7b-instruct-v0.1.Q4_K_M.gguf"],
)
.with_chat_template("chat_templates/mistral.json")
.with_logging()
.with_paged_attn(PagedAttentionMetaBuilder::default().build()?)
.build()
.await?;
let messages = TextMessages::new().add_message(
TextMessageRole::User,
"Hello! How are you? Please write generic binary search function in Rust.",
);
let response = model.send_chat_request(messages).await?;
println!("{}", response.choices[0].message.content.as_ref().unwrap());
dbg!(
response.usage.avg_prompt_tok_per_sec,
response.usage.avg_compl_tok_per_sec
);
// Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability.
let request = RequestBuilder::new().return_logprobs(true).add_message(
TextMessageRole::User,
"Please write a mathematical equation where a few numbers are added.",
);
let response = model.send_chat_request(request).await?;
println!(
"Logprobs: {:?}",
&response.choices[0]
.logprobs
.as_ref()
.unwrap()
.content
.as_ref()
.unwrap()[0..3]
);
Ok(())
}

Source: mistralrs/examples/getting_started/gguf_locally/main.rs