Load and run a GGUF model from a local file path
Load and run a GGUF model from a local file path.
Run with: cargo run --release --example gguf_locally -p mistralrs
//! Load and run a GGUF model from a local file path.//!//! Run with: `cargo run --release --example gguf_locally -p mistralrs`
use anyhow::Result;use mistralrs::{ GgufModelBuilder, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages,};
#[tokio::main]async fn main() -> Result<()> { // We do not use any files from remote servers here, and instead load the // chat template from the specified file, and the tokenizer and model from a // local GGUF file at the path specified. let model = GgufModelBuilder::new( "gguf_models/mistral_v0.1/", vec!["mistral-7b-instruct-v0.1.Q4_K_M.gguf"], ) .with_chat_template("chat_templates/mistral.json") .with_logging() .with_paged_attn(PagedAttentionMetaBuilder::default().build()?) .build() .await?;
let messages = TextMessages::new().add_message( TextMessageRole::User, "Hello! How are you? Please write generic binary search function in Rust.", );
let response = model.send_chat_request(messages).await?;
println!("{}", response.choices[0].message.content.as_ref().unwrap()); dbg!( response.usage.avg_prompt_tok_per_sec, response.usage.avg_compl_tok_per_sec );
// Next example: Return some logprobs with the `RequestBuilder`, which enables higher configurability. let request = RequestBuilder::new().return_logprobs(true).add_message( TextMessageRole::User, "Please write a mathematical equation where a few numbers are added.", );
let response = model.send_chat_request(request).await?;
println!( "Logprobs: {:?}", &response.choices[0] .logprobs .as_ref() .unwrap() .content .as_ref() .unwrap()[0..3] );
Ok(())}Source: mistralrs/examples/getting_started/gguf_locally/main.rs