Skip to content

text_models

A runnable Rust SDK example: `text_models`.

/// Unified text model example.
///
/// Change `MODEL_ID` to run any supported text model. Tested model IDs:
///
/// | Model | MODEL_ID |
/// |------------------------------|---------------------------------------------|
/// | Phi-3.5 Mini | `microsoft/Phi-3.5-mini-instruct` |
/// | Phi-3.5 MoE | `microsoft/Phi-3.5-MoE-instruct` |
/// | Gemma 2 | `google/gemma-2-9b-it` |
/// | DeepSeek-R1 | `deepseek-ai/DeepSeek-R1` |
/// | DeepSeek-V2-Lite | `deepseek-ai/DeepSeek-V2-Lite` |
/// | SmolLM3 | `HuggingFaceTB/SmolLM3-3B` |
/// | GPT-OSS | `openai/gpt-oss-20b` |
/// | Granite | `ibm-granite/granite-4.0-tiny-preview` |
/// | GLM-4 MoE | `zai-org/GLM-4.7-Flash` |
/// | Qwen3 (thinking mode) | `Qwen/Qwen3-30B-A3B` |
/// | Llama 3.3 | `meta-llama/Llama-3.3-70B-Instruct` |
///
/// Run with: `cargo run --release --example text_models -p mistralrs`
use anyhow::Result;
use mistralrs::{IsqBits, ModelBuilder, PagedAttentionMetaBuilder, TextMessageRole, TextMessages};
/// Identifier of the model to load; `main` passes it to `ModelBuilder::new`.
const MODEL_ID: &str = "Qwen/Qwen3-4B";
/// Builds the model named by `MODEL_ID`, sends a single system + user chat
/// request, prints the first choice's content, and dumps the prompt and
/// completion throughput figures from `response.usage`.
///
/// Errors from building the paged-attention metadata, building the model, or
/// sending the request are propagated with `?`. The program panics if the
/// response has no choices or the first choice carries no content.
#[tokio::main]
async fn main() -> Result<()> {
    // Configure the builder first: auto ISQ with `IsqBits::Four`, plus logging.
    let builder = ModelBuilder::new(MODEL_ID)
        .with_auto_isq(IsqBits::Four)
        .with_logging();

    // Paged-attention metadata built from the defaults.
    let paged_attn = PagedAttentionMetaBuilder::default().build()?;

    let model = builder.with_paged_attn(paged_attn).build().await?;

    // Conversation: one system prompt followed by one user prompt.
    let system_prompt = "You are an AI agent with a specialty in programming.";
    let user_prompt =
        "Hello! How are you? Please write generic binary search function in Rust.";
    let messages = TextMessages::new()
        .add_message(TextMessageRole::System, system_prompt)
        .add_message(TextMessageRole::User, user_prompt);

    let response = model.send_chat_request(messages).await?;

    // Print the text of the first (index 0) choice.
    let first_choice = &response.choices[0];
    let reply = first_choice.message.content.as_ref().unwrap();
    println!("{}", reply);

    // Throughput statistics reported alongside the response.
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    // ---------------------------------------------------------------
    // Thinking mode (Qwen3, DeepSeek-R1, and other reasoning models)
    //
    // Thinking is on by default. Toggle it either with the /think and
    // /no_think tags inside the user message, or through
    // `TextMessages::enable_thinking(bool)`.
    // ---------------------------------------------------------------
    // To try toggling thinking mode, uncomment the block below:
    //
    // let mut msgs = TextMessages::new();
    // msgs = msgs.add_message(TextMessageRole::User, "How many rs in strawberry?");
    // let resp = model.send_chat_request(msgs.clone()).await?;
    // println!("{}", resp.choices[0].message.content.as_ref().unwrap());
    //
    // msgs = msgs
    //     .add_message(TextMessageRole::Assistant, resp.choices[0].message.content.as_ref().unwrap())
    //     .add_message(TextMessageRole::User, "How many rs in blueberry? /no_think");
    // let resp = model.send_chat_request(msgs).await?;
    // println!("{}", resp.choices[0].message.content.as_ref().unwrap());

    Ok(())
}

Source: mistralrs/examples/models/text_models/main.rs