mistralrs/lib.rs
//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
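//!
//! Because `TextMessages` is a plain builder, carrying a conversation forward is just a
//! matter of replaying the history with the model's reply added as an assistant turn.
//! A minimal sketch continuing the example above (a fragment, not a complete program;
//! the follow-up prompt is hypothetical):
//! ```ignore
//! // Pull the reply out of the previous response and replay the history.
//! let reply = response.choices[0].message.content.clone().unwrap();
//! let follow_up = TextMessages::new()
//!     .add_message(
//!         TextMessageRole::System,
//!         "You are an AI agent with a specialty in programming.",
//!     )
//!     .add_message(
//!         TextMessageRole::User,
//!         "Hello! How are you? Please write a generic binary search function in Rust.",
//!     )
//!     .add_message(TextMessageRole::Assistant, reply)
//!     .add_message(TextMessageRole::User, "Now add unit tests for that function.");
//! let response = model.send_chat_request(follow_up).await?;
//! println!("{}", response.choices[0].message.content.as_ref().unwrap());
//! ```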
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{}", content);
//!             }
//!         }
//!     }
//!     Ok(())
//! }
//! ```
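//!
//! The same loop can accumulate the streamed deltas into a single `String` instead of
//! printing them as they arrive; a minimal fragment reusing only the types from the
//! example above:
//! ```ignore
//! let mut full_text = String::new();
//! while let Some(chunk) = stream.next().await {
//!     if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!         if let Some(ChunkChoice {
//!             delta: Delta {
//!                 content: Some(content),
//!                 ..
//!             },
//!             ..
//!         }) = choices.first()
//!         {
//!             // Append each delta as it arrives.
//!             full_text.push_str(content);
//!         }
//!     }
//! }
//! ```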
//!
//! ## MCP example
//!
//! The MCP client integrates seamlessly with mistral.rs model builders:
//!
//! ```rust,no_run
//! use mistralrs::{IsqType, TextModelBuilder};
//! use mistralrs_core::mcp_client::{McpClientConfig, McpServerConfig, McpServerSource};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mcp_config = McpClientConfig {
//!         servers: vec![/* your server configs */],
//!         auto_register_tools: true,
//!         tool_timeout_secs: Some(30),
//!         max_concurrent_calls: Some(5),
//!     };
//!
//!     let model = TextModelBuilder::new("path/to/model".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_mcp_client(mcp_config) // MCP tools automatically registered
//!         .build()
//!         .await?;
//!
//!     // MCP tools are now available for automatic tool calling
//!     Ok(())
//! }
//! ```
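//!
//! `McpServerConfig` describes where each server lives and how to reach it. A sketch of
//! one possible entry over an HTTP transport (the field names and values here are
//! illustrative assumptions; consult the `mistralrs_core::mcp_client` docs for the exact
//! shape in your version):
//! ```ignore
//! // Hypothetical server entry; every field below is an assumption.
//! let server = McpServerConfig {
//!     name: "my-tools".to_string(),
//!     source: McpServerSource::Http {
//!         url: "https://example.com/mcp".to_string(),
//!         timeout_secs: Some(30),
//!         headers: None,
//!     },
//!     ..Default::default()
//! };
//! ```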

mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod speculative;
mod speech_model;
mod text_model;
mod vision_model;
mod xlora_model;

pub use anymoe::AnyMoeModelBuilder;
pub use diffusion_model::DiffusionModelBuilder;
pub use gguf::GgufModelBuilder;
pub use gguf_lora_model::GgufLoraModelBuilder;
pub use gguf_xlora_model::GgufXLoraModelBuilder;
pub use lora_model::LoraModelBuilder;
pub use messages::{RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages};
pub use mistralrs_core::{
    McpClient, McpClientConfig, McpServerConfig, McpServerSource, McpToolInfo,
};
pub use mistralrs_core::{SearchCallback, SearchResult, ToolCallback};
pub use model::{best_device, Model};
pub use speculative::TextSpeculativeBuilder;
pub use speech_model::SpeechModelBuilder;
pub use text_model::{PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder};
pub use vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
pub use xlora_model::XLoraModelBuilder;

pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;