// mistralrs/lib.rs
1//! This crate provides an asynchronous API to `mistral.rs`.
2//!
3//! To get started loading a model, check out the following builders:
4//! - [`TextModelBuilder`]
5//! - [`LoraModelBuilder`]
6//! - [`XLoraModelBuilder`]
7//! - [`GgufModelBuilder`]
8//! - [`GgufLoraModelBuilder`]
9//! - [`GgufXLoraModelBuilder`]
10//! - [`VisionModelBuilder`]
11//! - [`AnyMoeModelBuilder`]
12//!
13//! For loading multiple models simultaneously, use [`MultiModelBuilder`].
14//! The returned [`Model`] supports `_with_model` method variants and runtime
15//! model management (unload/reload).
16//!
17//! ## Example
18//! ```no_run
19//! use anyhow::Result;
20//! use mistralrs::{
21//! IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
22//! };
23//!
24//! #[tokio::main]
25//! async fn main() -> Result<()> {
26//! let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
27//! .with_isq(IsqType::Q8_0)
28//! .with_logging()
29//! .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
30//! .build()
31//! .await?;
32//!
33//! let messages = TextMessages::new()
34//! .add_message(
35//! TextMessageRole::System,
36//! "You are an AI agent with a specialty in programming.",
37//! )
38//! .add_message(
39//! TextMessageRole::User,
40//! "Hello! How are you? Please write generic binary search function in Rust.",
41//! );
42//!
43//! let response = model.send_chat_request(messages).await?;
44//!
45//! println!("{}", response.choices[0].message.content.as_ref().unwrap());
46//! dbg!(
47//! response.usage.avg_prompt_tok_per_sec,
48//! response.usage.avg_compl_tok_per_sec
49//! );
50//!
51//! Ok(())
52//! }
53//! ```
54//!
55//! ## Streaming example
56//! ```no_run
57//! use anyhow::Result;
58//! use mistralrs::{
59//! IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
60//! TextModelBuilder,
61//! };
62//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
63//!
64//! #[tokio::main]
65//! async fn main() -> Result<()> {
66//! let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
67//! .with_isq(IsqType::Q8_0)
68//! .with_logging()
69//! .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
70//! .build()
71//! .await?;
72//!
73//! let messages = TextMessages::new()
74//! .add_message(
75//! TextMessageRole::System,
76//! "You are an AI agent with a specialty in programming.",
77//! )
78//! .add_message(
79//! TextMessageRole::User,
80//! "Hello! How are you? Please write generic binary search function in Rust.",
81//! );
82//!
83//! let mut stream = model.stream_chat_request(messages).await?;
//!
85//! while let Some(chunk) = stream.next().await {
86//! if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
87//! if let Some(ChunkChoice {
88//! delta:
89//! Delta {
90//! content: Some(content),
91//! ..
92//! },
93//! ..
94//! }) = choices.first()
95//! {
96//! print!("{}", content);
97//! };
98//! }
99//! }
100//! Ok(())
101//! }
102//! ```
103//!
104//! ## MCP example
105//!
106//! The MCP client integrates seamlessly with mistral.rs model builders:
107//!
108//! ```rust,no_run
109//! use mistralrs::{TextModelBuilder, IsqType};
110//! use mistralrs_core::{McpClientConfig, McpServerConfig, McpServerSource};
111//!
112//! #[tokio::main]
113//! async fn main() -> anyhow::Result<()> {
114//! let mcp_config = McpClientConfig {
115//! servers: vec![/* your server configs */],
116//! auto_register_tools: true,
117//! tool_timeout_secs: Some(30),
118//! max_concurrent_calls: Some(5),
119//! };
120//!
121//! let model = TextModelBuilder::new("path/to/model".to_string())
122//! .with_isq(IsqType::Q8_0)
123//! .with_mcp_client(mcp_config) // MCP tools automatically registered
124//! .build()
125//! .await?;
126//!
127//! // MCP tools are now available for automatic tool calling
128//! Ok(())
129//! }
130//! ```
131
// Private implementation modules; their public items are re-exported below so
// downstream users interact only with the `mistralrs::` namespace.
mod agent;
mod anymoe;
mod diffusion_model;
mod embedding_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
// Public module: exposes the builder-trait machinery (`AnyModelBuilder`,
// `MultiModelBuilder`) so callers can name the trait itself, not just the
// re-exported types.
pub mod model_builder_trait;
mod speculative;
mod speech_model;
mod text_model;
mod vision_model;
mod xlora_model;
148
// Agent API: multi-step tool-calling workflows layered on a loaded [`Model`].
pub use agent::{
    Agent, AgentBuilder, AgentConfig, AgentEvent, AgentResponse, AgentStep, AgentStopReason,
    AgentStream, AsyncToolCallback, ToolCallbackType, ToolResult,
};
// Model builders — one per supported model/adapter/quantization format
// (see the crate-level docs for the full list).
pub use anymoe::AnyMoeModelBuilder;
pub use diffusion_model::DiffusionModelBuilder;
pub use embedding_model::{EmbeddingModelBuilder, UqffEmbeddingModelBuilder};
pub use gguf::GgufModelBuilder;
pub use gguf_lora_model::GgufLoraModelBuilder;
pub use gguf_xlora_model::GgufXLoraModelBuilder;
pub use lora_model::LoraModelBuilder;
// Request-construction types: chat message builders and embedding requests.
pub use messages::{
    EmbeddingRequest, EmbeddingRequestBuilder, EmbeddingRequestInput, RequestBuilder, RequestLike,
    TextMessageRole, TextMessages, VisionMessages,
};
// MCP (Model Context Protocol) client types.
// NOTE(review): these names are also covered by the `pub use mistralrs_core::*;`
// glob below; the explicit list appears redundant but keeps the items visible in
// rustdoc — confirm this duplication is intentional.
pub use mistralrs_core::{
    McpClient, McpClientConfig, McpServerConfig, McpServerSource, McpToolInfo,
};
pub use mistralrs_core::{SearchCallback, SearchResult, ToolCallback};
pub use model::{best_device, Model};
pub use model_builder_trait::{AnyModelBuilder, MultiModelBuilder};
pub use speculative::TextSpeculativeBuilder;
pub use speech_model::SpeechModelBuilder;
pub use text_model::{PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder};
pub use vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
pub use xlora_model::XLoraModelBuilder;

// Tensor primitives re-exported from candle so users need not depend on the
// candle crates directly. Note `Result` here is `candle_core::Result`.
pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
// Blanket re-export of the core crate's public API (requests, responses,
// configs, sampling types, …).
pub use mistralrs_core::*;

// Re-export the tool proc macro for ergonomic tool definition
pub use mistralrs_macros::tool;

// Re-export schemars for use in tool definitions
pub use schemars;