//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
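//!
//! [`TextMessages`] covers plain multi-turn chat; for per-request options the crate
//! also re-exports [`RequestBuilder`], which implements [`RequestLike`] and is
//! accepted by `send_chat_request` in the same way. A minimal sketch (the
//! `set_sampler_max_len` setter here is an assumption about the builder's surface;
//! see [`RequestBuilder`]'s docs for the exact methods):
//!
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{RequestBuilder, TextMessageRole, TextModelBuilder};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .build()
//!         .await?;
//!
//!     // Same message API as TextMessages, plus request-level knobs.
//!     let request = RequestBuilder::new()
//!         .add_message(
//!             TextMessageRole::User,
//!             "Summarize Rust's ownership model in one sentence.",
//!         )
//!         // Cap the number of generated tokens (setter name is an assumption).
//!         .set_sampler_max_len(100);
//!
//!     let response = model.send_chat_request(request).await?;
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     Ok(())
//! }
//! ```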
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{}", content);
//!             };
//!         }
//!     }
//!     Ok(())
//! }
//! ```
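//!
//! Note that `print!` does not flush stdout, so streamed tokens may appear in bursts.
//! For interactive output, flush after each chunk; a small standard-library-only
//! helper for the loop body above:
//!
//! ```no_run
//! use std::io::{self, Write};
//!
//! fn print_streamed(content: &str) -> io::Result<()> {
//!     // Write the delta and flush immediately so each token shows up as it arrives.
//!     print!("{content}");
//!     io::stdout().flush()
//! }
//! ```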
//!
//! ## MCP example
//!
//! The MCP client integrates seamlessly with mistral.rs model builders:
//!
//! ```no_run
//! use mistralrs::{IsqType, TextModelBuilder};
//! use mistralrs_core::mcp_client::{McpClientConfig, McpServerConfig, McpServerSource};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mcp_config = McpClientConfig {
//!         servers: vec![/* your server configs */],
//!         auto_register_tools: true,
//!         tool_timeout_secs: Some(30),
//!         max_concurrent_calls: Some(5),
//!     };
//!
//!     let model = TextModelBuilder::new("path/to/model".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_mcp_client(mcp_config) // MCP tools automatically registered
//!         .build()
//!         .await?;
//!
//!     // MCP tools are now available for automatic tool calling
//!     Ok(())
//! }
//! ```
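//!
//! Each [`McpServerConfig`] in `servers` describes one MCP server and how to reach
//! it. As a sketch of what an entry might look like (the exact field names and the
//! `Default` impl are assumptions here; consult [`McpServerConfig`] and
//! [`McpServerSource`] in `mistralrs_core` for the authoritative definitions), a
//! locally spawned filesystem server:
//!
//! ```no_run
//! use mistralrs_core::mcp_client::{McpServerConfig, McpServerSource};
//!
//! fn filesystem_server() -> McpServerConfig {
//!     McpServerConfig {
//!         name: "Filesystem Tools".to_string(),
//!         // Spawn the reference MCP filesystem server as a child process over stdio.
//!         source: McpServerSource::Process {
//!             command: "npx".to_string(),
//!             args: vec![
//!                 "@modelcontextprotocol/server-filesystem".to_string(),
//!                 ".".to_string(),
//!             ],
//!             work_dir: None,
//!             env: None,
//!         },
//!         // Remaining fields (id, tool prefix, auth, ...) assumed left at defaults.
//!         ..Default::default()
//!     }
//! }
//! ```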

mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod speculative;
mod speech_model;
mod text_model;
mod vision_model;
mod xlora_model;

pub use anymoe::AnyMoeModelBuilder;
pub use diffusion_model::DiffusionModelBuilder;
pub use gguf::GgufModelBuilder;
pub use gguf_lora_model::GgufLoraModelBuilder;
pub use gguf_xlora_model::GgufXLoraModelBuilder;
pub use lora_model::LoraModelBuilder;
pub use messages::{RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages};
pub use mistralrs_core::{
    McpClient, McpClientConfig, McpServerConfig, McpServerSource, McpToolInfo,
};
pub use mistralrs_core::{SearchCallback, SearchResult, ToolCallback};
pub use model::{best_device, Model};
pub use speculative::TextSpeculativeBuilder;
pub use speech_model::SpeechModelBuilder;
pub use text_model::{PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder};
pub use vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
pub use xlora_model::XLoraModelBuilder;

pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;