//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
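//!
//! ## GGUF example
//! Quantized GGUF models can be loaded with [`GgufModelBuilder`], which takes a model ID (or
//! local path) plus the GGUF filenames to use. The sketch below follows the same pattern as
//! the example above; the repository and quant file names are illustrative placeholders, not
//! pinned by this crate.
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{GgufModelBuilder, TextMessageRole, TextMessages};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     // Hypothetical GGUF repository and quant file; substitute your own.
//!     let model = GgufModelBuilder::new(
//!         "bartowski/Phi-3.5-mini-instruct-GGUF",
//!         vec!["Phi-3.5-mini-instruct-Q4_K_M.gguf"],
//!     )
//!     .with_logging()
//!     .build()
//!     .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(TextMessageRole::User, "Hello! Tell me about yourself.");
//!     let response = model.send_chat_request(messages).await?;
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     Ok(())
//! }
//! ```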
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{}", content);
//!             }
//!         }
//!     }
//!     Ok(())
//! }
//! ```
//!
//! ## MCP example
//!
//! The MCP client integrates seamlessly with mistral.rs model builders:
//!
//! ```no_run
//! use mistralrs::{TextModelBuilder, IsqType};
//! use mistralrs_core::{McpClientConfig, McpServerConfig, McpServerSource};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     let mcp_config = McpClientConfig {
//!         servers: vec![/* your server configs */],
//!         auto_register_tools: true,
//!         tool_timeout_secs: Some(30),
//!         max_concurrent_calls: Some(5),
//!     };
//!
//!     let model = TextModelBuilder::new("path/to/model".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_mcp_client(mcp_config) // MCP tools automatically registered
//!         .build()
//!         .await?;
//!
//!     // MCP tools are now available for automatic tool calling
//!     Ok(())
//! }
//! ```
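//!
//! For illustration, one entry in `servers` might target an HTTP MCP endpoint as sketched
//! below. This is an assumption-laden sketch, not a pinned API: the URL and name are
//! placeholders, and the `McpServerConfig` / `McpServerSource::Http` field names should be
//! verified against the `mistralrs_core` docs for your version.
//! ```ignore
//! use mistralrs_core::{McpServerConfig, McpServerSource};
//!
//! // Hypothetical endpoint and values; adjust to your MCP server.
//! let server = McpServerConfig {
//!     name: "Example Tools".to_string(),
//!     source: McpServerSource::Http {
//!         url: "https://example.com/mcp".to_string(),
//!         timeout_secs: Some(30),
//!         headers: None,
//!     },
//!     ..Default::default()
//! };
//! ```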

mod anymoe;
mod diffusion_model;
mod embedding_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod multi_model;
mod speculative;
mod speech_model;
mod text_model;
mod vision_model;
mod xlora_model;

pub use anymoe::AnyMoeModelBuilder;
pub use diffusion_model::DiffusionModelBuilder;
pub use embedding_model::{EmbeddingModelBuilder, UqffEmbeddingModelBuilder};
pub use gguf::GgufModelBuilder;
pub use gguf_lora_model::GgufLoraModelBuilder;
pub use gguf_xlora_model::GgufXLoraModelBuilder;
pub use lora_model::LoraModelBuilder;
pub use messages::{
    EmbeddingRequest, EmbeddingRequestBuilder, EmbeddingRequestInput, RequestBuilder, RequestLike,
    TextMessageRole, TextMessages, VisionMessages,
};
pub use mistralrs_core::{
    McpClient, McpClientConfig, McpServerConfig, McpServerSource, McpToolInfo,
};
pub use mistralrs_core::{SearchCallback, SearchResult, ToolCallback};
pub use model::{best_device, Model};
pub use multi_model::MultiModel;
pub use speculative::TextSpeculativeBuilder;
pub use speech_model::SpeechModelBuilder;
pub use text_model::{PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder};
pub use vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
pub use xlora_model::XLoraModelBuilder;

pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;