// mistralrs/lib.rs

//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`DiffusionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this, newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{content}");
//!             };
//!         }
//!     }
//!     Ok(())
//! }
//! ```

102mod anymoe;
103mod diffusion_model;
104mod gguf;
105mod gguf_lora_model;
106mod gguf_xlora_model;
107mod lora_model;
108mod messages;
109mod model;
110mod speculative;
111mod text_model;
112mod vision_model;
113mod xlora_model;
114
115/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
116pub mod v0_4_api {
117    pub use super::anymoe::AnyMoeModelBuilder;
118    pub use super::diffusion_model::DiffusionModelBuilder;
119    pub use super::gguf::GgufModelBuilder;
120    pub use super::gguf_lora_model::GgufLoraModelBuilder;
121    pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
122    pub use super::lora_model::LoraModelBuilder;
123    pub use super::messages::{
124        RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
125    };
126    pub use super::model::{best_device, Model};
127    pub use super::speculative::TextSpeculativeBuilder;
128    pub use super::text_model::{
129        PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder,
130    };
131    pub use super::vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
132    pub use super::xlora_model::XLoraModelBuilder;
133}
134
135pub use v0_4_api::*;
136
137pub use candle_core::{DType, Device, Result, Tensor};
138pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
139pub use mistralrs_core::*;