//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`DiffusionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this, newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{content}");
//!             };
//!         }
//!     }
//!     Ok(())
//! }
//! ```

102mod anymoe;
103mod diffusion_model;
104mod gguf;
105mod gguf_lora_model;
106mod gguf_xlora_model;
107mod lora_model;
108mod messages;
109mod model;
110mod speculative;
111mod text_model;
112mod vision_model;
113mod xlora_model;
114
115/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
116pub mod v0_4_api {
117 pub use super::anymoe::AnyMoeModelBuilder;
118 pub use super::diffusion_model::DiffusionModelBuilder;
119 pub use super::gguf::GgufModelBuilder;
120 pub use super::gguf_lora_model::GgufLoraModelBuilder;
121 pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
122 pub use super::lora_model::LoraModelBuilder;
123 pub use super::messages::{
124 RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
125 };
126 pub use super::model::{best_device, Model};
127 pub use super::speculative::TextSpeculativeBuilder;
128 pub use super::text_model::{
129 PagedAttentionMetaBuilder, TextModelBuilder, UqffTextModelBuilder,
130 };
131 pub use super::vision_model::{UqffVisionModelBuilder, VisionModelBuilder};
132 pub use super::xlora_model::XLoraModelBuilder;
133}
134
135pub use v0_4_api::*;
136
137pub use candle_core::{DType, Device, Result, Tensor};
138pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
139pub use mistralrs_core::*;