mistralrs/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//!    use anyhow::Result;
//!    use mistralrs::{
//!        IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!        TextModelBuilder,
//!    };
//!    use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//!    #[tokio::main]
//!    async fn main() -> Result<()> {
//!        let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!            .with_isq(IsqType::Q8_0)
//!            .with_logging()
//!            .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!            .build()
//!            .await?;
//!
//!        let messages = TextMessages::new()
//!            .add_message(
//!                TextMessageRole::System,
//!                "You are an AI agent with a specialty in programming.",
//!            )
//!            .add_message(
//!                TextMessageRole::User,
//!                "Hello! How are you? Please write generic binary search function in Rust.",
//!            );
//!
//!        let mut stream = model.stream_chat_request(messages).await?;
//!
//!        while let Some(chunk) = stream.next().await {
//!            if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!                if let Some(ChunkChoice {
//!                    delta:
//!                        Delta {
//!                            content: Some(content),
//!                            ..
//!                        },
//!                    ..
//!                }) = choices.first()
//!                {
//!                    print!("{content}");
//!                };
//!            }
//!        }
//!        Ok(())
//!    }
//! ```

mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod text_model;
mod vision_model;
mod xlora_model;

/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
pub mod v0_4_api {
    pub use super::anymoe::AnyMoeModelBuilder;
    pub use super::diffusion_model::DiffusionModelBuilder;
    pub use super::gguf::GgufModelBuilder;
    pub use super::gguf_lora_model::GgufLoraModelBuilder;
    pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
    pub use super::lora_model::LoraModelBuilder;
    pub use super::messages::{
        RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
    };
    pub use super::model::{best_device, Model};
    pub use super::text_model::{PagedAttentionMetaBuilder, TextModelBuilder};
    pub use super::vision_model::VisionModelBuilder;
    pub use super::xlora_model::XLoraModelBuilder;
}

pub use v0_4_api::*;

pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;