// mistralrs/lib.rs
//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`DiffusionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(chunk) = chunk {
//!             print!("{}", chunk.choices[0].delta.content);
//!         }
//!         // Handle the error cases.
//!     }
//!     Ok(())
//! }
//! ```

// Private implementation modules. None of them is `pub`; the items meant for
// users are re-exported through the `v0_4_api` module declared in this file.
mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod text_model;
mod vision_model;
mod xlora_model;

/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
pub mod v0_4_api {
    pub use super::anymoe::AnyMoeModelBuilder;
    pub use super::diffusion_model::DiffusionModelBuilder;
    pub use super::gguf::GgufModelBuilder;
    pub use super::gguf_lora_model::GgufLoraModelBuilder;
    pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
    pub use super::lora_model::LoraModelBuilder;
    pub use super::messages::{
        RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
    };
    pub use super::model::{best_device, Model};
    pub use super::text_model::{PagedAttentionMetaBuilder, TextModelBuilder};
    pub use super::vision_model::VisionModelBuilder;
    pub use super::xlora_model::XLoraModelBuilder;
}

pub use v0_4_api::*;

pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;