mistralrs/lib.rs
//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`DiffusionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let response = model.send_chat_request(messages).await?;
//!
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     dbg!(
//!         response.usage.avg_prompt_tok_per_sec,
//!         response.usage.avg_compl_tok_per_sec
//!     );
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//!     IsqType, PagedAttentionMetaBuilder, Response, TextMessageRole, TextMessages,
//!     TextModelBuilder,
//! };
//! use mistralrs_core::{ChatCompletionChunkResponse, ChunkChoice, Delta};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//!         .with_isq(IsqType::Q8_0)
//!         .with_logging()
//!         .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//!         .build()
//!         .await?;
//!
//!     let messages = TextMessages::new()
//!         .add_message(
//!             TextMessageRole::System,
//!             "You are an AI agent with a specialty in programming.",
//!         )
//!         .add_message(
//!             TextMessageRole::User,
//!             "Hello! How are you? Please write a generic binary search function in Rust.",
//!         );
//!
//!     let mut stream = model.stream_chat_request(messages).await?;
//!     while let Some(chunk) = stream.next().await {
//!         if let Response::Chunk(ChatCompletionChunkResponse { choices, .. }) = chunk {
//!             if let Some(ChunkChoice {
//!                 delta:
//!                     Delta {
//!                         content: Some(content),
//!                         ..
//!                     },
//!                 ..
//!             }) = choices.first()
//!             {
//!                 print!("{content}");
//!             }
//!         }
//!     }
//!     Ok(())
//! }
//! ```
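//!
//! ## GGUF example
//! [`GgufModelBuilder`] loads pre-quantized GGUF weights instead of quantizing at load
//! time. A minimal sketch along the lines of the examples above; the Hugging Face
//! repository and filename below are assumptions, so substitute the GGUF weights you
//! actually want to run:
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{GgufModelBuilder, TextMessageRole, TextMessages};
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//!     // Assumed model repo and quantization file; replace with your own GGUF weights.
//!     let model = GgufModelBuilder::new(
//!         "bartowski/Phi-3.5-mini-instruct-GGUF",
//!         vec!["Phi-3.5-mini-instruct-Q4_K_M.gguf"],
//!     )
//!     .with_logging()
//!     .build()
//!     .await?;
//!
//!     let messages = TextMessages::new().add_message(TextMessageRole::User, "Why is the sky blue?");
//!
//!     let response = model.send_chat_request(messages).await?;
//!     println!("{}", response.choices[0].message.content.as_ref().unwrap());
//!     Ok(())
//! }
//! ```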
mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod text_model;
mod vision_model;
mod xlora_model;
/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
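///
/// Everything in this module is re-exported at the crate root (see the
/// `pub use v0_4_api::*;` below), so both import paths refer to the same items:
/// ```no_run
/// use mistralrs::TextModelBuilder;
/// use mistralrs::v0_4_api::TextModelBuilder as AlsoTextModelBuilder;
/// ```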
pub mod v0_4_api {
    pub use super::anymoe::AnyMoeModelBuilder;
    pub use super::diffusion_model::DiffusionModelBuilder;
    pub use super::gguf::GgufModelBuilder;
    pub use super::gguf_lora_model::GgufLoraModelBuilder;
    pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
    pub use super::lora_model::LoraModelBuilder;
    pub use super::messages::{
        RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
    };
    pub use super::model::{best_device, Model};
    pub use super::text_model::{PagedAttentionMetaBuilder, TextModelBuilder};
    pub use super::vision_model::VisionModelBuilder;
    pub use super::xlora_model::XLoraModelBuilder;
}
pub use v0_4_api::*;
pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
pub use mistralrs_core::*;