// mistralrs/lib.rs
//! This crate provides an asynchronous API to `mistral.rs`.
//!
//! To get started loading a model, check out the following builders:
//! - [`TextModelBuilder`]
//! - [`LoraModelBuilder`]
//! - [`XLoraModelBuilder`]
//! - [`GgufModelBuilder`]
//! - [`GgufLoraModelBuilder`]
//! - [`GgufXLoraModelBuilder`]
//! - [`VisionModelBuilder`]
//! - [`AnyMoeModelBuilder`]
//!
//! Check out the [`v0_4_api`] module for concise documentation of this newer API.
//!
//! ## Example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//! IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder,
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//! let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//! .with_isq(IsqType::Q8_0)
//! .with_logging()
//! .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//! .build()
//! .await?;
//!
//! let messages = TextMessages::new()
//! .add_message(
//! TextMessageRole::System,
//! "You are an AI agent with a specialty in programming.",
//! )
//! .add_message(
//! TextMessageRole::User,
//! "Hello! How are you? Please write generic binary search function in Rust.",
//! );
//!
//! let response = model.send_chat_request(messages).await?;
//!
//! println!("{}", response.choices[0].message.content.as_ref().unwrap());
//! dbg!(
//! response.usage.avg_prompt_tok_per_sec,
//! response.usage.avg_compl_tok_per_sec
//! );
//!
//! Ok(())
//! }
//! ```
//!
//! ## Streaming example
//! ```no_run
//! use anyhow::Result;
//! use mistralrs::{
//! IsqType, PagedAttentionMetaBuilder, TextMessageRole, TextMessages, TextModelBuilder, Response
//! };
//!
//! #[tokio::main]
//! async fn main() -> Result<()> {
//! let model = TextModelBuilder::new("microsoft/Phi-3.5-mini-instruct".to_string())
//! .with_isq(IsqType::Q8_0)
//! .with_logging()
//! .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
//! .build()
//! .await?;
//!
//! let messages = TextMessages::new()
//! .add_message(
//! TextMessageRole::System,
//! "You are an AI agent with a specialty in programming.",
//! )
//! .add_message(
//! TextMessageRole::User,
//! "Hello! How are you? Please write generic binary search function in Rust.",
//! );
//!
//! let mut stream = model.stream_chat_request(messages).await?;
//!
//! while let Some(chunk) = stream.next().await {
//! if let Response::Chunk(chunk) = chunk {
//! print!("{}", chunk.choices[0].delta.content);
//! }
//! // Handle the other `Response` variants (e.g. errors) here.
//!
//! }
//! Ok(())
//! }
//! ```
// Internal builder modules. These are private; their public items are
// re-exported through the [`v0_4_api`] module below.
mod anymoe;
mod diffusion_model;
mod gguf;
mod gguf_lora_model;
mod gguf_xlora_model;
mod lora_model;
mod messages;
mod model;
mod text_model;
mod vision_model;
mod xlora_model;
/// This will be the API as of v0.4.0. Other APIs will *not* be deprecated, but moved into a module such as this one.
pub mod v0_4_api {
    // Model builders, one per supported model/adapter format.
    pub use super::anymoe::AnyMoeModelBuilder;
    pub use super::diffusion_model::DiffusionModelBuilder;
    pub use super::gguf::GgufModelBuilder;
    pub use super::gguf_lora_model::GgufLoraModelBuilder;
    pub use super::gguf_xlora_model::GgufXLoraModelBuilder;
    pub use super::lora_model::LoraModelBuilder;
    // Request/message construction types used with [`Model`]'s request methods.
    pub use super::messages::{
        RequestBuilder, RequestLike, TextMessageRole, TextMessages, VisionMessages,
    };
    // The loaded-model handle plus device-selection helper.
    pub use super::model::{best_device, Model};
    pub use super::text_model::{PagedAttentionMetaBuilder, TextModelBuilder};
    pub use super::vision_model::VisionModelBuilder;
    pub use super::xlora_model::XLoraModelBuilder;
}
// Flatten the v0.4 API into the crate root so users can `use mistralrs::TextModelBuilder` etc.
pub use v0_4_api::*;
// Re-export core tensor types so downstream crates need not depend on candle directly.
pub use candle_core::{DType, Device, Result, Tensor};
pub use candle_nn::loss::cross_entropy as cross_entropy_loss;
// Re-export the full core engine API (requests, responses, sampling, etc.).
pub use mistralrs_core::*;