Skip to content

Batch multiple embedding requests for efficient parallel encoding

Batch multiple embedding requests for efficient parallel encoding.

Run with: `cargo run --release --example batching_embeddings -p mistralrs`

//! Batch multiple embedding requests for efficient parallel encoding.
//!
//! Run with: `cargo run --release --example batching_embeddings -p mistralrs`
use anyhow::Result;
use mistralrs::{EmbeddingModelBuilder, EmbeddingRequest};
#[tokio::main]
async fn main() -> Result<()> {
    // The two prompts used throughout: each is embedded on its own first, then
    // both are interleaved inside one large batched request.
    const GRAPHENE_QUERY: &str = "task: search result | query: What is graphene?";
    const GRAVITY_QUERY: &str =
        "task: search result | query: What is an apple's significance to gravity?";

    let model = EmbeddingModelBuilder::new("google/embeddinggemma-300m")
        .with_logging()
        .build()
        .await?;

    // Reference results: one request per prompt.
    let graphene_request = EmbeddingRequest::builder().add_prompt(GRAPHENE_QUERY);
    let a = model.generate_embeddings(graphene_request).await?;

    let gravity_request = EmbeddingRequest::builder().add_prompt(GRAVITY_QUERY);
    let b = model.generate_embeddings(gravity_request).await?;

    // A single request carrying 100 prompts: even positions hold the graphene
    // query, odd positions hold the gravity query.
    let alternating_prompts = (0..100).map(|i| {
        if i % 2 == 0 {
            GRAPHENE_QUERY
        } else {
            GRAVITY_QUERY
        }
    });
    let batched_request = EmbeddingRequest::builder().add_prompts(alternating_prompts);
    let batched = model.generate_embeddings(batched_request).await?;

    // Every embedding returned for the batch must equal the embedding obtained
    // when the same prompt was submitted alone.
    for (position, embedding) in batched.into_iter().enumerate() {
        let expected = if position % 2 == 0 { &a[0] } else { &b[0] };
        assert_eq!(&embedding, expected);
    }

    Ok(())
}

Source: mistralrs/examples/advanced/batching_embeddings/main.rs