pub struct MistralRsForServerBuilder { /* private fields */ }
Expand description
A builder for creating a mistral.rs instance with configured options for the mistral.rs server.
§Examples
Basic usage:
use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;
let args = Args::parse();
let mistralrs = MistralRsForServerBuilder::new()
.with_truncate_sequence(args.truncate_sequence)
.with_model(args.model)
.with_max_seqs(args.max_seqs)
.with_no_kv_cache(args.no_kv_cache)
.with_token_source(args.token_source)
.with_interactive_mode(args.interactive_mode)
.with_prefix_cache_n(args.prefix_cache_n)
.set_paged_attn(args.paged_attn)
.with_cpu(args.cpu)
.with_enable_search(args.enable_search)
.with_seed_optional(args.seed)
.with_log_optional(args.log)
.with_chat_template_optional(args.chat_template)
.with_jinja_explicit_optional(args.jinja_explicit)
.with_num_device_layers_optional(args.num_device_layers)
.with_in_situ_quant_optional(args.in_situ_quant)
.with_paged_attn_gpu_mem_optional(args.paged_attn_gpu_mem)
.with_paged_attn_gpu_mem_usage_optional(args.paged_attn_gpu_mem_usage)
.with_paged_ctxt_len_optional(args.paged_ctxt_len)
.with_paged_attn_block_size_optional(args.paged_attn_block_size)
.with_prompt_chunksize_optional(args.prompt_chunksize)
.build()
.await?;
Implementations§
Source§impl MistralRsForServerBuilder
impl MistralRsForServerBuilder
Sourcepub fn new() -> Self
pub fn new() -> Self
Creates a new MistralRsForServerBuilder with default settings.
This is equivalent to calling Default::default().
§Examples
use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;
let builder = mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder::new();
Sourcepub fn with_device(self, device: Device) -> Self
pub fn with_device(self, device: Device) -> Self
Sets the Candle device to use for model execution.
Sourcepub fn with_seed(self, seed: u64) -> Self
pub fn with_seed(self, seed: u64) -> Self
Sets the random seed for deterministic model behavior.
Sourcepub fn with_seed_optional(self, seed: Option<u64>) -> Self
pub fn with_seed_optional(self, seed: Option<u64>) -> Self
Sets the random seed if provided.
Sourcepub fn with_log_optional(self, log: Option<String>) -> Self
pub fn with_log_optional(self, log: Option<String>) -> Self
Sets the logging configuration if provided.
Sourcepub fn with_truncate_sequence(self, truncate_sequence: bool) -> Self
pub fn with_truncate_sequence(self, truncate_sequence: bool) -> Self
Sets whether to truncate sequences that exceed the maximum model length.
Sourcepub fn with_model(self, model: ModelSelected) -> Self
pub fn with_model(self, model: ModelSelected) -> Self
Sets the model to be used.
Sourcepub fn with_model_config(self, model_config: ModelConfig) -> Self
pub fn with_model_config(self, model_config: ModelConfig) -> Self
Add a model to the multi-model configuration.
Sourcepub fn with_model_configs(self, model_configs: Vec<ModelConfig>) -> Self
pub fn with_model_configs(self, model_configs: Vec<ModelConfig>) -> Self
Add multiple models to the multi-model configuration.
Sourcepub fn with_default_model_id(self, default_model_id: String) -> Self
pub fn with_default_model_id(self, default_model_id: String) -> Self
Set the default model ID to use when none is specified in requests.
Sourcepub fn add_model_config(self, config: ModelConfig) -> Self
pub fn add_model_config(self, config: ModelConfig) -> Self
Add a model configuration.
Sourcepub fn add_model(self, model_id: String, model: ModelSelected) -> Self
pub fn add_model(self, model_id: String, model: ModelSelected) -> Self
Add a model with just an ID and ModelSelected (convenience method).
Sourcepub fn with_max_seqs(self, max_seqs: usize) -> Self
pub fn with_max_seqs(self, max_seqs: usize) -> Self
Sets the maximum number of concurrent sequences.
Sourcepub fn with_no_kv_cache(self, no_kv_cache: bool) -> Self
pub fn with_no_kv_cache(self, no_kv_cache: bool) -> Self
Sets whether to disable the key-value cache.
Sourcepub fn with_chat_template(self, chat_template: String) -> Self
pub fn with_chat_template(self, chat_template: String) -> Self
Sets the chat template configuration.
Sourcepub fn with_chat_template_optional(self, chat_template: Option<String>) -> Self
pub fn with_chat_template_optional(self, chat_template: Option<String>) -> Self
Sets the chat template configuration if provided.
Sourcepub fn with_jinja_explicit(self, jinja_explicit: String) -> Self
pub fn with_jinja_explicit(self, jinja_explicit: String) -> Self
Sets an explicit JINJA chat template file.
Sourcepub fn with_jinja_explicit_optional(
self,
jinja_explicit: Option<String>,
) -> Self
pub fn with_jinja_explicit_optional( self, jinja_explicit: Option<String>, ) -> Self
Sets an explicit JINJA chat template file if provided.
Sourcepub fn with_token_source(self, token_source: TokenSource) -> Self
pub fn with_token_source(self, token_source: TokenSource) -> Self
Sets the token source for authentication.
Sourcepub fn with_interactive_mode(self, interactive_mode: bool) -> Self
pub fn with_interactive_mode(self, interactive_mode: bool) -> Self
Sets whether to run in interactive mode.
Sourcepub fn with_prefix_cache_n(self, prefix_cache_n: usize) -> Self
pub fn with_prefix_cache_n(self, prefix_cache_n: usize) -> Self
Sets the number of prefix caches to hold on the device.
Sourcepub fn with_num_device_layers(self, num_device_layers: Vec<String>) -> Self
pub fn with_num_device_layers(self, num_device_layers: Vec<String>) -> Self
Sets the device layer mapping.
Sourcepub fn with_num_device_layers_optional(
self,
num_device_layers: Option<Vec<String>>,
) -> Self
pub fn with_num_device_layers_optional( self, num_device_layers: Option<Vec<String>>, ) -> Self
Sets the device layer mapping if provided.
Sourcepub fn with_in_situ_quant(self, in_situ_quant: String) -> Self
pub fn with_in_situ_quant(self, in_situ_quant: String) -> Self
Sets the in-situ quantization method.
Sourcepub fn with_in_situ_quant_optional(self, in_situ_quant: Option<String>) -> Self
pub fn with_in_situ_quant_optional(self, in_situ_quant: Option<String>) -> Self
Sets the in-situ quantization method if provided.
Sourcepub fn set_paged_attn(self, paged_attn: Option<bool>) -> Self
pub fn set_paged_attn(self, paged_attn: Option<bool>) -> Self
Sets PagedAttention.
Unlike other with_PROP or with_PROP_optional methods, this method sets the value to whatever Option<bool> is passed in, because None, Some(true) and Some(false) have different implications:
- None: default behavior for target device (e.g. enable for CUDA, disable for Metal)
- Some(true): enable (if supported by target device)
- Some(false): disable
Sourcepub fn with_paged_attn_gpu_mem(self, paged_attn_gpu_mem: usize) -> Self
pub fn with_paged_attn_gpu_mem(self, paged_attn_gpu_mem: usize) -> Self
Sets the GPU memory allocation for PagedAttention KV cache.
Sourcepub fn with_paged_attn_gpu_mem_optional(
self,
paged_attn_gpu_mem: Option<usize>,
) -> Self
pub fn with_paged_attn_gpu_mem_optional( self, paged_attn_gpu_mem: Option<usize>, ) -> Self
Sets the GPU memory allocation for PagedAttention KV cache if provided.
Sourcepub fn with_paged_attn_gpu_mem_usage(
self,
paged_attn_gpu_mem_usage: f32,
) -> Self
pub fn with_paged_attn_gpu_mem_usage( self, paged_attn_gpu_mem_usage: f32, ) -> Self
Sets the percentage of GPU memory to utilize for PagedAttention.
Sourcepub fn with_paged_attn_gpu_mem_usage_optional(
self,
paged_attn_gpu_mem_usage: Option<f32>,
) -> Self
pub fn with_paged_attn_gpu_mem_usage_optional( self, paged_attn_gpu_mem_usage: Option<f32>, ) -> Self
Sets the percentage of GPU memory to utilize for PagedAttention if provided.
Sourcepub fn with_paged_ctxt_len(self, paged_ctxt_len: usize) -> Self
pub fn with_paged_ctxt_len(self, paged_ctxt_len: usize) -> Self
Sets the total context length for KV cache allocation.
Sourcepub fn with_paged_ctxt_len_optional(self, paged_ctxt_len: Option<usize>) -> Self
pub fn with_paged_ctxt_len_optional(self, paged_ctxt_len: Option<usize>) -> Self
Sets the total context length for KV cache allocation if provided.
Sourcepub fn with_paged_attn_block_size(self, paged_attn_block_size: usize) -> Self
pub fn with_paged_attn_block_size(self, paged_attn_block_size: usize) -> Self
Sets the block size for PagedAttention.
Sourcepub fn with_paged_attn_cache_type(self, cache_type: PagedCacheType) -> Self
pub fn with_paged_attn_cache_type(self, cache_type: PagedCacheType) -> Self
Sets the cache type for PagedAttention.
Sourcepub fn with_paged_attn_block_size_optional(
self,
paged_attn_block_size: Option<usize>,
) -> Self
pub fn with_paged_attn_block_size_optional( self, paged_attn_block_size: Option<usize>, ) -> Self
Sets the block size for PagedAttention if provided.
Sourcepub fn with_prompt_chunksize(self, prompt_chunksize: usize) -> Self
pub fn with_prompt_chunksize(self, prompt_chunksize: usize) -> Self
Sets the prompt chunking size.
Sourcepub fn with_prompt_chunksize_optional(
self,
prompt_chunksize: Option<usize>,
) -> Self
pub fn with_prompt_chunksize_optional( self, prompt_chunksize: Option<usize>, ) -> Self
Sets the prompt chunking size if provided.
Sourcepub fn with_enable_search(self, enable_search: bool) -> Self
pub fn with_enable_search(self, enable_search: bool) -> Self
Sets whether to enable web search functionality.
Sourcepub fn with_search_bert_model(self, search_bert_model: String) -> Self
pub fn with_search_bert_model(self, search_bert_model: String) -> Self
Sets the BERT model for web search assistance.
Sourcepub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self
pub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self
Override the search function used when web_search_options is enabled.
Sourcepub fn with_mcp_config(self, mcp_config: McpClientConfig) -> Self
pub fn with_mcp_config(self, mcp_config: McpClientConfig) -> Self
Sets the MCP client configuration.
Sourcepub fn with_mcp_config_optional(
self,
mcp_config: Option<McpClientConfig>,
) -> Self
pub fn with_mcp_config_optional( self, mcp_config: Option<McpClientConfig>, ) -> Self
Sets the MCP client configuration if provided.
Sourcepub async fn build(self) -> Result<SharedMistralRsState>
pub async fn build(self) -> Result<SharedMistralRsState>
Sourcepub async fn build_multi_model(self) -> Result<SharedMistralRsState>
pub async fn build_multi_model(self) -> Result<SharedMistralRsState>
Build a multi-model instance.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for MistralRsForServerBuilder
impl !RefUnwindSafe for MistralRsForServerBuilder
impl Send for MistralRsForServerBuilder
impl Sync for MistralRsForServerBuilder
impl Unpin for MistralRsForServerBuilder
impl !UnwindSafe for MistralRsForServerBuilder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
§impl<T> Downcast for T where
T: AsAny + ?Sized,
impl<T> Downcast for T where
T: AsAny + ?Sized,
§fn downcast_ref<T>(&self) -> Option<&T> where
T: AsAny,
fn downcast_ref<T>(&self) -> Option<&T> where
T: AsAny,
Any.
§fn downcast_mut<T>(&mut self) -> Option<&mut T> where
T: AsAny,
fn downcast_mut<T>(&mut self) -> Option<&mut T> where
T: AsAny,
Any.
§impl<T> Instrument for T
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§impl<F, T> IntoSample<T> for F where
T: FromSample<F>,
impl<F, T> IntoSample<T> for Fwhere
T: FromSample<F>,
fn into_sample(self) -> T
§impl<T> Pointable for T
impl<T> Pointable for T
§impl<T> PolicyExt for T where
T: ?Sized,
impl<T> PolicyExt for T where
T: ?Sized,
Source§impl<R, P> ReadPrimitive<R> for P
impl<R, P> ReadPrimitive<R> for P
Source§fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
fn read_from_little_endian(read: &mut R) -> Result<Self, Error>
ReadEndian::read_from_little_endian()
.