Struct MistralRsForServerBuilder

impl MistralRsForServerBuilder

pub fn new() -> Self

Creates a new MistralRsForServerBuilder with default settings.

This is equivalent to calling Default::default().

§Examples

use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;

let builder = MistralRsForServerBuilder::new();

pub fn with_device(self, device: Device) -> Self

Sets the Candle device to use for model execution.

pub fn with_seed(self, seed: u64) -> Self

Sets the random seed for deterministic model behavior.

pub fn with_seed_optional(self, seed: Option<u64>) -> Self

Sets the random seed if provided.

pub fn with_log(self, log: String) -> Self

Sets the logging configuration.

pub fn with_log_optional(self, log: Option<String>) -> Self

Sets the logging configuration if provided.

pub fn with_truncate_sequence(self, truncate_sequence: bool) -> Self

Sets whether to truncate sequences that exceed the maximum model length.

pub fn with_model(self, model: ModelSelected) -> Self

Sets the model to be used.

pub fn with_model_config(self, model_config: ModelConfig) -> Self

Add a model to the multi-model configuration.

pub fn with_model_configs(self, model_configs: Vec<ModelConfig>) -> Self

Add multiple models to the multi-model configuration.

pub fn with_default_model_id(self, default_model_id: String) -> Self

Set the default model ID to use when none is specified in requests.

pub fn add_model_config(self, config: ModelConfig) -> Self

Add a model configuration.

pub fn add_model(self, model_id: String, model: ModelSelected) -> Self

Add a model with just an ID and ModelSelected (convenience method).

pub fn with_max_seqs(self, max_seqs: usize) -> Self

Sets the maximum number of concurrent sequences.

pub fn with_no_kv_cache(self, no_kv_cache: bool) -> Self

Sets whether to disable the key-value cache.

pub fn with_chat_template(self, chat_template: String) -> Self

Sets the chat template configuration.

pub fn with_chat_template_optional(self, chat_template: Option<String>) -> Self

Sets the chat template configuration if provided.

pub fn with_jinja_explicit(self, jinja_explicit: String) -> Self

Sets an explicit JINJA chat template file.

pub fn with_jinja_explicit_optional( self, jinja_explicit: Option<String>, ) -> Self

Sets an explicit JINJA chat template file if provided.

pub fn with_token_source(self, token_source: TokenSource) -> Self

Sets the token source for authentication.

pub fn with_interactive_mode(self, interactive_mode: bool) -> Self

Sets whether to run in interactive mode.

pub fn with_prefix_cache_n(self, prefix_cache_n: usize) -> Self

Sets the number of prefix caches to hold on the device.

pub fn with_num_device_layers(self, num_device_layers: Vec<String>) -> Self

Sets the device layer mapping.

pub fn with_num_device_layers_optional( self, num_device_layers: Option<Vec<String>>, ) -> Self

Sets the device layer mapping if provided.

pub fn with_in_situ_quant(self, in_situ_quant: String) -> Self

Sets the in-situ quantization method.

pub fn with_in_situ_quant_optional(self, in_situ_quant: Option<String>) -> Self

Sets the in-situ quantization method if provided.

pub fn set_paged_attn(self, paged_attn: Option<bool>) -> Self

Sets PagedAttention.

Unlike the other with_PROP or with_PROP_optional methods, this method sets the value to exactly the Option<bool> that is passed in, because None, Some(true), and Some(false) each have different implications:

None: default behavior for the target device (e.g. enable for CUDA, disable for Metal); Some(true): enable (if supported by the target device); Some(false): disable.

pub fn with_paged_attn_gpu_mem(self, paged_attn_gpu_mem: usize) -> Self

Sets the GPU memory allocation for PagedAttention KV cache.

pub fn with_paged_attn_gpu_mem_optional( self, paged_attn_gpu_mem: Option<usize>, ) -> Self

Sets the GPU memory allocation for PagedAttention KV cache if provided.

pub fn with_paged_attn_gpu_mem_usage( self, paged_attn_gpu_mem_usage: f32, ) -> Self

Sets the percentage of GPU memory to utilize for PagedAttention.

pub fn with_paged_attn_gpu_mem_usage_optional( self, paged_attn_gpu_mem_usage: Option<f32>, ) -> Self

Sets the percentage of GPU memory to utilize for PagedAttention if provided.

pub fn with_paged_ctxt_len(self, paged_ctxt_len: usize) -> Self

Sets the total context length for KV cache allocation.

pub fn with_paged_ctxt_len_optional(self, paged_ctxt_len: Option<usize>) -> Self

Sets the total context length for KV cache allocation if provided.

pub fn with_paged_attn_block_size(self, paged_attn_block_size: usize) -> Self

Sets the block size for PagedAttention.

pub fn with_paged_attn_cache_type(self, cache_type: PagedCacheType) -> Self

Sets the KV cache type for PagedAttention.

pub fn with_paged_attn_block_size_optional( self, paged_attn_block_size: Option<usize>, ) -> Self

Sets the block size for PagedAttention if provided.

pub fn with_prompt_chunksize(self, prompt_chunksize: usize) -> Self

Sets the prompt chunking size.

pub fn with_prompt_chunksize_optional( self, prompt_chunksize: Option<usize>, ) -> Self

Sets the prompt chunking size if provided.

pub fn with_cpu(self, cpu: bool) -> Self

Sets whether to force CPU-only execution.

pub fn with_enable_search(self, enable_search: bool) -> Self

Sets whether to enable web search functionality.

pub fn with_search_bert_model(self, search_bert_model: String) -> Self

Sets the BERT model for web search assistance.

pub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self

Override the search function used when web_search_options is enabled.

pub fn with_mcp_config(self, mcp_config: McpClientConfig) -> Self

Sets the MCP client configuration.

pub fn with_mcp_config_optional( self, mcp_config: Option<McpClientConfig>, ) -> Self

Sets the MCP client configuration if provided.

pub async fn build(self) -> Result<SharedMistralRsState>

Builds the configured mistral.rs instance.

§Examples

use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;

let shared_mistralrs = MistralRsForServerBuilder::new()
    .with_model(model)
    .with_in_situ_quant("8".to_string())
    .set_paged_attn(Some(true))
    .build()
    .await?;

pub async fn build_multi_model(self) -> Result<SharedMistralRsState>

Builds a multi-model instance.

Trait Implementations§

impl Default for MistralRsForServerBuilder

fn default() -> Self

Creates a new builder with default configuration.

Auto Trait Implementations§

impl !UnwindSafe for MistralRsForServerBuilder

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

impl<T> AsAny for T
where T: Any,

fn as_any(&self) -> &(dyn Any + 'static)

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

fn type_name(&self) -> &'static str

Gets the type name of self.

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

impl<T> Downcast for T
where T: AsAny + ?Sized,

fn is<T>(&self) -> bool
where T: AsAny,

Returns true if the boxed type is the same as T. Read more

fn downcast_ref<T>(&self) -> Option<&T>
where T: AsAny,

Forward to the method defined on the type Any.

fn downcast_mut<T>(&mut self) -> Option<&mut T>
where T: AsAny,

Forward to the method defined on the type Any.

impl<T> From<T> for T

fn from(t: T) -> T

Returns the argument unchanged.

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more