Struct MistralRsForServerBuilder

Source
pub struct MistralRsForServerBuilder { /* private fields */ }
Expand description

A builder for creating a mistral.rs instance with configured options for the mistral.rs server.

§Examples

Basic usage:

use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;

let args = Args::parse();

let mistralrs = MistralRsForServerBuilder::new()
       .with_truncate_sequence(args.truncate_sequence)
       .with_model(args.model)
       .with_max_seqs(args.max_seqs)
       .with_no_kv_cache(args.no_kv_cache)
       .with_token_source(args.token_source)
       .with_interactive_mode(args.interactive_mode)
       .with_prefix_cache_n(args.prefix_cache_n)
       .set_paged_attn(args.paged_attn)
       .with_cpu(args.cpu)
       .with_enable_search(args.enable_search)
       .with_seed_optional(args.seed)
       .with_log_optional(args.log)
       .with_chat_template_optional(args.chat_template)
       .with_jinja_explicit_optional(args.jinja_explicit)
       .with_num_device_layers_optional(args.num_device_layers)
       .with_in_situ_quant_optional(args.in_situ_quant)
       .with_paged_attn_gpu_mem_optional(args.paged_attn_gpu_mem)
       .with_paged_attn_gpu_mem_usage_optional(args.paged_attn_gpu_mem_usage)
       .with_paged_ctxt_len_optional(args.paged_ctxt_len)
       .with_paged_attn_block_size_optional(args.paged_attn_block_size)
       .with_prompt_chunksize_optional(args.prompt_chunksize)
       .build()
       .await?;

Implementations§

Source§

impl MistralRsForServerBuilder

Source

pub fn new() -> Self

Creates a new MistralRsForServerBuilder with default settings.

This is equivalent to calling Default::default().

§Examples
use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;

let builder = MistralRsForServerBuilder::new();
Source

pub fn with_device(self, device: Device) -> Self

Sets the Candle device to use for model execution.

Source

pub fn with_seed(self, seed: u64) -> Self

Sets the random seed for deterministic model behavior.

Source

pub fn with_seed_optional(self, seed: Option<u64>) -> Self

Sets the random seed if provided.

Source

pub fn with_log(self, log: String) -> Self

Sets the logging configuration.

Source

pub fn with_log_optional(self, log: Option<String>) -> Self

Sets the logging configuration if provided.

Source

pub fn with_truncate_sequence(self, truncate_sequence: bool) -> Self

Sets whether to truncate sequences that exceed the maximum model length.

Source

pub fn with_model(self, model: ModelSelected) -> Self

Sets the model to be used.

Source

pub fn with_model_config(self, model_config: ModelConfig) -> Self

Add a model to the multi-model configuration.

Source

pub fn with_model_configs(self, model_configs: Vec<ModelConfig>) -> Self

Add multiple models to the multi-model configuration.

Source

pub fn with_default_model_id(self, default_model_id: String) -> Self

Set the default model ID to use when none is specified in requests.

Source

pub fn add_model_config(self, config: ModelConfig) -> Self

Add a model configuration.

Source

pub fn add_model(self, model_id: String, model: ModelSelected) -> Self

Add a model with just an ID and ModelSelected (convenience method).

Source

pub fn with_max_seqs(self, max_seqs: usize) -> Self

Sets the maximum number of concurrent sequences.

Source

pub fn with_no_kv_cache(self, no_kv_cache: bool) -> Self

Sets whether to disable the key-value cache.

Source

pub fn with_chat_template(self, chat_template: String) -> Self

Sets the chat template configuration.

Source

pub fn with_chat_template_optional(self, chat_template: Option<String>) -> Self

Sets the chat template configuration if provided.

Source

pub fn with_jinja_explicit(self, jinja_explicit: String) -> Self

Sets an explicit JINJA chat template file.

Source

pub fn with_jinja_explicit_optional( self, jinja_explicit: Option<String>, ) -> Self

Sets an explicit JINJA chat template file if provided.

Source

pub fn with_token_source(self, token_source: TokenSource) -> Self

Sets the token source for authentication.

Source

pub fn with_interactive_mode(self, interactive_mode: bool) -> Self

Sets whether to run in interactive mode.

Source

pub fn with_prefix_cache_n(self, prefix_cache_n: usize) -> Self

Sets the number of prefix caches to hold on the device.

Source

pub fn with_num_device_layers(self, num_device_layers: Vec<String>) -> Self

Sets the device layer mapping.

Source

pub fn with_num_device_layers_optional( self, num_device_layers: Option<Vec<String>>, ) -> Self

Sets the device layer mapping if provided.

Source

pub fn with_in_situ_quant(self, in_situ_quant: String) -> Self

Sets the in-situ quantization method.

Source

pub fn with_in_situ_quant_optional(self, in_situ_quant: Option<String>) -> Self

Sets the in-situ quantization method if provided.

Source

pub fn set_paged_attn(self, paged_attn: Option<bool>) -> Self

Sets PagedAttention.

Unlike the other with_PROP or with_PROP_optional methods, this method stores exactly the Option<bool> that is passed in, because None, Some(true), and Some(false) each have a different meaning.

- None: default behavior for the target device (e.g. enable for CUDA, disable for Metal)
- Some(true): enable (if supported by the target device)
- Some(false): disable

Source

pub fn with_paged_attn_gpu_mem(self, paged_attn_gpu_mem: usize) -> Self

Sets the GPU memory allocation for PagedAttention KV cache.

Source

pub fn with_paged_attn_gpu_mem_optional( self, paged_attn_gpu_mem: Option<usize>, ) -> Self

Sets the GPU memory allocation for PagedAttention KV cache if provided.

Source

pub fn with_paged_attn_gpu_mem_usage( self, paged_attn_gpu_mem_usage: f32, ) -> Self

Sets the percentage of GPU memory to utilize for PagedAttention.

Source

pub fn with_paged_attn_gpu_mem_usage_optional( self, paged_attn_gpu_mem_usage: Option<f32>, ) -> Self

Sets the percentage of GPU memory to utilize for PagedAttention if provided.

Source

pub fn with_paged_ctxt_len(self, paged_ctxt_len: usize) -> Self

Sets the total context length for KV cache allocation.

Source

pub fn with_paged_ctxt_len_optional(self, paged_ctxt_len: Option<usize>) -> Self

Sets the total context length for KV cache allocation if provided.

Source

pub fn with_paged_attn_block_size(self, paged_attn_block_size: usize) -> Self

Sets the block size for PagedAttention.

Source

pub fn with_paged_attn_cache_type(self, cache_type: PagedCacheType) -> Self

Sets the cache type for PagedAttention.

Source

pub fn with_paged_attn_block_size_optional( self, paged_attn_block_size: Option<usize>, ) -> Self

Sets the block size for PagedAttention if provided.

Source

pub fn with_prompt_chunksize(self, prompt_chunksize: usize) -> Self

Sets the prompt chunking size.

Source

pub fn with_prompt_chunksize_optional( self, prompt_chunksize: Option<usize>, ) -> Self

Sets the prompt chunking size if provided.

Source

pub fn with_cpu(self, cpu: bool) -> Self

Sets whether to force CPU-only execution.

Source

pub fn with_enable_search(self, enable_search: bool) -> Self

Sets whether to enable web search functionality.

Source

pub fn with_search_bert_model(self, search_bert_model: String) -> Self

Sets the BERT model for web search assistance.

Source

pub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self

Override the search function used when web_search_options is enabled.

Source

pub fn with_mcp_config(self, mcp_config: McpClientConfig) -> Self

Sets the MCP client configuration.

Source

pub fn with_mcp_config_optional( self, mcp_config: Option<McpClientConfig>, ) -> Self

Sets the MCP client configuration if provided.

Source

pub async fn build(self) -> Result<SharedMistralRsState>

Builds the configured mistral.rs instance.

§Examples
use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;

let shared_mistralrs = MistralRsForServerBuilder::new()
    .with_model(model)
    .with_in_situ_quant("8".to_string())
    .set_paged_attn(Some(true))
    .build()
    .await?;
Source

pub async fn build_multi_model(self) -> Result<SharedMistralRsState>

Build a multi-model instance.

Trait Implementations§

Source§

impl Default for MistralRsForServerBuilder

Source§

fn default() -> Self

Creates a new builder with default configuration.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
§

impl<T> AsAny for T
where T: Any,

§

fn as_any(&self) -> &(dyn Any + 'static)

§

fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)

§

fn type_name(&self) -> &'static str

Gets the type name of self
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
§

impl<T> Downcast for T
where T: AsAny + ?Sized,

§

fn is<T>(&self) -> bool
where T: AsAny,

Returns true if the boxed type is the same as T. Read more
§

fn downcast_ref<T>(&self) -> Option<&T>
where T: AsAny,

Forward to the method defined on the type Any.
§

fn downcast_mut<T>(&mut self) -> Option<&mut T>
where T: AsAny,

Forward to the method defined on the type Any.
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§

impl<F, T> IntoSample<T> for F
where T: FromSample<F>,

§

fn into_sample(self) -> T

§

impl<T> Pointable for T

§

const ALIGN: usize

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
§

impl<T> PolicyExt for T
where T: ?Sized,

§

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns [Action::Follow] only if self and other return Action::Follow. Read more
§

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

Create a new Policy that returns [Action::Follow] if either self or other returns Action::Follow. Read more
Source§

impl<R, P> ReadPrimitive<R> for P
where R: Read + ReadEndian<P>, P: Default,

Source§

fn read_from_little_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_little_endian().
Source§

fn read_from_big_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_big_endian().
Source§

fn read_from_native_endian(read: &mut R) -> Result<Self, Error>

Read this value from the supplied reader. Same as ReadEndian::read_from_native_endian().
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

impl<T> ErasedDestructor for T
where T: 'static,

§

impl<T> ErasedDestructor for T
where T: 'static,

§

impl<T> Ungil for T
where T: Send,