pub struct GgufModelBuilder { /* private fields */ }
Expand description
Configure a text GGUF model with the various parameters for loading, running, and other inference behaviors.
Implementations§
Source§impl GgufModelBuilder
 
impl GgufModelBuilder
Sourcepub fn new(model_id: impl ToString, files: Vec<impl ToString>) -> Self
 
pub fn new(model_id: impl ToString, files: Vec<impl ToString>) -> Self
A few defaults are applied here:
- Token source is from the cache (.cache/huggingface/token)
- Maximum number of sequences running is 32
- Number of sequences to hold in prefix cache is 16.
- Automatic device mapping with model defaults according to AutoDeviceMapParams
- By default, web searching compatible with the OpenAI web_search_options setting is disabled.
Sourcepub fn with_search(self, search_bert_model: BertEmbeddingModel) -> Self
 
pub fn with_search(self, search_bert_model: BertEmbeddingModel) -> Self
Enable searching compatible with the OpenAI web_search_options setting. This uses the BERT model specified or the default.
Sourcepub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self
 
pub fn with_search_callback(self, callback: Arc<SearchCallback>) -> Self
Override the search function used when web_search_options is enabled.
pub fn with_tool_callback( self, name: impl Into<String>, callback: Arc<ToolCallback>, ) -> Self
Sourcepub fn with_tool_callback_and_tool(
    self,
    name: impl Into<String>,
    callback: Arc<ToolCallback>,
    tool: Tool,
) -> Self
 
pub fn with_tool_callback_and_tool( self, name: impl Into<String>, callback: Arc<ToolCallback>, tool: Tool, ) -> Self
Register a callback with an associated Tool definition that will be automatically added to requests when tool callbacks are active.
Sourcepub fn with_throughput_logging(self) -> Self
 
pub fn with_throughput_logging(self) -> Self
Enable runner throughput logging.
Sourcepub fn with_jinja_explicit(self, jinja_explicit: String) -> Self
 
pub fn with_jinja_explicit(self, jinja_explicit: String) -> Self
Explicit JINJA chat template file (.jinja) to be used. If specified, this overrides all other chat templates.
Sourcepub fn with_tok_model_id(self, tok_model_id: impl ToString) -> Self
 
pub fn with_tok_model_id(self, tok_model_id: impl ToString) -> Self
Source the tokenizer and chat template from this model ID (must contain tokenizer.json and tokenizer_config.json).
Sourcepub fn with_topology(self, topology: Topology) -> Self
 
pub fn with_topology(self, topology: Topology) -> Self
Set the model topology for use during loading. If there is an overlap, the topology type is used over the ISQ type.
Sourcepub fn with_chat_template(self, chat_template: impl ToString) -> Self
 
pub fn with_chat_template(self, chat_template: impl ToString) -> Self
Literal Jinja chat template OR Path (ending in .json) to one.
Sourcepub fn with_tokenizer_json(self, tokenizer_json: impl ToString) -> Self
 
pub fn with_tokenizer_json(self, tokenizer_json: impl ToString) -> Self
Path to a discrete tokenizer.json file.
Sourcepub fn with_force_cpu(self) -> Self
 
pub fn with_force_cpu(self) -> Self
Force usage of the CPU device. Do not use PagedAttention with this.
Sourcepub fn with_token_source(self, token_source: TokenSource) -> Self
 
pub fn with_token_source(self, token_source: TokenSource) -> Self
Source of the Hugging Face token.
Sourcepub fn with_hf_revision(self, revision: impl ToString) -> Self
 
pub fn with_hf_revision(self, revision: impl ToString) -> Self
Set the revision to use for a Hugging Face remote model.
Sourcepub fn with_paged_attn(
    self,
    paged_attn_cfg: impl FnOnce() -> Result<PagedAttentionConfig>,
) -> Result<Self>
 
pub fn with_paged_attn( self, paged_attn_cfg: impl FnOnce() -> Result<PagedAttentionConfig>, ) -> Result<Self>
Enable PagedAttention. Configure PagedAttention with a PagedAttentionConfig object, which
can be created with sensible values with a PagedAttentionMetaBuilder.
If PagedAttention is not supported (query with paged_attn_supported), this will do nothing.
Sourcepub fn with_max_num_seqs(self, max_num_seqs: usize) -> Self
 
pub fn with_max_num_seqs(self, max_num_seqs: usize) -> Self
Set the maximum number of sequences which can be run at once.
Sourcepub fn with_no_kv_cache(self) -> Self
 
pub fn with_no_kv_cache(self) -> Self
Disable KV cache. Trade performance for memory usage.
Sourcepub fn with_prefix_cache_n(self, n_seqs: Option<usize>) -> Self
 
pub fn with_prefix_cache_n(self, n_seqs: Option<usize>) -> Self
Set the number of sequences to hold in the prefix cache. Set to None to disable the prefix cacher.
Sourcepub fn with_logging(self) -> Self
 
pub fn with_logging(self) -> Self
Enable logging.
Sourcepub fn with_device_mapping(self, device_mapping: DeviceMapSetting) -> Self
 
pub fn with_device_mapping(self, device_mapping: DeviceMapSetting) -> Self
Provide metadata to initialize the device mapper.
Sourcepub fn with_device(self, device: Device) -> Self
 
pub fn with_device(self, device: Device) -> Self
Set the main device to load this model onto. Automatic device mapping will be performed starting with this device.
pub async fn build(self) -> Result<Model>
Auto Trait Implementations§
impl Freeze for GgufModelBuilder
impl !RefUnwindSafe for GgufModelBuilder
impl Send for GgufModelBuilder
impl Sync for GgufModelBuilder
impl Unpin for GgufModelBuilder
impl !UnwindSafe for GgufModelBuilder
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
    T: ?Sized,
 
impl<T> BorrowMut<T> for T where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
 
fn borrow_mut(&mut self) -> &mut T
§impl<T> Downcast for T where
    T: AsAny + ?Sized,
 
impl<T> Downcast for T where
    T: AsAny + ?Sized,
§fn downcast_ref<T>(&self) -> Option<&T> where
    T: AsAny,
 
fn downcast_ref<T>(&self) -> Option<&T> where
    T: AsAny,
Forward to the method defined on the type Any.
§fn downcast_mut<T>(&mut self) -> Option<&mut T> where
    T: AsAny,
 
fn downcast_mut<T>(&mut self) -> Option<&mut T> where
    T: AsAny,
Forward to the method defined on the type Any.
§impl<T> Instrument for T
 
impl<T> Instrument for T
§fn instrument(self, span: Span) -> Instrumented<Self>
 
fn instrument(self, span: Span) -> Instrumented<Self>
§fn in_current_span(self) -> Instrumented<Self>
 
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
 
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
 
fn into_either(self, into_left: bool) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
 
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> ⓘ
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
§impl<F, T> IntoSample<T> for F where
    T: FromSample<F>,
 
impl<F, T> IntoSample<T> for F where
    T: FromSample<F>,
fn into_sample(self) -> T
§impl<T> Pointable for T
 
impl<T> Pointable for T
§impl<T> PolicyExt for T where
    T: ?Sized,
 
impl<T> PolicyExt for T where
    T: ?Sized,
§impl<SS, SP> SupersetOf<SS> for SP where
    SS: SubsetOf<SP>,
 
impl<SS, SP> SupersetOf<SS> for SP where
    SS: SubsetOf<SP>,
§fn to_subset(&self) -> Option<SS>
 
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its
superset. Read more
§fn is_in_subset(&self) -> bool
 
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).
§fn to_subset_unchecked(&self) -> SS
 
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.
§fn from_subset(element: &SS) -> SP
 
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.