pub trait Loader: Send + Sync {
// Required methods
fn load_model_from_hf(
&self,
revision: Option<String>,
token_source: TokenSource,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapSetting,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>;
fn load_model_from_path(
&self,
paths: &Box<dyn ModelPaths>,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapSetting,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>;
fn get_id(&self) -> String;
fn get_kind(&self) -> ModelKind;
}

Expand description
The Loader trait abstracts the loading process. The primary entrypoints are the
load_model_from_hf and load_model_from_path methods.
§Example
use mistralrs_core::{Loader, TokenSource, DeviceMapSetting, AutoDeviceMapParams, ModelDType};
use candle_core::Device;
let loader: Box<dyn Loader> = todo!();
let pipeline = loader.load_model_from_hf(
None,
TokenSource::CacheToken,
&ModelDType::Auto,
&Device::cuda_if_available(0).unwrap(),
false,
DeviceMapSetting::Auto(AutoDeviceMapParams::default_text()),
None,
None,
).unwrap();

Required Methods§
Source
fn load_model_from_hf(
&self,
revision: Option<String>,
token_source: TokenSource,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapSetting,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
fn load_model_from_hf( &self, revision: Option<String>, token_source: TokenSource, dtype: &dyn TryIntoDType, device: &Device, silent: bool, mapper: DeviceMapSetting, in_situ_quant: Option<IsqType>, paged_attn_config: Option<PagedAttentionConfig>, ) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
If revision is None, then it defaults to main.
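If dtype is ModelDType::Auto (as in the example above; dtype is a &dyn TryIntoDType, not an Option, so it cannot be None), then it defaults to the model default (usually BF16).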
If the model is not found on HF, this method will attempt to resolve it locally.
Source
fn load_model_from_path(
&self,
paths: &Box<dyn ModelPaths>,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapSetting,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
fn load_model_from_path( &self, paths: &Box<dyn ModelPaths>, dtype: &dyn TryIntoDType, device: &Device, silent: bool, mapper: DeviceMapSetting, in_situ_quant: Option<IsqType>, paged_attn_config: Option<PagedAttentionConfig>, ) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
Load a model from the specified paths.
Also initializes DEBUG.