pub trait Loader: Send + Sync {
// Required methods
fn load_model_from_hf(
&self,
revision: Option<String>,
token_source: TokenSource,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapMetadata,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>;
fn load_model_from_path(
&self,
paths: &Box<dyn ModelPaths>,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapMetadata,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>;
fn get_id(&self) -> String;
fn get_kind(&self) -> ModelKind;
}
Expand description
The Loader
trait abstracts the loading process. The primary entrypoint is the
load_model
method.
§Example
use mistralrs_core::{Loader, TokenSource, DeviceMapMetadata, ModelDType};
use candle_core::Device;
let loader: Box<dyn Loader> = todo!();
let pipeline = loader.load_model_from_hf(
None,
TokenSource::CacheToken,
&ModelDType::Auto,
&Device::cuda_if_available(0).unwrap(),
false,
DeviceMapMetadata::dummy(),
None,
None,
).unwrap();
Required Methods§
sourcefn load_model_from_hf(
&self,
revision: Option<String>,
token_source: TokenSource,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapMetadata,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
fn load_model_from_hf( &self, revision: Option<String>, token_source: TokenSource, dtype: &dyn TryIntoDType, device: &Device, silent: bool, mapper: DeviceMapMetadata, in_situ_quant: Option<IsqType>, paged_attn_config: Option<PagedAttentionConfig>, ) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
If revision
is None, then it defaults to main
.
If dtype
is None, then it defaults to the model default (usually BF16).
If model is not found on HF, will attempt to resolve locally.
sourcefn load_model_from_path(
&self,
paths: &Box<dyn ModelPaths>,
dtype: &dyn TryIntoDType,
device: &Device,
silent: bool,
mapper: DeviceMapMetadata,
in_situ_quant: Option<IsqType>,
paged_attn_config: Option<PagedAttentionConfig>,
) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
fn load_model_from_path( &self, paths: &Box<dyn ModelPaths>, dtype: &dyn TryIntoDType, device: &Device, silent: bool, mapper: DeviceMapMetadata, in_situ_quant: Option<IsqType>, paged_attn_config: Option<PagedAttentionConfig>, ) -> Result<Arc<Mutex<dyn Pipeline + Sync + Send>>, Error>
Load a model from the specified paths.
Also initializes DEBUG
.