mistralrs

from .mistralrs import *

__doc__ = mistralrs.__doc__
if hasattr(mistralrs, "__all__"):
    __all__ = mistralrs.__all__
class Runner:

An object wrapping the underlying Rust system to handle requests and process conversations.

def send_chat_completion_request(self, /, request):

Send an OpenAI API compatible request, returning the result.

def send_completion_request(self, /, request):

Send an OpenAI API compatible request, returning the result.

def generate_image(self, /, prompt, response_format, height=720, width=1280):

Generate an image.

def send_re_isq(self, /, dtype):

Send a request to re-ISQ the model. If the model was loaded as GGUF or GGML then nothing will happen.

def activate_adapters(self, /, adapter_names):

Send a request to make the specified adapters the active adapters for the model.

def tokenize_text(self, /, text, add_special_tokens):

Tokenize some text, returning raw tokens.

def detokenize_text(self, /, tokens, skip_special_tokens):

Detokenize some tokens, returning text.

class Which:
Plain = <class 'builtins.Which_Plain'>
XLora = <class 'builtins.Which_XLora'>
Lora = <class 'builtins.Which_Lora'>
GGUF = <class 'builtins.Which_GGUF'>
XLoraGGUF = <class 'builtins.Which_XLoraGGUF'>
LoraGGUF = <class 'builtins.Which_LoraGGUF'>
GGML = <class 'builtins.Which_GGML'>
XLoraGGML = <class 'builtins.Which_XLoraGGML'>
LoraGGML = <class 'builtins.Which_LoraGGML'>
VisionPlain = <class 'builtins.Which_VisionPlain'>
DiffusionPlain = <class 'builtins.Which_DiffusionPlain'>
class ChatCompletionRequest:

An OpenAI API compatible chat completion request.

class CompletionRequest:

An OpenAI API compatible completion request.

class Architecture:
class VisionArchitecture:
class DiffusionArchitecture:
class AnyMoeConfig:
class AnyMoeExpertType:
FineTuned = <class 'builtins.AnyMoeExpertType_FineTuned'>
LoraAdapter = <class 'builtins.AnyMoeExpertType_LoraAdapter'>
class ToolChoice:
NoTools = ToolChoice.NoTools
class ResponseMessage:

Chat completion response message.

tool_calls
content
role
class Delta:

Delta in content for streaming response.

role
content
tool_calls
class ResponseLogprob:

A logprob with the top logprobs for this token.

logprob
bytes
top_logprobs
token
class Logprobs:

Logprobs per token.

content
class Choice:

Chat completion choice.

logprobs
index
finish_reason
message
class ChunkChoice:

Chat completion streaming chunk choice.

finish_reason
index
logprobs
delta
class Usage:

OpenAI compatible (superset) usage during a request.

completion_tokens
prompt_tokens
total_completion_time_sec
total_prompt_time_sec
avg_tok_per_sec
avg_compl_tok_per_sec
avg_prompt_tok_per_sec
total_tokens
total_time_sec
class ChatCompletionResponse:

An OpenAI compatible chat completion response.

model
created
id
system_fingerprint
usage
object
choices
class ChatCompletionChunkResponse:

Chat completion streaming request chunk.

object
usage
created
model
choices
id
system_fingerprint
class CompletionChoice:

Completion request choice.

text
index
finish_reason
logprobs
class CompletionResponse:

An OpenAI compatible completion response.

choices
system_fingerprint
usage
created
id
object
model
class TopLogprob:

Top-n logprobs element

bytes
token
logprob
class ModelDType:

DType for the model.

If the model is quantized, this is ignored so it is reasonable to use the [Default] impl.

Note: When using Auto, fallback pattern is: BF16 -> F16 -> F32

class ImageGenerationResponseFormat:

Image generation response format