mistralrs::llguidance::toktrie

Trait TokenizerEnv

pub trait TokenizerEnv: Send {
    // Required methods
    fn tok_trie(&self) -> &TokTrie;
    fn tokenize_bytes(&self, s: &[u8]) -> Vec<u32>;

    // Provided methods
    fn tokenize_bytes_marker(&self, s: &[u8]) -> Vec<u32> { ... }
    fn tokenize(&self, s: &str) -> Vec<u32> { ... }
    fn tokenize_special(&self, s: &str) -> Vec<u32> { ... }
    fn eos_token(&self) -> u32 { ... }
    fn tokenize_is_canonical(&self) -> bool { ... }
}

Required Methods§

fn tok_trie(&self) -> &TokTrie

Associated trie.

fn tokenize_bytes(&self, s: &[u8]) -> Vec<u32>

Tokenize a given byte sequence. It may or may not interpret <|special_tokens|> as special.

Provided Methods§

fn tokenize_bytes_marker(&self, s: &[u8]) -> Vec<u32>

Tokenize a given byte sequence. It will interpret text starting with SPECIAL_TOKEN_MARKER as special tokens.

fn tokenize(&self, s: &str) -> Vec<u32>

Tokenize a string coming from the user. It may or may not interpret <|special_tokens|> as special.

fn tokenize_special(&self, s: &str) -> Vec<u32>

Tokenize a string. It will interpret <|special_tokens|> as special.

fn eos_token(&self) -> u32

End-of-sentence (EOS) token.

fn tokenize_is_canonical(&self) -> bool

If this returns true, this tokenizer always returns canonical tokenizations and can be used for forcing tokens. Non-canonical tokenizers will typically just use TokTrie::greedy_tokenize().

Implementations on Foreign Types§

§

impl TokenizerEnv for ByteTokenizerEnv

§

fn tok_trie(&self) -> &TokTrie

§

fn tokenize_bytes(&self, s: &[u8]) -> Vec<u32>

Implementors§