llama4
Runnable Python SDK example: llama4.
from mistralrs import Runner, Which, ChatCompletionRequest, MultimodalArchitecture
# Build a runner for the Llama 4 Scout instruct checkpoint using the
# multimodal (image + text) architecture, passing in_situ_quant="Q4K"
# to request Q4K in-situ quantization.
runner = Runner(
    which=Which.MultimodalPlain(
        model_id="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        arch=MultimodalArchitecture.Llama4,
    ),
    in_situ_quant="Q4K",
)

# Send a single user message made of two content parts: an image
# referenced by URL, followed by the text question about that image.
res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://www.nhmagazine.com/content/uploads/2019/05/mtwashingtonFranconia-2-19-18-108-Edit-Edit.jpg"
                        },
                    },
                    {
                        "type": "text",
                        "text": "What is this?",
                    },
                ],
            }
        ],
        max_tokens=256,
        presence_penalty=1.0,
        top_p=0.1,
        temperature=0.1,
    )
)

# Print the text of the first returned choice, then the usage
# information attached to the response.
print(res.choices[0].message.content)
print(res.usage)

# Source: examples/python/llama4.py