tune_model

POST

/v1/models/tune

// Example request: POST a tuning job for a model and log the parsed response.
const url = 'https://example.com/v1/models/tune';

// Only `model_id` is required; every other field is optional and may be null.
const payload = {
  cpu: true,
  dtype: 'example',
  hf_revision: 'example',
  max_batch_size: 1,
  max_image_length: 1,
  max_num_images: 1,
  max_seq_len: 1,
  model_id: 'meta-llama/Llama-3.2-3B-Instruct',
  profile: 'quality',
  requested_isq: 'example',
  token_source: 'example',
};

const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: JSON.stringify(payload),
};

try {
  const response = await fetch(url, options);
  // fetch() resolves even for HTTP error statuses (such as the documented
  // 500 "Tuning failed"), so check the status before treating the body as a
  // tuning result.
  if (!response.ok) {
    // Read the body as text: the error payload format is not documented.
    const detail = await response.text();
    throw new Error(`Tuning request failed with HTTP ${response.status}: ${detail}`);
  }
  const data = await response.json();
  console.log(data);
} catch (error) {
  console.error(error);
}

# Same tuning request as above, sent with curl using short-form flags.
curl -X POST 'https://example.com/v1/models/tune' \
  -H 'Content-Type: application/json' \
  -d '{ "cpu": true, "dtype": "example", "hf_revision": "example", "max_batch_size": 1, "max_image_length": 1, "max_num_images": 1, "max_seq_len": 1, "model_id": "meta-llama/Llama-3.2-3B-Instruct", "profile": "quality", "requested_isq": "example", "token_source": "example" }'

Request Body (required)

application/json

object

cpu

Force CPU-only tuning

boolean | null

dtype

Optional model dtype (e.g., auto, f16, bf16)

string | null

hf_revision

Optional HF revision

string | null

max_batch_size

Optional max batch size for tuning

integer | null

max_image_length

Optional max image length (multimodal)

integer | null

max_num_images

Optional max num images (multimodal)

integer | null

max_seq_len

Optional max sequence length for tuning

integer | null

model_id

required

string

Example

meta-llama/Llama-3.2-3B-Instruct

profile

One of:

null
string

null

requested_isq

Optional fixed ISQ level to test (e.g., Q4K)

string | null

token_source

Optional HF token source

string | null

Responses

200

Auto-tune result with recommended settings

500

Tuning failed

tune_model

Request Body required

Example

Responses

200

500

Request Body (required)