Advanced MCP Usage
This guide covers advanced MCP client configurations and usage patterns.
Multi-Server Configuration
Connect to multiple MCP servers simultaneously to access different tool sets:
use std::collections::HashMap;
use mistralrs::{McpClientConfig, McpServerConfig, McpServerSource};

let mcp_config = McpClientConfig {
    servers: vec![
        // Hugging Face for ML tools
        McpServerConfig {
            id: "hf_server".to_string(),
            name: "Hugging Face MCP".to_string(),
            source: McpServerSource::Http {
                url: "https://hf.co/mcp".to_string(),
                timeout_secs: Some(30),
                headers: None,
            },
            enabled: true,
            tool_prefix: Some("hf".to_string()),
            resources: None,
            bearer_token: Some("hf_xxx".to_string()),
        },
        // Local filesystem access
        McpServerConfig {
            id: "fs_server".to_string(),
            name: "Filesystem MCP".to_string(),
            source: McpServerSource::Process {
                command: "mcp-server-filesystem".to_string(),
                args: vec!["--root".to_string(), "/data".to_string()],
                work_dir: None,
                env: None,
            },
            enabled: true,
            tool_prefix: Some("fs".to_string()),
            resources: Some(vec!["file://**".to_string()]),
            bearer_token: None,
        },
        // GitHub API access
        McpServerConfig {
            id: "github_server".to_string(),
            name: "GitHub MCP".to_string(),
            source: McpServerSource::Http {
                url: "https://api.github.com/mcp".to_string(),
                timeout_secs: Some(45),
                headers: Some(HashMap::from([
                    ("Accept".to_string(), "application/vnd.github.v3+json".to_string()),
                ])),
            },
            enabled: true,
            tool_prefix: Some("gh".to_string()),
            resources: None,
            bearer_token: Some("ghp_xxx".to_string()),
        },
    ],
    auto_register_tools: true,
    tool_timeout_secs: Some(30),
    max_concurrent_calls: Some(10),
};
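Once defined, the configuration is attached when the model is built. Below is a minimal sketch, assuming the high-level Rust API (`TextModelBuilder` with a `with_mcp_client` method) plus `tokio` and `anyhow` as dependencies; the model ID and the placeholder server list are illustrative, not prescribed.

use anyhow::Result;
use mistralrs::{IsqType, McpClientConfig, TextMessageRole, TextMessages, TextModelBuilder};

#[tokio::main]
async fn main() -> Result<()> {
    let mcp_config = McpClientConfig {
        servers: vec![/* the three servers defined above */],
        auto_register_tools: true,
        tool_timeout_secs: Some(30),
        max_concurrent_calls: Some(10),
    };

    let model = TextModelBuilder::new("Qwen/Qwen3-4B") // any supported model ID
        .with_isq(IsqType::Q8_0)
        .with_mcp_client(mcp_config) // connects the servers and registers their tools
        .build()
        .await?;

    let messages = TextMessages::new().add_message(
        TextMessageRole::User,
        "Search the Hub for a small embedding model.",
    );
    let response = model.send_chat_request(messages).await?;
    println!("{:?}", response.choices[0].message.content);
    Ok(())
}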
Tool Prefixing Strategy
When using multiple servers, tool prefixes prevent naming conflicts:
{
  "servers": [
    {
      "id": "server1",
      "tool_prefix": "s1"
      // Tool "search" becomes "s1_search"
    },
    {
      "id": "server2",
      "tool_prefix": "s2"
      // Tool "search" becomes "s2_search"
    }
  ]
}
Custom Headers and Authentication
API Key in Headers
use std::collections::HashMap;

let mut headers = HashMap::new();
headers.insert("X-API-Key".to_string(), "your-api-key".to_string());
headers.insert("X-Client-Version".to_string(), "1.0.0".to_string());

let source = McpServerSource::Http {
    url: "https://api.example.com/mcp".to_string(),
    timeout_secs: Some(30),
    headers: Some(headers),
};
OAuth2 Bearer Token
McpServerConfig {
    // ...
    bearer_token: Some("your-oauth2-token".to_string()),
    // Automatically added as: Authorization: Bearer your-oauth2-token
}
Resource Subscriptions
Subscribe to specific resource patterns from MCP servers:
McpServerConfig {
    id: "data_server".to_string(),
    // ...
    resources: Some(vec![
        "file://data/**/*.json".to_string(), // All JSON files in data/
        "db://users/*".to_string(),          // All user records
        "api://v1/metrics".to_string(),      // Specific API endpoint
    ]),
    // ...
}
Concurrency and Rate Limiting
Global Concurrency Control
McpClientConfig {
    // ...
    max_concurrent_calls: Some(5), // At most 5 tools executing simultaneously
}
Per-Tool Timeouts
McpClientConfig {
    // ...
    tool_timeout_secs: Some(30), // Each tool call times out after 30s
}
Custom Rate Limiting
# Python example with custom rate limiting
import time
from collections import deque

class RateLimitedMcpRunner:
    def __init__(self, runner, max_calls_per_minute=60):
        self.runner = runner
        self.max_calls = max_calls_per_minute
        self.call_times = deque()

    def send_chat_completion_request(self, request):
        # Remove calls older than 1 minute
        now = time.time()
        while self.call_times and self.call_times[0] < now - 60:
            self.call_times.popleft()

        # If the window is full, wait until the oldest call ages out
        if len(self.call_times) >= self.max_calls:
            sleep_time = 60 - (now - self.call_times[0])
            time.sleep(sleep_time)
            now = time.time()  # refresh the timestamp after sleeping

        # Record this call and forward it to the wrapped runner
        self.call_times.append(now)
        return self.runner.send_chat_completion_request(request)
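To apply the limiter, wrap an existing runner, e.g. `limited = RateLimitedMcpRunner(runner, max_calls_per_minute=30)`, and call `send_chat_completion_request` on the wrapper instead of on the runner directly; no other changes are needed.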
Environment-Specific Configuration
Development vs Production
let mcp_config = if cfg!(debug_assertions) {
    McpClientConfig {
        servers: vec![/* development servers */],
        tool_timeout_secs: Some(60),   // Longer timeouts for debugging
        max_concurrent_calls: Some(1), // Sequential execution for debugging
        // ...
    }
} else {
    McpClientConfig {
        servers: vec![/* production servers */],
        tool_timeout_secs: Some(10),    // Strict timeouts
        max_concurrent_calls: Some(20), // Higher concurrency
        // ...
    }
};
Environment Variables
let mcp_config = McpClientConfig {
    servers: vec![
        McpServerConfig {
            // ...
            bearer_token: std::env::var("HF_TOKEN").ok(),
            source: McpServerSource::Http {
                url: std::env::var("MCP_SERVER_URL")
                    .unwrap_or_else(|_| "https://hf.co/mcp".to_string()),
                // ...
            },
            // ...
        },
    ],
    // ...
};
Error Handling and Fallbacks
Graceful Degradation
let mcp_config = McpClientConfig {
    servers: vec![
        // Primary server
        McpServerConfig {
            id: "primary".to_string(),
            enabled: true,
            // ...
        },
        // Fallback server
        McpServerConfig {
            id: "fallback".to_string(),
            enabled: check_primary_health().is_err(),
            // ...
        },
    ],
    // ...
};
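`check_primary_health()` is not part of the MCP client API; it stands in for whatever probe fits your deployment. A minimal sketch, assuming the primary server exposes an HTTP endpoint and that the `reqwest` crate (with the `blocking` feature) is available; run it during synchronous startup configuration, not inside an async task:

use std::time::Duration;

// Hypothetical helper: treat the primary MCP endpoint as healthy if it answers an HTTP request
fn check_primary_health() -> Result<(), Box<dyn std::error::Error>> {
    let client = reqwest::blocking::Client::builder()
        .timeout(Duration::from_secs(5))
        .build()?;
    let status = client
        .get("https://primary.example.com/mcp/health") // placeholder URL
        .send()?
        .status();
    if status.is_success() {
        Ok(())
    } else {
        Err(format!("primary MCP server returned {status}").into())
    }
}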
Tool-Specific Error Handling
# Handle specific tool errors
try:
    response = runner.send_chat_completion_request(request)
except Exception as e:
    if "tool_timeout" in str(e):
        print("Tool execution timed out, trying with longer timeout...")
        # Retry with extended timeout
    elif "tool_not_found" in str(e):
        print("Tool not available, falling back to built-in response...")
        # Fallback logic
Monitoring and Debugging
Enable Debug Logging
std::env::set_var("RUST_LOG", "mistralrs_mcp=debug");
env_logger::init();
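If the application uses the `tracing` ecosystem rather than `log`/`env_logger`, an equivalent setup (assuming the `tracing-subscriber` crate with its `env-filter` feature) is:

use tracing_subscriber::EnvFilter;

// Honors RUST_LOG, e.g. RUST_LOG=mistralrs_mcp=debug
tracing_subscriber::fmt()
    .with_env_filter(EnvFilter::from_default_env())
    .init();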
Tool Call Inspection
let response = model.send_chat_request(messages).await?;

// Check if tools were called
if let Some(tool_calls) = &response.choices[0].message.tool_calls {
    for call in tool_calls {
        println!("Tool: {}", call.function.name);
        println!("Args: {}", call.function.arguments);
        println!("ID: {}", call.id);
    }
}
Performance Optimization
Connection Pooling
HTTP and WebSocket transports use connection pooling automatically. The pool size can be configured via an environment variable:
// Set via environment variable
std::env::set_var("MCP_POOL_SIZE", "10");
Caching Tool Responses
from functools import lru_cache
import json

@lru_cache(maxsize=100)
def cached_tool_call(tool_name, args_json):
    args = json.loads(args_json)
    result = ...  # tool execution logic goes here
    return result

# Use with MCP tools that have deterministic outputs
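Because `lru_cache` keys on its arguments, pass the arguments as a canonical JSON string (for example `json.dumps(args, sort_keys=True)`) so that semantically identical calls hit the same cache entry, and avoid caching tools whose results change over time.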
Security Best Practices
- Token Rotation: Implement automatic token refresh for long-running applications
- Least Privilege: Only enable required tools and resources (see the sketch after this list)
- Audit Logging: Log all tool calls for security monitoring
- Network Isolation: Use Process transport for sensitive local operations
- Input Validation: MCP servers should validate all tool inputs
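As an illustration of the token-handling and least-privilege points, the sketch below enables a single server, subscribes only to the resource pattern it needs, and reads its bearer token from the environment instead of hardcoding it. The server ID, URL, prefix, and variable name are placeholders, not part of the MCP API.

use mistralrs::{McpClientConfig, McpServerConfig, McpServerSource};

let hardened_config = McpClientConfig {
    servers: vec![McpServerConfig {
        id: "metrics_api".to_string(),
        name: "Metrics MCP (read-only)".to_string(),
        source: McpServerSource::Http {
            url: "https://metrics.internal.example.com/mcp".to_string(),
            timeout_secs: Some(10),
            headers: None,
        },
        enabled: true,
        tool_prefix: Some("metrics".to_string()),
        // Least privilege: subscribe only to the resources the application needs
        resources: Some(vec!["api://v1/metrics".to_string()]),
        // Keep credentials out of source control; rotate them via the environment or a secret store
        bearer_token: std::env::var("METRICS_MCP_TOKEN").ok(),
    }],
    auto_register_tools: true,
    tool_timeout_secs: Some(10),
    max_concurrent_calls: Some(5),
};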