mistralrs_core/search/
mod.rs

1use std::collections::HashMap;
2
3pub mod rag;
4
5use anyhow::Result;
6use html2text::{config, render::PlainDecorator};
7use scraper::{Html, Selector};
8use serde::{Deserialize, Serialize};
9use serde_json::{json, Value};
10use std::env::consts::{ARCH, FAMILY, OS};
11
12use crate::{Function, Tool, ToolType, WebSearchOptions, WebSearchUserLocation};
13
/// Name under which the web search tool is exposed; used as the function name of the tool
/// built by `get_search_tool`.
pub(crate) const SEARCH_TOOL_NAME: &str = "search_the_web";
/// Version of this crate, taken from `CARGO_PKG_VERSION` at compile time. It is embedded in
/// the `User-Agent` header sent by `run_search_tool`.
const APP_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Base description of the search tool, including the shape of the output the model should
/// expect. `get_search_tool` appends the user's location details to this text when available.
const DESCRIPTION: &str = r#"This tool is used to search the web given a query. If you call this tool, then you MUST complete your answer using the output.
You should expect output like this:
{
    "output": [
        {
            "title": "...",
            "description": "...",
            "url": "...",
            "content": "...",
        },
        ...
    ]
}
YOU SHOULD NOT CALL THE SEARCH TOOL CONSECUTIVELY!"#;
30
/// A single web search hit, as produced by `run_search_tool`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct SearchResult {
    /// Title text of the search result.
    pub title: String,
    /// Snippet text shown for the search result.
    pub description: String,
    /// URL of the result page.
    pub url: String,
    /// Plain-text rendering of the page fetched from `url`; empty if the page could not be
    /// fetched.
    pub content: String,
}
38
/// Arguments accepted by the web search tool, matching the `query` property declared in the
/// parameter schema built by `get_search_tool`.
#[derive(Debug, Serialize, Deserialize)]
pub struct SearchFunctionParameters {
    /// The search query text.
    pub query: String,
}
43
44pub fn get_search_tool(web_search_options: &WebSearchOptions) -> Result<Tool> {
45    let parameters: HashMap<String, Value> = serde_json::from_value(json!({
46        "type": "object",
47        "properties": {
48            "query": {
49                "type": "string",
50                "description": "A query for web searching.",
51            },
52        },
53        "required": ["query"],
54    }))?;
55
56    let location_details = match &web_search_options.user_location {
57        Some(WebSearchUserLocation::Approximate { approximate }) => {
58            format!(
59                "\nThe user's location is: {}, {}, {}, {}.",
60                approximate.city, approximate.region, approximate.country, approximate.timezone
61            )
62        }
63        None => "".to_string(),
64    };
65
66    Ok(Tool {
67        tp: ToolType::Function,
68        function: Function {
69            description: Some(format!("{DESCRIPTION}{location_details}")),
70            name: SEARCH_TOOL_NAME.to_string(),
71            parameters: Some(parameters),
72        },
73    })
74}
75
76pub fn run_search_tool(params: &SearchFunctionParameters) -> Result<Vec<SearchResult>> {
77    let client = reqwest::blocking::Client::new();
78
79    let encoded_query = urlencoding::encode(&params.query);
80    let url = format!("https://html.duckduckgo.com/html/?q={}", encoded_query);
81
82    let user_agent = format!("mistralrs/{APP_VERSION} ({OS}; {ARCH}; {FAMILY})");
83    let response = client.get(&url).header("User-Agent", &user_agent).send()?;
84
85    // Check the response status
86    if !response.status().is_success() {
87        anyhow::bail!("Failed to fetch search results: {}", response.status())
88    }
89
90    let html = response.text()?;
91
92    let document = Html::parse_document(&html);
93
94    let result_selector = Selector::parse(".result").unwrap();
95    let title_selector = Selector::parse(".result__title").unwrap();
96    let snippet_selector = Selector::parse(".result__snippet").unwrap();
97    let url_selector = Selector::parse(".result__url").unwrap();
98
99    let mut results = Vec::new();
100
101    for element in document.select(&result_selector) {
102        let title = element
103            .select(&title_selector)
104            .next()
105            .map(|e| e.text().collect::<String>().trim().to_string())
106            .unwrap_or_default();
107
108        let description = element
109            .select(&snippet_selector)
110            .next()
111            .map(|e| e.text().collect::<String>().trim().to_string())
112            .unwrap_or_default();
113
114        let mut url = element
115            .select(&url_selector)
116            .next()
117            .map(|e| e.text().collect::<String>().trim().to_string())
118            .unwrap_or_default();
119
120        if !title.is_empty() && !description.is_empty() && !url.is_empty() {
121            if !url.starts_with("http") {
122                url = format!("https://{}", url);
123            }
124
125            let content = match client.get(&url).header("User-Agent", &user_agent).send() {
126                Ok(response) => {
127                    let html = response.text()?;
128
129                    config::with_decorator(PlainDecorator::new())
130                        .do_decorate()
131                        .string_from_read(html.as_bytes(), 80)?
132                }
133                Err(_) => "".to_string(),
134            };
135
136            results.push(SearchResult {
137                title,
138                description,
139                url,
140                content,
141            });
142        }
143    }
144
145    Ok(results)
146}