mistralrs_core/search/
mod.rs1use std::collections::HashMap;
2
3pub mod rag;
4
5use anyhow::Result;
6use html2text::{config, render::PlainDecorator};
7use scraper::{Html, Selector};
8use serde::{Deserialize, Serialize};
9use serde_json::{json, Value};
10use std::env::consts::{ARCH, FAMILY, OS};
11
12use crate::{Function, Tool, ToolType, WebSearchOptions, WebSearchUserLocation};
13
14pub(crate) const SEARCH_TOOL_NAME: &str = "search_the_web";
15const APP_VERSION: &str = env!("CARGO_PKG_VERSION");
16const DESCRIPTION: &str = r#"This tool is used to search the web given a query. If you call this tool, then you MUST complete your answer using the output.
17You should expect output like this:
18{
19 "output": [
20 {
21 "title": "...",
22 "description": "...",
23 "url": "...",
24 "content": "...",
25 },
26 ...
27 ]
28}
29YOU SHOULD NOT CALL THE SEARCH TOOL CONSECUTIVELY!"#;
30
31#[derive(Debug, Serialize, Deserialize, Default)]
32pub struct SearchResult {
33 pub title: String,
34 pub description: String,
35 pub url: String,
36 pub content: String,
37}
38
39#[derive(Debug, Serialize, Deserialize)]
40pub struct SearchFunctionParameters {
41 pub query: String,
42}
43
44pub fn get_search_tool(web_search_options: &WebSearchOptions) -> Result<Tool> {
45 let parameters: HashMap<String, Value> = serde_json::from_value(json!({
46 "type": "object",
47 "properties": {
48 "query": {
49 "type": "string",
50 "description": "A query for web searching.",
51 },
52 },
53 "required": ["query"],
54 }))?;
55
56 let location_details = match &web_search_options.user_location {
57 Some(WebSearchUserLocation::Approximate { approximate }) => {
58 format!(
59 "\nThe user's location is: {}, {}, {}, {}.",
60 approximate.city, approximate.region, approximate.country, approximate.timezone
61 )
62 }
63 None => "".to_string(),
64 };
65
66 Ok(Tool {
67 tp: ToolType::Function,
68 function: Function {
69 description: Some(format!("{DESCRIPTION}{location_details}")),
70 name: SEARCH_TOOL_NAME.to_string(),
71 parameters: Some(parameters),
72 },
73 })
74}
75
76pub fn run_search_tool(params: &SearchFunctionParameters) -> Result<Vec<SearchResult>> {
77 let client = reqwest::blocking::Client::new();
78
79 let encoded_query = urlencoding::encode(¶ms.query);
80 let url = format!("https://html.duckduckgo.com/html/?q={}", encoded_query);
81
82 let user_agent = format!("mistralrs/{APP_VERSION} ({OS}; {ARCH}; {FAMILY})");
83 let response = client.get(&url).header("User-Agent", &user_agent).send()?;
84
85 if !response.status().is_success() {
87 anyhow::bail!("Failed to fetch search results: {}", response.status())
88 }
89
90 let html = response.text()?;
91
92 let document = Html::parse_document(&html);
93
94 let result_selector = Selector::parse(".result").unwrap();
95 let title_selector = Selector::parse(".result__title").unwrap();
96 let snippet_selector = Selector::parse(".result__snippet").unwrap();
97 let url_selector = Selector::parse(".result__url").unwrap();
98
99 let mut results = Vec::new();
100
101 for element in document.select(&result_selector) {
102 let title = element
103 .select(&title_selector)
104 .next()
105 .map(|e| e.text().collect::<String>().trim().to_string())
106 .unwrap_or_default();
107
108 let description = element
109 .select(&snippet_selector)
110 .next()
111 .map(|e| e.text().collect::<String>().trim().to_string())
112 .unwrap_or_default();
113
114 let mut url = element
115 .select(&url_selector)
116 .next()
117 .map(|e| e.text().collect::<String>().trim().to_string())
118 .unwrap_or_default();
119
120 if !title.is_empty() && !description.is_empty() && !url.is_empty() {
121 if !url.starts_with("http") {
122 url = format!("https://{}", url);
123 }
124
125 let content = match client.get(&url).header("User-Agent", &user_agent).send() {
126 Ok(response) => {
127 let html = response.text()?;
128
129 config::with_decorator(PlainDecorator::new())
130 .do_decorate()
131 .string_from_read(html.as_bytes(), 80)?
132 }
133 Err(_) => "".to_string(),
134 };
135
136 results.push(SearchResult {
137 title,
138 description,
139 url,
140 content,
141 });
142 }
143 }
144
145 Ok(results)
146}