Switched to DDG
This commit is contained in:
186
tools/search.go
186
tools/search.go
@@ -1,12 +1,20 @@
|
||||
package tools
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Tunables shared by the search helpers in this file.
const (
	// maxRetries is how many times executeWithRetry re-sends a request after
	// the first attempt has failed.
	maxRetries = 3

	// timeout is the budget WebSearch applies both to its context and to the
	// HTTP client it builds.
	timeout = time.Minute
)
|
||||
|
||||
// SearchResult represents a single search result
|
||||
@@ -16,66 +24,152 @@ type SearchResult struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
// SearchResponse represents the SearXNG API response
|
||||
type SearchResponse struct {
|
||||
Results []SearchResult `json:"results"`
|
||||
}
|
||||
// WebSearch performs a web search using DuckDuckGo
|
||||
func WebSearch(query string) (string, error) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
// WebSearch performs a web search using SearXNG
|
||||
func WebSearch(searxngURL, query string) (string, error) {
|
||||
// Build the search URL
|
||||
searchURL := fmt.Sprintf("%s/search", strings.TrimSuffix(searxngURL, "/"))
|
||||
// Create HTTP client with proxy support
|
||||
transport := &http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
}
|
||||
|
||||
// Create URL with query parameters
|
||||
params := url.Values{}
|
||||
params.Add("q", query)
|
||||
params.Add("format", "json")
|
||||
params.Add("language", "en")
|
||||
client := &http.Client{
|
||||
Timeout: timeout,
|
||||
Transport: transport,
|
||||
}
|
||||
|
||||
fullURL := fmt.Sprintf("%s?%s", searchURL, params.Encode())
|
||||
|
||||
// Make the request
|
||||
resp, err := http.Get(fullURL)
|
||||
// Perform search with retry logic
|
||||
results, err := searchDuckDuckGo(ctx, client, query)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to perform search: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
return "", fmt.Errorf("search request failed with status %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
// Parse the response
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
var searchResp SearchResponse
|
||||
if err := json.Unmarshal(body, &searchResp); err != nil {
|
||||
return "", fmt.Errorf("failed to parse search results: %w", err)
|
||||
return "", fmt.Errorf("DuckDuckGo search failed: %w", err)
|
||||
}
|
||||
|
||||
// Format results as text
|
||||
if len(searchResp.Results) == 0 {
|
||||
if len(results) == 0 {
|
||||
return "No results found.", nil
|
||||
}
|
||||
|
||||
var results strings.Builder
|
||||
results.WriteString(fmt.Sprintf("Found %d results:\n\n", len(searchResp.Results)))
|
||||
var output strings.Builder
|
||||
fmt.Fprintf(&output, "Found %d results:\n\n", len(results))
|
||||
|
||||
for i, result := range searchResp.Results {
|
||||
for i, result := range results {
|
||||
if i >= 10 { // Limit to top 10 results
|
||||
break
|
||||
}
|
||||
results.WriteString(fmt.Sprintf("%d. %s\n", i+1, result.Title))
|
||||
results.WriteString(fmt.Sprintf(" URL: %s\n", result.URL))
|
||||
fmt.Fprintf(&output, "%d. %s\n", i+1, result.Title)
|
||||
fmt.Fprintf(&output, " URL: %s\n", result.URL)
|
||||
if result.Content != "" {
|
||||
results.WriteString(fmt.Sprintf(" %s\n", result.Content))
|
||||
fmt.Fprintf(&output, " %s\n", result.Content)
|
||||
}
|
||||
results.WriteString("\n")
|
||||
fmt.Fprintf(&output, "\n")
|
||||
}
|
||||
|
||||
return results.String(), nil
|
||||
return output.String(), nil
|
||||
}
|
||||
|
||||
// searchDuckDuckGo performs the actual DuckDuckGo search
|
||||
func searchDuckDuckGo(ctx context.Context, client *http.Client, query string) ([]SearchResult, error) {
|
||||
// Build DuckDuckGo search URL
|
||||
searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
|
||||
|
||||
// Create request with proper headers
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create search request: %w", err)
|
||||
}
|
||||
|
||||
// Set browser-like user agent
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
|
||||
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
|
||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||
|
||||
// Execute request with retry logic
|
||||
resp, err := executeWithRetry(ctx, client, req, "search DuckDuckGo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Check response status
|
||||
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
|
||||
return nil, fmt.Errorf("search request failed with status: %d %s", resp.StatusCode, resp.Status)
|
||||
}
|
||||
|
||||
// Parse HTML response
|
||||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse search results: %w", err)
|
||||
}
|
||||
|
||||
// Extract search results
|
||||
var results []SearchResult
|
||||
doc.Find(".web-result").Each(func(i int, s *goquery.Selection) {
|
||||
titleNode := s.Find(".result__a")
|
||||
title := strings.TrimSpace(titleNode.Text())
|
||||
info := strings.TrimSpace(s.Find(".result__snippet").Text())
|
||||
|
||||
// Extract URL
|
||||
var resultURL string
|
||||
if titleNode.Length() > 0 {
|
||||
if href, exists := titleNode.Attr("href"); exists {
|
||||
// Validate URL format
|
||||
if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
|
||||
resultURL = href
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if title != "" && resultURL != "" {
|
||||
results = append(results, SearchResult{
|
||||
Title: title,
|
||||
URL: resultURL,
|
||||
Content: info,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// executeWithRetry executes an HTTP request with exponential backoff retry logic
|
||||
func executeWithRetry(ctx context.Context, client *http.Client, req *http.Request, operation string) (*http.Response, error) {
|
||||
var resp *http.Response
|
||||
var err error
|
||||
|
||||
// Retry logic with exponential backoff
|
||||
for attempt := 0; attempt <= maxRetries; attempt++ {
|
||||
resp, err = client.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// Don't retry on the last attempt
|
||||
if attempt == maxRetries {
|
||||
break
|
||||
}
|
||||
|
||||
// Exponential backoff: 1s, 2s, 4s
|
||||
backoffDuration := time.Duration(1<<uint(attempt)) * time.Second
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, fmt.Errorf("%s cancelled: %v", operation, ctx.Err())
|
||||
case <-time.After(backoffDuration):
|
||||
// Continue to next retry
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("failed to %s after %d retries: %v", operation, maxRetries, err)
|
||||
}
|
||||
|
||||
// init prints, once at package load, which proxy environment variable is set,
// as a debugging aid. At most one line is written to standard output; the
// first non-empty variable of HTTPS_PROXY, HTTP_PROXY, ALL_PROXY wins.
//
// NOTE(review): this only reports what is in the environment. Which
// variables the HTTP transport actually honours is decided by
// http.ProxyFromEnvironment — confirm that ALL_PROXY is among them.
func init() {
	if line := proxyLogLine(os.Getenv); line != "" {
		fmt.Print(line)
	}
}

// proxyLogLine builds the line init prints, or "" when none of the proxy
// variables is set. getenv is injected so the lookup can be tested without
// touching the process environment.
func proxyLogLine(getenv func(string) string) string {
	for _, name := range []string{"HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY"} {
		if value := getenv(name); value != "" {
			return fmt.Sprintf("Using %s: %s\n", name, redactProxyPassword(value))
		}
	}
	return ""
}

// redactProxyPassword masks the password of a proxy URL such as
// "http://user:secret@host:8080" so that credentials do not end up in logs.
// Values that carry no password, or that do not parse as a URL, are returned
// unchanged.
func redactProxyPassword(raw string) string {
	u, err := url.Parse(raw)
	if err != nil || u.User == nil {
		return raw
	}
	if _, hasPassword := u.User.Password(); !hasPassword {
		return raw
	}
	u.User = url.UserPassword(u.User.Username(), "xxxxx")
	return u.String()
}
|
||||
|
||||
Reference in New Issue
Block a user