Switched to Startpage
This commit is contained in:
103
tools/search.go
103
tools/search.go
@@ -24,7 +24,7 @@ type SearchResult struct {
|
|||||||
Content string `json:"content"`
|
Content string `json:"content"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// WebSearch performs a web search using DuckDuckGo
|
// WebSearch performs a web search using Startpage
|
||||||
func WebSearch(query string) (string, error) {
|
func WebSearch(query string) (string, error) {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
@@ -40,9 +40,9 @@ func WebSearch(query string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Perform search with retry logic
|
// Perform search with retry logic
|
||||||
results, err := searchDuckDuckGo(ctx, client, query)
|
results, err := searchStartpage(ctx, client, query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("DuckDuckGo search failed: %w", err)
|
return "", fmt.Errorf("Startpage search failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Format results as text
|
// Format results as text
|
||||||
@@ -68,10 +68,10 @@ func WebSearch(query string) (string, error) {
|
|||||||
return output.String(), nil
|
return output.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// searchDuckDuckGo performs the actual DuckDuckGo search
|
// searchStartpage performs the actual Startpage search
|
||||||
func searchDuckDuckGo(ctx context.Context, client *http.Client, query string) ([]SearchResult, error) {
|
func searchStartpage(ctx context.Context, client *http.Client, query string) ([]SearchResult, error) {
|
||||||
// Build DuckDuckGo search URL
|
// Build Startpage search URL
|
||||||
searchURL := fmt.Sprintf("https://html.duckduckgo.com/html/?q=%s", url.QueryEscape(query))
|
searchURL := fmt.Sprintf("https://www.startpage.com/sp/search?query=%s", url.QueryEscape(query))
|
||||||
|
|
||||||
// Create request with proper headers
|
// Create request with proper headers
|
||||||
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
|
||||||
@@ -85,7 +85,7 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string) ([
|
|||||||
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
|
||||||
|
|
||||||
// Execute request with retry logic
|
// Execute request with retry logic
|
||||||
resp, err := executeWithRetry(ctx, client, req, "search DuckDuckGo")
|
resp, err := executeWithRetry(ctx, client, req, "search Startpage")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -102,31 +102,78 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string) ([
|
|||||||
return nil, fmt.Errorf("failed to parse search results: %w", err)
|
return nil, fmt.Errorf("failed to parse search results: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract search results
|
// Extract search results from Startpage
|
||||||
var results []SearchResult
|
var results []SearchResult
|
||||||
doc.Find(".web-result").Each(func(i int, s *goquery.Selection) {
|
|
||||||
titleNode := s.Find(".result__a")
|
// Scan every anchor with an href in the document; non-result links are filtered out below
|
||||||
title := strings.TrimSpace(titleNode.Text())
|
doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
|
||||||
info := strings.TrimSpace(s.Find(".result__snippet").Text())
|
href, exists := s.Attr("href")
|
||||||
|
if !exists {
|
||||||
// Extract URL
|
return
|
||||||
var resultURL string
|
}
|
||||||
if titleNode.Length() > 0 {
|
|
||||||
if href, exists := titleNode.Attr("href"); exists {
|
// Skip internal Startpage links and Anonymous View links
|
||||||
// Validate URL format
|
if strings.Contains(href, "startpage.com") ||
|
||||||
if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
|
strings.Contains(href, "/av/proxy") ||
|
||||||
resultURL = href
|
strings.HasPrefix(href, "#") ||
|
||||||
|
strings.HasPrefix(href, "/") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only process HTTP/HTTPS URLs
|
||||||
|
if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
title := strings.TrimSpace(s.Text())
|
||||||
|
if title == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip very short titles (likely navigation or other non-content links)
|
||||||
|
if len(title) < 10 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to find description text near the link
|
||||||
|
var description string
|
||||||
|
parent := s.Parent()
|
||||||
|
for parent.Length() > 0 {
|
||||||
|
// Look for text content in siblings or parent elements
|
||||||
|
text := strings.TrimSpace(parent.Text())
|
||||||
|
if len(text) > len(title)+20 { // Found longer text that includes description
|
||||||
|
// Extract the part that's not the title
|
||||||
|
if idx := strings.Index(text, title); idx >= 0 {
|
||||||
|
remainder := strings.TrimSpace(text[idx+len(title):])
|
||||||
|
if len(remainder) > 20 { // Good description length
|
||||||
|
description = remainder
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
parent = parent.Parent()
|
||||||
|
if parent.Length() == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if title != "" && resultURL != "" {
|
// Limit description length
|
||||||
results = append(results, SearchResult{
|
if len(description) > 200 {
|
||||||
Title: title,
|
description = description[:200] + "..."
|
||||||
URL: resultURL,
|
|
||||||
Content: info,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if we already have this URL (avoid duplicates)
|
||||||
|
for _, existing := range results {
|
||||||
|
if existing.URL == href {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results = append(results, SearchResult{
|
||||||
|
Title: title,
|
||||||
|
URL: href,
|
||||||
|
Content: description,
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
return results, nil
|
return results, nil
|
||||||
|
|||||||
Reference in New Issue
Block a user