// Package scraper declares the Article and Scraper types and helpers that
// detect watchlist symbols in text.
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"time"
|
|
|
|
"github.com/tradarr/backend/internal/models"
|
|
)
|
|
|
|
// Article is one item returned by a Scraper.
type Article struct {
	// Title is the article's title.
	Title string
	// Content is the article's text content.
	Content string
	// URL is the article's address.
	URL string
	// PublishedAt is the publication time. It is a pointer, so it may be nil;
	// presumably nil means the date is unknown (confirm against the scrapers).
	PublishedAt *time.Time
	// Symbols lists the symbols associated with the article; presumably the
	// output of DetectSymbols (confirm against the scrapers).
	Symbols []string
}
|
|
|
|
type Scraper interface {
|
|
Name() string
|
|
Scrape(ctx context.Context, symbols []string) ([]Article, error)
|
|
}
|
|
|
|
// detectSymbols extrait les symboles mentionnés dans un texte
|
|
func DetectSymbols(text string, watchlist []string) []string {
|
|
found := map[string]bool{}
|
|
for _, s := range watchlist {
|
|
// Recherche du symbole en majuscules dans le texte
|
|
if containsWord(text, s) {
|
|
found[s] = true
|
|
}
|
|
}
|
|
result := make([]string, 0, len(found))
|
|
for s := range found {
|
|
result = append(result, s)
|
|
}
|
|
return result
|
|
}
|
|
|
|
func containsWord(text, word string) bool {
|
|
upper := []byte(text)
|
|
w := []byte(word)
|
|
for i := 0; i <= len(upper)-len(w); i++ {
|
|
match := true
|
|
for j := range w {
|
|
c := upper[i+j]
|
|
if c >= 'a' && c <= 'z' {
|
|
c -= 32
|
|
}
|
|
if c != w[j] {
|
|
match = false
|
|
break
|
|
}
|
|
}
|
|
if match {
|
|
// Vérifier que c'est un mot entier
|
|
before := i == 0 || !isAlphaNum(upper[i-1])
|
|
after := i+len(w) >= len(upper) || !isAlphaNum(upper[i+len(w)])
|
|
if before && after {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// isAlphaNum reports whether b is an ASCII letter (a-z, A-Z) or an ASCII
// digit (0-9). Every other byte, including any byte of a multi-byte UTF-8
// sequence, yields false.
func isAlphaNum(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}
|
|
|
|
// ScraperResult est le résultat d'un job de scraping
|
|
type ScraperResult struct {
|
|
Source *models.Source
|
|
Articles []Article
|
|
Err error
|
|
}
|