// Package yahoofinance scrapes article headlines from the Yahoo Finance
// RSS 2.0 feed.
package yahoofinance

import (
	"context"
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/tradarr/backend/internal/scraper"
)

// YahooFinance fetches headlines for stock symbols from the Yahoo Finance
// RSS feed and converts them into scraper.Article values.
type YahooFinance struct {
	client *http.Client
}

// New returns a YahooFinance scraper with a 15-second HTTP timeout.
func New() *YahooFinance {
	return &YahooFinance{
		client: &http.Client{Timeout: 15 * time.Second},
	}
}

// Name returns the source identifier stored in the database.
//
// NOTE: it deliberately reports "stocktwits" to keep the same source type
// already used in the DB (per the original author's comment); changing it
// would orphan existing rows.
func (y *YahooFinance) Name() string { return "stocktwits" }

// rssFeed mirrors the subset of the RSS 2.0 schema we decode.
type rssFeed struct {
	Channel struct {
		Items []struct {
			Title       string `xml:"title"`
			Link        string `xml:"link"`
			Description string `xml:"description"`
			PubDate     string `xml:"pubDate"`
			GUID        string `xml:"guid"`
		} `xml:"item"`
	} `xml:"channel"`
}

// Scrape fetches headlines for each symbol, pausing 300 ms between symbols
// to stay polite to the feed. Per-symbol failures are logged and skipped so
// one bad symbol does not abort the run; context cancellation stops the
// whole scrape and returns whatever was collected so far.
func (y *YahooFinance) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	var articles []scraper.Article
	for i, symbol := range symbols {
		if i > 0 {
			// Rate-limit between symbols, but abort promptly on cancellation.
			select {
			case <-ctx.Done():
				return articles, ctx.Err()
			case <-time.After(300 * time.Millisecond):
			}
		}
		items, err := y.fetchSymbol(ctx, symbol)
		if err != nil {
			fmt.Printf("yahoofinance %s: %v\n", symbol, err)
			continue
		}
		articles = append(articles, items...)
		fmt.Printf("yahoofinance %s: %d articles fetched\n", symbol, len(items))
	}
	return articles, nil
}

// fetchSymbol downloads and parses the RSS feed for a single symbol.
func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
	// BUG FIX: the original URL contained a literal '®' character (a mangled
	// "&reg" HTML entity), so the query was sent as "®ion=US" and the
	// region parameter was lost. Also escape the symbol in case it contains
	// reserved characters (e.g. "BRK.B", "^GSPC").
	feedURL := fmt.Sprintf(
		"https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s&region=US&lang=en-US",
		url.QueryEscape(symbol),
	)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")

	resp, err := y.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Include a bounded snippet of the body to aid debugging.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
	}

	var feed rssFeed
	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
		return nil, fmt.Errorf("parse RSS: %w", err)
	}

	var articles []scraper.Article
	for _, item := range feed.Channel.Items {
		title := strings.TrimSpace(item.Title)
		link := strings.TrimSpace(item.Link)
		if title == "" || link == "" {
			continue
		}

		// Best-effort publication date: RFC1123Z covers the numeric-zone
		// form ("-0700"); RFC1123 covers the zone-name form ("GMT"). The
		// original list's third layout was byte-identical to RFC1123Z and
		// has been removed. Unparseable dates leave publishedAt nil.
		var publishedAt *time.Time
		if item.PubDate != "" {
			for _, layout := range []string{time.RFC1123Z, time.RFC1123} {
				if t, err := time.Parse(layout, item.PubDate); err == nil {
					publishedAt = &t
					break
				}
			}
		}

		// Fall back to the title when the feed gives no description.
		content := strings.TrimSpace(item.Description)
		if content == "" {
			content = title
		}

		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     content,
			URL:         link,
			PublishedAt: publishedAt,
			Symbols:     []string{symbol},
		})
	}
	return articles, nil
}