Tradarr/backend/internal/scraper/yahoofinance/yahoofinance.go

package yahoofinance

import (
	"context"
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"strings"
	"time"

	"github.com/tradarr/backend/internal/scraper"
)

type YahooFinance struct {
	client *http.Client
}

func New() *YahooFinance {
	return &YahooFinance{
		client: &http.Client{Timeout: 15 * time.Second},
	}
}

func (y *YahooFinance) Name() string { return "stocktwits" } // garde le même type en DB

type rssFeed struct {
	Channel struct {
		Items []struct {
			Title       string `xml:"title"`
			Link        string `xml:"link"`
			Description string `xml:"description"`
			PubDate     string `xml:"pubDate"`
			GUID        string `xml:"guid"`
		} `xml:"item"`
	} `xml:"channel"`
}

func (y *YahooFinance) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	var articles []scraper.Article

	for i, symbol := range symbols {
		if i > 0 {
			select {
			case <-ctx.Done():
				return articles, ctx.Err()
			case <-time.After(300 * time.Millisecond):
			}
		}
		items, err := y.fetchSymbol(ctx, symbol)
		if err != nil {
			fmt.Printf("yahoofinance %s: %v\n", symbol, err)
			continue
		}
		articles = append(articles, items...)
		fmt.Printf("yahoofinance %s: %d articles fetched\n", symbol, len(items))
	}
	return articles, nil
}

func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
	url := fmt.Sprintf(
		"https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s&region=US&lang=en-US",
		symbol,
	)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")

	resp, err := y.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
	}

	var feed rssFeed
	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
		return nil, fmt.Errorf("parse RSS: %w", err)
	}

	const maxPerSymbol = 5

	var articles []scraper.Article
	for _, item := range feed.Channel.Items {
		if len(articles) >= maxPerSymbol {
			break
		}
		title := strings.TrimSpace(item.Title)
		link := strings.TrimSpace(item.Link)
		if title == "" || link == "" {
			continue
		}

		var publishedAt *time.Time
		if item.PubDate != "" {
			formats := []string{
				time.RFC1123Z,
				time.RFC1123,
				"Mon, 02 Jan 2006 15:04:05 -0700",
			}
			for _, f := range formats {
				if t, err := time.Parse(f, item.PubDate); err == nil {
					publishedAt = &t
					break
				}
			}
		}

		content := strings.TrimSpace(item.Description)
		if content == "" {
			content = title
		}

		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     content,
			URL:         link,
			PublishedAt: publishedAt,
			Symbols:     []string{symbol},
		})
	}
	return articles, nil
}