132 lines
2.9 KiB
Go
132 lines
2.9 KiB
Go
package yahoofinance
|
|
|
|
import (
	"context"
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/tradarr/backend/internal/scraper"
)
|
|
|
|
// YahooFinance scrapes stock headlines from the Yahoo Finance RSS feed,
// one request per ticker symbol.
type YahooFinance struct {
	// client is the shared HTTP client used for every feed request;
	// New configures it with a request timeout.
	client *http.Client
}
|
|
|
|
func New() *YahooFinance {
|
|
return &YahooFinance{
|
|
client: &http.Client{Timeout: 15 * time.Second},
|
|
}
|
|
}
|
|
|
|
// Name reports the provider identifier persisted in the database. It
// deliberately returns "stocktwits" to keep the same source type in the DB
// (legacy value — NOTE(review): confirm with the DB schema before changing).
func (y *YahooFinance) Name() string { return "stocktwits" }
|
|
|
|
// rssFeed mirrors the subset of the Yahoo Finance RSS 2.0 document this
// scraper consumes: <channel> wrapping a list of <item> entries.
type rssFeed struct {
	Channel struct {
		Items []struct {
			Title       string `xml:"title"`
			Link        string `xml:"link"`
			Description string `xml:"description"`
			// PubDate is kept as a raw string; fetchSymbol parses it
			// best-effort against a few common RFC1123 layouts.
			PubDate string `xml:"pubDate"`
			GUID    string `xml:"guid"`
		} `xml:"item"`
	} `xml:"channel"`
}
|
|
|
|
func (y *YahooFinance) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
|
var articles []scraper.Article
|
|
|
|
for i, symbol := range symbols {
|
|
if i > 0 {
|
|
select {
|
|
case <-ctx.Done():
|
|
return articles, ctx.Err()
|
|
case <-time.After(300 * time.Millisecond):
|
|
}
|
|
}
|
|
items, err := y.fetchSymbol(ctx, symbol)
|
|
if err != nil {
|
|
fmt.Printf("yahoofinance %s: %v\n", symbol, err)
|
|
continue
|
|
}
|
|
articles = append(articles, items...)
|
|
fmt.Printf("yahoofinance %s: %d articles fetched\n", symbol, len(items))
|
|
}
|
|
return articles, nil
|
|
}
|
|
|
|
func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
|
|
url := fmt.Sprintf(
|
|
"https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s®ion=US&lang=en-US",
|
|
symbol,
|
|
)
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
|
|
req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")
|
|
|
|
resp, err := y.client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
|
|
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
|
|
}
|
|
|
|
var feed rssFeed
|
|
if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
|
|
return nil, fmt.Errorf("parse RSS: %w", err)
|
|
}
|
|
|
|
const maxPerSymbol = 5
|
|
|
|
var articles []scraper.Article
|
|
for _, item := range feed.Channel.Items {
|
|
if len(articles) >= maxPerSymbol {
|
|
break
|
|
}
|
|
title := strings.TrimSpace(item.Title)
|
|
link := strings.TrimSpace(item.Link)
|
|
if title == "" || link == "" {
|
|
continue
|
|
}
|
|
|
|
var publishedAt *time.Time
|
|
if item.PubDate != "" {
|
|
formats := []string{
|
|
time.RFC1123Z,
|
|
time.RFC1123,
|
|
"Mon, 02 Jan 2006 15:04:05 -0700",
|
|
}
|
|
for _, f := range formats {
|
|
if t, err := time.Parse(f, item.PubDate); err == nil {
|
|
publishedAt = &t
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
content := strings.TrimSpace(item.Description)
|
|
if content == "" {
|
|
content = title
|
|
}
|
|
|
|
articles = append(articles, scraper.Article{
|
|
Title: title,
|
|
Content: content,
|
|
URL: link,
|
|
PublishedAt: publishedAt,
|
|
Symbols: []string{symbol},
|
|
})
|
|
}
|
|
return articles, nil
|
|
}
|