feat: add frontend + backend + database to retrieve and compute news from Yahoo
This commit is contained in:
126
backend/internal/scraper/yahoofinance/yahoofinance.go
Normal file
126
backend/internal/scraper/yahoofinance/yahoofinance.go
Normal file
@ -0,0 +1,126 @@
|
||||
package yahoofinance
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/tradarr/backend/internal/scraper"
|
||||
)
|
||||
|
||||
type YahooFinance struct {
|
||||
client *http.Client
|
||||
}
|
||||
|
||||
func New() *YahooFinance {
|
||||
return &YahooFinance{
|
||||
client: &http.Client{Timeout: 15 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
func (y *YahooFinance) Name() string { return "stocktwits" } // NOTE(review): deliberately reports "stocktwits" so existing DB rows keep the same source type (per original comment) — confirm this aliasing is still intended for a Yahoo Finance scraper.
|
||||
|
||||
type rssFeed struct {
|
||||
Channel struct {
|
||||
Items []struct {
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
PubDate string `xml:"pubDate"`
|
||||
GUID string `xml:"guid"`
|
||||
} `xml:"item"`
|
||||
} `xml:"channel"`
|
||||
}
|
||||
|
||||
func (y *YahooFinance) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
||||
var articles []scraper.Article
|
||||
|
||||
for i, symbol := range symbols {
|
||||
if i > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return articles, ctx.Err()
|
||||
case <-time.After(300 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
items, err := y.fetchSymbol(ctx, symbol)
|
||||
if err != nil {
|
||||
fmt.Printf("yahoofinance %s: %v\n", symbol, err)
|
||||
continue
|
||||
}
|
||||
articles = append(articles, items...)
|
||||
fmt.Printf("yahoofinance %s: %d articles fetched\n", symbol, len(items))
|
||||
}
|
||||
return articles, nil
|
||||
}
|
||||
|
||||
func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
|
||||
url := fmt.Sprintf(
|
||||
"https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s®ion=US&lang=en-US",
|
||||
symbol,
|
||||
)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
|
||||
req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")
|
||||
|
||||
resp, err := y.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
|
||||
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var feed rssFeed
|
||||
if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
|
||||
return nil, fmt.Errorf("parse RSS: %w", err)
|
||||
}
|
||||
|
||||
var articles []scraper.Article
|
||||
for _, item := range feed.Channel.Items {
|
||||
title := strings.TrimSpace(item.Title)
|
||||
link := strings.TrimSpace(item.Link)
|
||||
if title == "" || link == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
var publishedAt *time.Time
|
||||
if item.PubDate != "" {
|
||||
formats := []string{
|
||||
time.RFC1123Z,
|
||||
time.RFC1123,
|
||||
"Mon, 02 Jan 2006 15:04:05 -0700",
|
||||
}
|
||||
for _, f := range formats {
|
||||
if t, err := time.Parse(f, item.PubDate); err == nil {
|
||||
publishedAt = &t
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
content := strings.TrimSpace(item.Description)
|
||||
if content == "" {
|
||||
content = title
|
||||
}
|
||||
|
||||
articles = append(articles, scraper.Article{
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: link,
|
||||
PublishedAt: publishedAt,
|
||||
Symbols: []string{symbol},
|
||||
})
|
||||
}
|
||||
return articles, nil
|
||||
}
|
||||
Reference in New Issue
Block a user