feat: add frontend + backend + database to retrieve and compute news from Yahoo
This commit is contained in:
206
backend/internal/scraper/bloomberg/bloomberg.go
Normal file
206
backend/internal/scraper/bloomberg/bloomberg.go
Normal file
@ -0,0 +1,206 @@
|
||||
package bloomberg
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/chromedp/chromedp"
|
||||
"github.com/tradarr/backend/internal/scraper"
|
||||
)
|
||||
|
||||
// Bloomberg scrapes headline links from public Bloomberg section pages
// using a headless Chrome instance driven by chromedp, signing in first
// with the configured account.
type Bloomberg struct {
	username   string // account email used to sign in
	password   string // account password (already decrypted at this point)
	chromePath string // optional explicit Chrome binary path; empty lets chromedp locate one
}
|
||||
|
||||
func New(username, password, chromePath string) *Bloomberg {
|
||||
return &Bloomberg{username: username, password: password, chromePath: chromePath}
|
||||
}
|
||||
|
||||
// Name returns the registry key / source type for this scraper.
func (b *Bloomberg) Name() string { return "bloomberg" }
|
||||
|
||||
// Scrape launches a headless Chrome, logs in to Bloomberg, then harvests
// headline links from a fixed set of public section pages. Per-page failures
// are logged and skipped; the collected articles are tagged with any watched
// symbols detected in their titles. The whole run is capped at 5 minutes.
func (b *Bloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	if b.username == "" || b.password == "" {
		return nil, fmt.Errorf("bloomberg credentials not configured")
	}

	// Chrome flags chosen to run headless in containers (no sandbox,
	// /dev/shm workaround) and to look less like an automated browser
	// (AutomationControlled disabled, desktop UA, fixed window size).
	opts := []chromedp.ExecAllocatorOption{
		chromedp.NoFirstRun,
		chromedp.NoDefaultBrowserCheck,
		chromedp.Headless,
		chromedp.DisableGPU,
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-setuid-sandbox", true),
		chromedp.Flag("disable-dev-shm-usage", true),
		chromedp.Flag("disable-blink-features", "AutomationControlled"),
		chromedp.Flag("disable-infobars", true),
		chromedp.Flag("window-size", "1920,1080"),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"),
	}
	if b.chromePath != "" {
		opts = append(opts, chromedp.ExecPath(b.chromePath))
	}

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
	defer cancelAlloc()

	chromeCtx, cancelChrome := chromedp.NewContext(allocCtx)
	defer cancelChrome()

	// Overall budget for login plus all page scrapes.
	timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, 5*time.Minute)
	defer cancelTimeout()

	if err := b.login(timeoutCtx); err != nil {
		return nil, fmt.Errorf("bloomberg login: %w", err)
	}

	var articles []scraper.Article
	pages := []string{
		"https://www.bloomberg.com/markets",
		"https://www.bloomberg.com/technology",
		"https://www.bloomberg.com/economics",
	}
	for _, u := range pages {
		pageArticles, err := b.scrapePage(timeoutCtx, u, symbols)
		if err != nil {
			// Best effort: keep going with the remaining sections.
			fmt.Printf("bloomberg scrape %s: %v\n", u, err)
			continue
		}
		articles = append(articles, pageArticles...)
	}
	fmt.Printf("bloomberg: %d articles fetched total\n", len(articles))
	return articles, nil
}
|
||||
|
||||
// login drives the Bloomberg sign-in flow: inject a small stealth script,
// open the sign-in page, fill and submit the email, then fill and submit the
// password. Each field is located by probing a list of candidate CSS
// selectors because the page structure changes regularly. The flow relies on
// fixed sleeps between steps; the whole attempt is capped at 2 minutes.
// NOTE(review): success is not verified after submit — a wrong password
// still returns nil here; confirm downstream handling.
func (b *Bloomberg) login(ctx context.Context) error {
	loginCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
	defer cancel()

	// Hide automation detection via injected JS (best effort; failures are
	// only logged so the login attempt still proceeds).
	if err := chromedp.Run(loginCtx,
		chromedp.ActionFunc(func(ctx context.Context) error {
			return chromedp.Evaluate(`
				Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
				window.chrome = { runtime: {} };
			`, nil).Do(ctx)
		}),
	); err != nil {
		fmt.Printf("bloomberg: could not inject stealth JS: %v\n", err)
	}

	err := chromedp.Run(loginCtx,
		chromedp.Navigate("https://www.bloomberg.com/account/signin"),
		chromedp.Sleep(2*time.Second),
		// Try several selectors for the email field.
		chromedp.ActionFunc(func(ctx context.Context) error {
			selectors := []string{
				`input[name="email"]`,
				`input[type="email"]`,
				`input[data-type="email"]`,
				`input[placeholder*="email" i]`,
				`input[placeholder*="mail" i]`,
			}
			for _, sel := range selectors {
				var count int
				// Probe with querySelectorAll so a missing selector is not an error.
				if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
					fmt.Printf("bloomberg: using email selector: %s\n", sel)
					return chromedp.SendKeys(sel, b.username, chromedp.ByQuery).Do(ctx)
				}
			}
			return fmt.Errorf("could not find email input — Bloomberg login page structure may have changed")
		}),
		chromedp.Sleep(500*time.Millisecond),
		// Submit email
		chromedp.ActionFunc(func(ctx context.Context) error {
			selectors := []string{`button[type="submit"]`, `input[type="submit"]`, `button[data-testid*="submit"]`}
			for _, sel := range selectors {
				var count int
				if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
					return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
				}
			}
			// Fallback: press Enter
			return chromedp.KeyEvent("\r").Do(ctx)
		}),
		chromedp.Sleep(2*time.Second),
		// Password
		chromedp.ActionFunc(func(ctx context.Context) error {
			selectors := []string{`input[type="password"]`, `input[name="password"]`}
			for _, sel := range selectors {
				var count int
				if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
					fmt.Printf("bloomberg: using password selector: %s\n", sel)
					return chromedp.SendKeys(sel, b.password, chromedp.ByQuery).Do(ctx)
				}
			}
			return fmt.Errorf("could not find password input")
		}),
		chromedp.Sleep(500*time.Millisecond),
		// Submit password (same probe-then-click strategy, Enter as fallback).
		chromedp.ActionFunc(func(ctx context.Context) error {
			selectors := []string{`button[type="submit"]`, `input[type="submit"]`}
			for _, sel := range selectors {
				var count int
				if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
					return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
				}
			}
			return chromedp.KeyEvent("\r").Do(ctx)
		}),
		chromedp.Sleep(3*time.Second),
	)
	return err
}
|
||||
|
||||
// scrapePage opens one Bloomberg section page and extracts up to 25 unique
// article links via injected JavaScript (anchors pointing at article-like
// paths, with a headline-ish child element preferred for the title). Each
// link becomes an Article whose content is just the title and whose
// publication time is "now", since the listing page does not expose dates.
// The per-page budget is 60 seconds.
func (b *Bloomberg) scrapePage(ctx context.Context, pageURL string, symbols []string) ([]scraper.Article, error) {
	pageCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()

	var articleNodes []map[string]string
	err := chromedp.Run(pageCtx,
		chromedp.Navigate(pageURL),
		chromedp.Sleep(3*time.Second),
		chromedp.Evaluate(`
			(function() {
				var items = [];
				var seen = new Set();
				var links = document.querySelectorAll('a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]');
				links.forEach(function(a) {
					if (seen.has(a.href)) return;
					seen.add(a.href);
					var title = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]');
					var text = title ? title.innerText.trim() : a.innerText.trim();
					if (text.length > 20 && a.href.includes('bloomberg.com')) {
						items.push({title: text, url: a.href});
					}
				});
				return items.slice(0, 25);
			})()
		`, &articleNodes),
	)
	if err != nil {
		return nil, fmt.Errorf("navigate %s: %w", pageURL, err)
	}

	var articles []scraper.Article
	now := time.Now()
	for _, node := range articleNodes {
		title := strings.TrimSpace(node["title"])
		url := node["url"]
		// Defensive re-check of what the injected JS already filtered.
		if title == "" || url == "" || !strings.Contains(url, "bloomberg.com") {
			continue
		}
		syms := scraper.DetectSymbols(title, symbols)
		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     title, // minimal content — the full article requires paid access
			URL:         url,
			PublishedAt: &now,
			Symbols:     syms,
		})
	}
	return articles, nil
}
|
||||
50
backend/internal/scraper/bloomberg/dynamic.go
Normal file
50
backend/internal/scraper/bloomberg/dynamic.go
Normal file
@ -0,0 +1,50 @@
|
||||
package bloomberg
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/tradarr/backend/internal/crypto"
|
||||
"github.com/tradarr/backend/internal/models"
|
||||
"github.com/tradarr/backend/internal/scraper"
|
||||
)
|
||||
|
||||
// DynamicBloomberg loads the Bloomberg credentials from the database before
// each scrape, so credential changes made in the admin panel take effect
// without a restart.
type DynamicBloomberg struct {
	repo       *models.Repository // source/credential storage
	enc        *crypto.Encryptor  // decrypts the stored password
	chromePath string             // forwarded to the underlying Bloomberg scraper
}
|
||||
|
||||
// NewDynamic returns a Bloomberg scraper that resolves its credentials from
// repo (decrypting with enc) at scrape time.
func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, chromePath string) *DynamicBloomberg {
	return &DynamicBloomberg{repo: repo, enc: enc, chromePath: chromePath}
}
|
||||
|
||||
// Name returns the registry key / source type for this scraper.
func (d *DynamicBloomberg) Name() string { return "bloomberg" }
|
||||
|
||||
// Scrape looks up the Bloomberg source and its stored credentials, decrypts
// the password, then delegates to a freshly constructed Bloomberg scraper.
// It fails early with a descriptive error when the source or credentials
// are missing or cannot be decrypted.
func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	// Look up the Bloomberg source record.
	source, err := d.repo.GetSourceByType("bloomberg")
	if err != nil || source == nil {
		return nil, fmt.Errorf("bloomberg source not found")
	}

	cred, err := d.repo.GetCredentials(source.ID)
	if err != nil {
		return nil, fmt.Errorf("get bloomberg credentials: %w", err)
	}
	if cred == nil || cred.Username == "" {
		return nil, fmt.Errorf("bloomberg credentials not configured — please set them in the admin panel")
	}

	// An empty encrypted password is tolerated here; the underlying scraper
	// rejects empty credentials itself.
	password := ""
	if cred.PasswordEncrypted != "" {
		password, err = d.enc.Decrypt(cred.PasswordEncrypted)
		if err != nil {
			return nil, fmt.Errorf("decrypt bloomberg password: %w", err)
		}
	}

	b := New(cred.Username, password, d.chromePath)
	return b.Scrape(ctx, symbols)
}
|
||||
106
backend/internal/scraper/registry.go
Normal file
106
backend/internal/scraper/registry.go
Normal file
@ -0,0 +1,106 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/tradarr/backend/internal/models"
|
||||
)
|
||||
|
||||
// Registry holds the available scrapers keyed by their Name() and runs them
// against the sources stored in the repository.
type Registry struct {
	scrapers map[string]Scraper // scraper type (e.g. "bloomberg") -> implementation
	repo     *models.Repository // persistence for sources, jobs and articles
}
|
||||
|
||||
func NewRegistry(repo *models.Repository) *Registry {
|
||||
return &Registry{
|
||||
scrapers: map[string]Scraper{},
|
||||
repo: repo,
|
||||
}
|
||||
}
|
||||
|
||||
// Register adds s to the registry under s.Name(), replacing any scraper
// previously registered under the same name.
func (r *Registry) Register(s Scraper) {
	r.scrapers[s.Name()] = s
}
|
||||
|
||||
// Run executes the scraper associated with sourceID and persists the
// resulting articles. It records a scrape job whose status transitions
// pending -> running -> done/error, and returns an error when the source or
// a matching scraper cannot be found, or when the scrape itself fails.
// Individual article persistence failures are skipped silently.
func (r *Registry) Run(sourceID string) error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}

	// Find the requested source among all configured sources.
	var source *models.Source
	for i := range sources {
		if sources[i].ID == sourceID {
			source = &sources[i]
			break
		}
	}
	if source == nil {
		return fmt.Errorf("source %s not found", sourceID)
	}

	scrpr, ok := r.scrapers[source.Type]
	if !ok {
		return fmt.Errorf("no scraper for type %s", source.Type)
	}

	// Create the job record, then mark it running.
	job, err := r.repo.CreateScrapeJob(sourceID)
	if err != nil {
		return err
	}

	if err := r.repo.UpdateScrapeJob(job.ID, "running", 0, ""); err != nil {
		return err
	}

	// Fetch the watched symbols so scrapers can tag articles.
	symbols, err := r.repo.GetAllWatchedSymbols()
	if err != nil {
		return err
	}

	// Fresh background context: the scrape outlives the triggering request.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	articles, scrapeErr := scrpr.Scrape(ctx, symbols)
	if scrapeErr != nil {
		// Best effort: job bookkeeping must not mask the scrape error.
		_ = r.repo.UpdateScrapeJob(job.ID, "error", 0, scrapeErr.Error())
		return scrapeErr
	}

	// Persist the articles; count only the ones actually stored.
	count := 0
	for _, a := range articles {
		saved, err := r.repo.UpsertArticle(sourceID, a.Title, a.Content, a.URL, a.PublishedAt)
		if err != nil {
			continue
		}
		count++
		for _, sym := range a.Symbols {
			_ = r.repo.AddArticleSymbol(saved.ID, sym)
		}
	}

	return r.repo.UpdateScrapeJob(job.ID, "done", count, "")
}
|
||||
|
||||
// RunAll executes every enabled source sequentially. Per-source failures are
// logged and do not stop the remaining sources; only a failure to list the
// sources is returned as an error.
func (r *Registry) RunAll() error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}
	for _, src := range sources {
		if !src.Enabled {
			continue
		}
		if err := r.Run(src.ID); err != nil {
			fmt.Printf("scraper %s error: %v\n", src.Name, err)
		}
	}
	return nil
}
|
||||
75
backend/internal/scraper/scraper.go
Normal file
75
backend/internal/scraper/scraper.go
Normal file
@ -0,0 +1,75 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/tradarr/backend/internal/models"
|
||||
)
|
||||
|
||||
// Article is the normalized output of every scraper, independent of the
// source it came from.
type Article struct {
	Title       string     // headline shown in the UI
	Content     string     // body text; may equal Title when only a headline is available
	URL         string     // canonical link to the original item
	PublishedAt *time.Time // publication time; nil when the source does not provide one
	Symbols     []string   // watched symbols detected in / associated with the article
}
|
||||
|
||||
// Scraper is implemented by every news source. Name is the stable source
// type used as the registry key; Scrape fetches articles relevant to the
// given watched symbols, honoring ctx for cancellation.
type Scraper interface {
	Name() string
	Scrape(ctx context.Context, symbols []string) ([]Article, error)
}
|
||||
|
||||
// detectSymbols extrait les symboles mentionnés dans un texte
|
||||
func DetectSymbols(text string, watchlist []string) []string {
|
||||
found := map[string]bool{}
|
||||
for _, s := range watchlist {
|
||||
// Recherche du symbole en majuscules dans le texte
|
||||
if containsWord(text, s) {
|
||||
found[s] = true
|
||||
}
|
||||
}
|
||||
result := make([]string, 0, len(found))
|
||||
for s := range found {
|
||||
result = append(result, s)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// containsWord reports whether word occurs in text as a whole word, i.e. not
// flanked by ASCII letters or digits. Matching is ASCII case-insensitive on
// BOTH operands.
//
// Fix over the previous version: only the text side was upper-cased (and the
// variable holding it was misleadingly named "upper" while never being
// converted up front), so lowercase watchlist entries such as "aapl" could
// never match. Both sides are now normalized byte-by-byte.
func containsWord(text, word string) bool {
	up := func(c byte) byte {
		if c >= 'a' && c <= 'z' {
			c -= 'a' - 'A'
		}
		return c
	}
	alnum := func(c byte) bool {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
	}
	for i := 0; i <= len(text)-len(word); i++ {
		match := true
		for j := 0; j < len(word); j++ {
			if up(text[i+j]) != up(word[j]) {
				match = false
				break
			}
		}
		if !match {
			continue
		}
		// Whole-word check: no alphanumeric neighbor on either side.
		before := i == 0 || !alnum(text[i-1])
		after := i+len(word) >= len(text) || !alnum(text[i+len(word)])
		if before && after {
			return true
		}
	}
	return false
}
|
||||
|
||||
// isAlphaNum reports whether b is an ASCII letter or digit.
func isAlphaNum(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	}
	return false
}
|
||||
|
||||
// ScraperResult is the outcome of one scraping job: the source that was
// scraped, the articles obtained, and the error if the run failed.
type ScraperResult struct {
	Source   *models.Source
	Articles []Article
	Err      error
}
|
||||
128
backend/internal/scraper/stocktwits/stocktwits.go
Normal file
128
backend/internal/scraper/stocktwits/stocktwits.go
Normal file
@ -0,0 +1,128 @@
|
||||
package stocktwits
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/tradarr/backend/internal/scraper"
|
||||
)
|
||||
|
||||
// apiBase is the root of the public StockTwits REST API.
const apiBase = "https://api.stocktwits.com/api/2"

// StockTwits fetches recent messages for watched symbols from the public
// StockTwits streams API.
type StockTwits struct {
	client *http.Client // shared HTTP client carrying the request timeout
}

// New returns a StockTwits scraper with a 15-second HTTP timeout.
func New() *StockTwits {
	httpClient := &http.Client{Timeout: 15 * time.Second}
	return &StockTwits{client: httpClient}
}
|
||||
|
||||
// Name returns the registry key / source type for this scraper.
func (s *StockTwits) Name() string { return "stocktwits" }
|
||||
|
||||
// apiResponse mirrors the subset of the StockTwits stream payload we use:
// the embedded status envelope plus the message list with author and
// optional bullish/bearish sentiment.
type apiResponse struct {
	Response struct {
		Status int    `json:"status"` // API-level status, present even on HTTP 200
		Error  string `json:"error,omitempty"`
	} `json:"response"`
	Messages []struct {
		ID        int    `json:"id"`
		Body      string `json:"body"`
		CreatedAt string `json:"created_at"` // RFC3339 timestamp string
		User      struct {
			Username string `json:"username"`
		} `json:"user"`
		Entities struct {
			// Sentiment is nil when the author tagged no sentiment.
			Sentiment *struct {
				Basic string `json:"basic"` // e.g. "Bullish" / "Bearish"
			} `json:"sentiment"`
		} `json:"entities"`
	} `json:"messages"`
}
||||
|
||||
// Scrape fetches the recent message stream for each watched symbol.
// Per-symbol failures are logged and skipped. When ctx is cancelled the
// articles collected so far are returned together with ctx.Err().
func (s *StockTwits) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	var articles []scraper.Article
	for i, symbol := range symbols {
		// Pause between requests to avoid rate limiting.
		if i > 0 {
			select {
			case <-ctx.Done():
				return articles, ctx.Err()
			case <-time.After(500 * time.Millisecond):
			}
		}
		msgs, err := s.fetchSymbol(ctx, symbol)
		if err != nil {
			fmt.Printf("stocktwits %s: %v\n", symbol, err)
			continue
		}
		articles = append(articles, msgs...)
	}
	return articles, nil
}
|
||||
|
||||
func (s *StockTwits) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
|
||||
url := fmt.Sprintf("%s/streams/symbol/%s.json", apiBase, symbol)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36")
|
||||
|
||||
resp, err := s.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode == 429 {
|
||||
return nil, fmt.Errorf("rate limited by StockTwits for %s", symbol)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, fmt.Errorf("StockTwits returned HTTP %d for %s: %s", resp.StatusCode, symbol, string(body))
|
||||
}
|
||||
|
||||
var data apiResponse
|
||||
if err := json.Unmarshal(body, &data); err != nil {
|
||||
return nil, fmt.Errorf("parse response for %s: %w", symbol, err)
|
||||
}
|
||||
|
||||
// L'API StockTwits retourne un status dans le body même en HTTP 200
|
||||
if data.Response.Status != 0 && data.Response.Status != 200 {
|
||||
return nil, fmt.Errorf("StockTwits API error %d for %s: %s", data.Response.Status, symbol, data.Response.Error)
|
||||
}
|
||||
|
||||
var articles []scraper.Article
|
||||
for _, msg := range data.Messages {
|
||||
if msg.Body == "" {
|
||||
continue
|
||||
}
|
||||
sentiment := ""
|
||||
if msg.Entities.Sentiment != nil {
|
||||
sentiment = " [" + msg.Entities.Sentiment.Basic + "]"
|
||||
}
|
||||
title := fmt.Sprintf("$%s — @%s%s", symbol, msg.User.Username, sentiment)
|
||||
publishedAt, _ := time.Parse(time.RFC3339, msg.CreatedAt)
|
||||
msgURL := fmt.Sprintf("https://stocktwits.com/%s/message/%d", msg.User.Username, msg.ID)
|
||||
|
||||
articles = append(articles, scraper.Article{
|
||||
Title: title,
|
||||
Content: msg.Body,
|
||||
URL: msgURL,
|
||||
PublishedAt: &publishedAt,
|
||||
Symbols: []string{symbol},
|
||||
})
|
||||
}
|
||||
fmt.Printf("stocktwits %s: %d messages fetched\n", symbol, len(articles))
|
||||
return articles, nil
|
||||
}
|
||||
126
backend/internal/scraper/yahoofinance/yahoofinance.go
Normal file
126
backend/internal/scraper/yahoofinance/yahoofinance.go
Normal file
@ -0,0 +1,126 @@
|
||||
package yahoofinance
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/tradarr/backend/internal/scraper"
|
||||
)
|
||||
|
||||
// YahooFinance fetches per-symbol headline articles from the public Yahoo
// Finance RSS feeds.
type YahooFinance struct {
	client *http.Client // shared HTTP client carrying the request timeout
}

// New returns a YahooFinance scraper with a 15-second HTTP timeout.
func New() *YahooFinance {
	httpClient := &http.Client{Timeout: 15 * time.Second}
	return &YahooFinance{client: httpClient}
}
|
||||
|
||||
// Name deliberately reports "stocktwits" so this scraper reuses the existing
// source type already stored in the DB. NOTE(review): registering both this
// and the StockTwits scraper would collide on the same registry key — confirm
// only one of the two is registered at a time.
func (y *YahooFinance) Name() string { return "stocktwits" } // keeps the same source type in the DB
|
||||
|
||||
// rssFeed mirrors the subset of the RSS 2.0 document returned by the Yahoo
// Finance headline feed that we consume.
type rssFeed struct {
	Channel struct {
		Items []struct {
			Title       string `xml:"title"`
			Link        string `xml:"link"`
			Description string `xml:"description"`
			PubDate     string `xml:"pubDate"` // RFC1123-style date string
			GUID        string `xml:"guid"`
		} `xml:"item"`
	} `xml:"channel"`
}
|
||||
|
||||
func (y *YahooFinance) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
||||
var articles []scraper.Article
|
||||
|
||||
for i, symbol := range symbols {
|
||||
if i > 0 {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return articles, ctx.Err()
|
||||
case <-time.After(300 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
items, err := y.fetchSymbol(ctx, symbol)
|
||||
if err != nil {
|
||||
fmt.Printf("yahoofinance %s: %v\n", symbol, err)
|
||||
continue
|
||||
}
|
||||
articles = append(articles, items...)
|
||||
fmt.Printf("yahoofinance %s: %d articles fetched\n", symbol, len(items))
|
||||
}
|
||||
return articles, nil
|
||||
}
|
||||
|
||||
func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scraper.Article, error) {
|
||||
url := fmt.Sprintf(
|
||||
"https://feeds.finance.yahoo.com/rss/2.0/headline?s=%s®ion=US&lang=en-US",
|
||||
symbol,
|
||||
)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
|
||||
req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")
|
||||
|
||||
resp, err := y.client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
|
||||
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
var feed rssFeed
|
||||
if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
|
||||
return nil, fmt.Errorf("parse RSS: %w", err)
|
||||
}
|
||||
|
||||
var articles []scraper.Article
|
||||
for _, item := range feed.Channel.Items {
|
||||
title := strings.TrimSpace(item.Title)
|
||||
link := strings.TrimSpace(item.Link)
|
||||
if title == "" || link == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
var publishedAt *time.Time
|
||||
if item.PubDate != "" {
|
||||
formats := []string{
|
||||
time.RFC1123Z,
|
||||
time.RFC1123,
|
||||
"Mon, 02 Jan 2006 15:04:05 -0700",
|
||||
}
|
||||
for _, f := range formats {
|
||||
if t, err := time.Parse(f, item.PubDate); err == nil {
|
||||
publishedAt = &t
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
content := strings.TrimSpace(item.Description)
|
||||
if content == "" {
|
||||
content = title
|
||||
}
|
||||
|
||||
articles = append(articles, scraper.Article{
|
||||
Title: title,
|
||||
Content: content,
|
||||
URL: link,
|
||||
PublishedAt: publishedAt,
|
||||
Symbols: []string{symbol},
|
||||
})
|
||||
}
|
||||
return articles, nil
|
||||
}
|
||||
Reference in New Issue
Block a user