// Package bloomberg scrapes news headlines from bloomberg.com using an
// authenticated headless Chrome session driven by chromedp.
package bloomberg

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/chromedp/chromedp"

	"github.com/tradarr/backend/internal/scraper"
)

// Bloomberg is a scraper source that signs in to bloomberg.com and collects
// article headlines from a fixed set of section pages.
type Bloomberg struct {
	username   string // sign-in e-mail address
	password   string // sign-in password
	chromePath string // optional explicit Chrome binary; empty lets chromedp auto-detect
}

// New returns a Bloomberg scraper configured with the given credentials and an
// optional path to the Chrome/Chromium binary.
func New(username, password, chromePath string) *Bloomberg {
	return &Bloomberg{username: username, password: password, chromePath: chromePath}
}

// Name reports the source identifier used by the scraper registry.
func (b *Bloomberg) Name() string { return "bloomberg" }

// Scrape starts a headless browser, logs in, and gathers headlines from a few
// Bloomberg section pages. A failure on one page is logged and skipped so the
// remaining pages are still harvested. symbols is forwarded to
// scraper.DetectSymbols for per-headline ticker matching.
func (b *Bloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
	if b.username == "" || b.password == "" {
		return nil, fmt.Errorf("bloomberg credentials not configured")
	}

	// Browser flags aimed at running inside a container and at reducing the
	// most obvious automation fingerprints.
	opts := []chromedp.ExecAllocatorOption{
		chromedp.NoFirstRun,
		chromedp.NoDefaultBrowserCheck,
		chromedp.Headless,
		chromedp.DisableGPU,
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-setuid-sandbox", true),
		chromedp.Flag("disable-dev-shm-usage", true),
		chromedp.Flag("disable-blink-features", "AutomationControlled"),
		chromedp.Flag("disable-infobars", true),
		chromedp.Flag("window-size", "1920,1080"),
		chromedp.Flag("ignore-certificate-errors", true),
		chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"),
	}
	if b.chromePath != "" {
		opts = append(opts, chromedp.ExecPath(b.chromePath))
	}

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
	defer cancelAlloc()
	chromeCtx, cancelChrome := chromedp.NewContext(allocCtx)
	defer cancelChrome()
	// Hard cap on the whole login + scrape run.
	timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, 5*time.Minute)
	defer cancelTimeout()

	if err := b.login(timeoutCtx); err != nil {
		return nil, fmt.Errorf("bloomberg login: %w", err)
	}

	pages := []string{
		"https://www.bloomberg.com/markets",
		"https://www.bloomberg.com/technology",
		"https://www.bloomberg.com/economics",
	}

	var articles []scraper.Article
	for _, u := range pages {
		pageArticles, err := b.scrapePage(timeoutCtx, u, symbols)
		if err != nil {
			// Best effort: one broken section page must not abort the run.
			fmt.Printf("bloomberg scrape %s: %v\n", u, err)
			continue
		}
		articles = append(articles, pageArticles...)
	}
	fmt.Printf("bloomberg: %d articles fetched total\n", len(articles))
	return articles, nil
}

// firstMatch returns the first CSS selector in sels that matches at least one
// element on the current page, or "" when none match. The selectors are
// hard-coded (trusted) strings, so interpolating them into the JS snippet is
// safe here.
func firstMatch(ctx context.Context, sels []string) string {
	for _, sel := range sels {
		var count int
		err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx)
		if err == nil && count > 0 {
			return sel
		}
	}
	return ""
}

// login walks Bloomberg's two-step sign-in flow (e-mail, then password),
// probing several selectors for each field because the page markup changes
// frequently. It paces itself with fixed sleeps rather than explicit waits.
func (b *Bloomberg) login(ctx context.Context) error {
	loginCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
	defer cancel()

	err := chromedp.Run(loginCtx,
		chromedp.Navigate("https://www.bloomberg.com/account/signin"),
		// Mask the most common automation fingerprints. This must run AFTER
		// Navigate: evaluating on the initial about:blank tab (as the previous
		// version did) is discarded as soon as the sign-in page loads.
		// NOTE(review): detection scripts that run during page load still
		// execute before this override; Page.addScriptToEvaluateOnNewDocument
		// (cdproto/page) would be the robust fix — confirm before relying on
		// this for stealth.
		chromedp.ActionFunc(func(ctx context.Context) error {
			err := chromedp.Evaluate(`
				Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
				window.chrome = { runtime: {} };
			`, nil).Do(ctx)
			if err != nil {
				// Best effort only; login may still succeed without it.
				fmt.Printf("bloomberg: could not inject stealth JS: %v\n", err)
			}
			return nil
		}),
		chromedp.Sleep(2*time.Second),
		// Step 1: enter the e-mail address, trying several selectors.
		chromedp.ActionFunc(func(ctx context.Context) error {
			sel := firstMatch(ctx, []string{
				`input[name="email"]`,
				`input[type="email"]`,
				`input[data-type="email"]`,
				`input[placeholder*="email" i]`,
				`input[placeholder*="mail" i]`,
			})
			if sel == "" {
				return fmt.Errorf("could not find email input — Bloomberg login page structure may have changed")
			}
			fmt.Printf("bloomberg: using email selector: %s\n", sel)
			return chromedp.SendKeys(sel, b.username, chromedp.ByQuery).Do(ctx)
		}),
		chromedp.Sleep(500*time.Millisecond),
		// Submit the e-mail form; fall back to pressing Enter when no submit
		// control is found.
		chromedp.ActionFunc(func(ctx context.Context) error {
			sel := firstMatch(ctx, []string{`button[type="submit"]`, `input[type="submit"]`, `button[data-testid*="submit"]`})
			if sel == "" {
				return chromedp.KeyEvent("\r").Do(ctx)
			}
			return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
		}),
		chromedp.Sleep(2*time.Second),
		// Step 2: enter the password.
		chromedp.ActionFunc(func(ctx context.Context) error {
			sel := firstMatch(ctx, []string{`input[type="password"]`, `input[name="password"]`})
			if sel == "" {
				return fmt.Errorf("could not find password input")
			}
			fmt.Printf("bloomberg: using password selector: %s\n", sel)
			return chromedp.SendKeys(sel, b.password, chromedp.ByQuery).Do(ctx)
		}),
		chromedp.Sleep(500*time.Millisecond),
		// Submit the password form, with the same Enter-key fallback.
		chromedp.ActionFunc(func(ctx context.Context) error {
			sel := firstMatch(ctx, []string{`button[type="submit"]`, `input[type="submit"]`})
			if sel == "" {
				return chromedp.KeyEvent("\r").Do(ctx)
			}
			return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
		}),
		chromedp.Sleep(3*time.Second),
	)
	return err
}

// scrapePage loads one Bloomberg section page and extracts headline/URL pairs
// from article-like anchors, de-duplicated in-page and capped at 25 entries.
// Article bodies are paywalled, so each Article's Content is the headline.
func (b *Bloomberg) scrapePage(ctx context.Context, pageURL string, symbols []string) ([]scraper.Article, error) {
	pageCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()

	var articleNodes []map[string]string
	err := chromedp.Run(pageCtx,
		chromedp.Navigate(pageURL),
		chromedp.Sleep(3*time.Second), // crude wait for the client-side render
		chromedp.Evaluate(`
			(function() {
				var items = [];
				var seen = new Set();
				var links = document.querySelectorAll('a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]');
				links.forEach(function(a) {
					if (seen.has(a.href)) return;
					seen.add(a.href);
					var title = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]');
					var text = title ? title.innerText.trim() : a.innerText.trim();
					if (text.length > 20 && a.href.includes('bloomberg.com')) {
						items.push({title: text, url: a.href});
					}
				});
				return items.slice(0, 25);
			})()
		`, &articleNodes),
	)
	if err != nil {
		return nil, fmt.Errorf("navigate %s: %w", pageURL, err)
	}

	now := time.Now()
	var articles []scraper.Article
	for _, node := range articleNodes {
		title := strings.TrimSpace(node["title"])
		link := node["url"] // renamed from "url" to avoid shadowing net/url
		if title == "" || link == "" || !strings.Contains(link, "bloomberg.com") {
			continue
		}
		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     title, // minimal content — the full article requires a paid subscription
			URL:         link,
			PublishedAt: &now,
			Symbols:     scraper.DetectSymbols(title, symbols),
		})
	}
	return articles, nil
}