feat: add news sources and split the AI reasoning into 2 steps to limit the number of articles

This commit is contained in:
2026-04-19 10:43:15 +02:00
parent 93668273ff
commit eb1fb5ca78
28 changed files with 1086 additions and 249 deletions

View File

@ -15,6 +15,8 @@ import (
"github.com/tradarr/backend/internal/scheduler" "github.com/tradarr/backend/internal/scheduler"
"github.com/tradarr/backend/internal/scraper" "github.com/tradarr/backend/internal/scraper"
"github.com/tradarr/backend/internal/scraper/bloomberg" "github.com/tradarr/backend/internal/scraper/bloomberg"
"github.com/tradarr/backend/internal/scraper/reuters"
"github.com/tradarr/backend/internal/scraper/watcherguru"
"github.com/tradarr/backend/internal/scraper/yahoofinance" "github.com/tradarr/backend/internal/scraper/yahoofinance"
) )
@ -38,30 +40,23 @@ func main() {
enc := crypto.New(cfg.EncryptionKey) enc := crypto.New(cfg.EncryptionKey)
pipeline := ai.NewPipeline(repo, enc) pipeline := ai.NewPipeline(repo, enc)
// Créer le compte admin initial si nécessaire
if err := ensureAdmin(repo, cfg); err != nil { if err := ensureAdmin(repo, cfg); err != nil {
log.Printf("ensure admin: %v", err) log.Printf("ensure admin: %v", err)
} }
// Configurer les scrapers
registry := scraper.NewRegistry(repo) registry := scraper.NewRegistry(repo)
registry.Register(bloomberg.NewDynamic(repo, enc, cfg.ScraperURL))
registry.Register(yahoofinance.New())
registry.Register(reuters.New())
registry.Register(watcherguru.New())
// Bloomberg (credentials chargés depuis la DB à chaque run)
bbScraper := bloomberg.NewDynamic(repo, enc, cfg.ChromePath)
registry.Register(bbScraper)
stScraper := yahoofinance.New()
registry.Register(stScraper)
// Scheduler
sched := scheduler.New(registry, pipeline, repo) sched := scheduler.New(registry, pipeline, repo)
if err := sched.Start(); err != nil { if err := sched.Start(); err != nil {
log.Printf("scheduler: %v", err) log.Printf("scheduler: %v", err)
} }
defer sched.Stop() defer sched.Stop()
// API h := handlers.New(repo, cfg, enc, registry, pipeline, sched)
h := handlers.New(repo, cfg, enc, registry, pipeline)
r := api.SetupRouter(h, cfg.JWTSecret) r := api.SetupRouter(h, cfg.JWTSecret)
addr := fmt.Sprintf(":%s", cfg.Port) addr := fmt.Sprintf(":%s", cfg.Port)

View File

@ -3,6 +3,7 @@ package ai
import ( import (
"context" "context"
"fmt" "fmt"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -31,7 +32,6 @@ func NewPipeline(repo *models.Repository, enc *crypto.Encryptor) *Pipeline {
return &Pipeline{repo: repo, enc: enc} return &Pipeline{repo: repo, enc: enc}
} }
// BuildProvider instancie un provider à partir de ses paramètres
func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error) { func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error) {
provider, err := p.repo.GetActiveAIProvider() provider, err := p.repo.GetActiveAIProvider()
if err != nil { if err != nil {
@ -44,9 +44,7 @@ func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error
return NewProvider(name, apiKey, model, endpoint) return NewProvider(name, apiKey, model, endpoint)
} }
// GenerateForUser génère un résumé personnalisé pour un utilisateur
func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.Summary, error) { func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.Summary, error) {
// Récupérer le provider actif
providerCfg, err := p.repo.GetActiveAIProvider() providerCfg, err := p.repo.GetActiveAIProvider()
if err != nil { if err != nil {
return nil, fmt.Errorf("get active provider: %w", err) return nil, fmt.Errorf("get active provider: %w", err)
@ -68,7 +66,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
return nil, fmt.Errorf("build provider: %w", err) return nil, fmt.Errorf("build provider: %w", err)
} }
// Récupérer la watchlist de l'utilisateur (pour le contexte IA uniquement)
assets, err := p.repo.GetUserAssets(userID) assets, err := p.repo.GetUserAssets(userID)
if err != nil { if err != nil {
return nil, fmt.Errorf("get user assets: %w", err) return nil, fmt.Errorf("get user assets: %w", err)
@ -78,7 +75,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
symbols[i] = a.Symbol symbols[i] = a.Symbol
} }
// Récupérer TOUS les articles récents, toutes sources confondues
hoursStr, _ := p.repo.GetSetting("articles_lookback_hours") hoursStr, _ := p.repo.GetSetting("articles_lookback_hours")
hours, _ := strconv.Atoi(hoursStr) hours, _ := strconv.Atoi(hoursStr)
if hours == 0 { if hours == 0 {
@ -98,16 +94,21 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
if maxArticles == 0 { if maxArticles == 0 {
maxArticles = 50 maxArticles = 50
} }
// Passe 1 : filtrage par pertinence sur les titres si trop d'articles
if len(articles) > maxArticles { if len(articles) > maxArticles {
articles = articles[:maxArticles] fmt.Printf("pipeline: %d articles → filtering to %d via AI\n", len(articles), maxArticles)
articles = p.filterByRelevance(ctx, provider, symbols, articles, maxArticles)
fmt.Printf("pipeline: %d articles retained after filtering\n", len(articles))
} }
systemPrompt, _ := p.repo.GetSetting("ai_system_prompt") systemPrompt, _ := p.repo.GetSetting("ai_system_prompt")
if systemPrompt == "" { if systemPrompt == "" {
systemPrompt = DefaultSystemPrompt systemPrompt = DefaultSystemPrompt
} }
prompt := buildPrompt(systemPrompt, symbols, articles)
// Passe 2 : résumé complet
prompt := buildPrompt(systemPrompt, symbols, articles)
summary, err := provider.Summarize(ctx, prompt) summary, err := provider.Summarize(ctx, prompt)
if err != nil { if err != nil {
return nil, fmt.Errorf("AI summarize: %w", err) return nil, fmt.Errorf("AI summarize: %w", err)
@ -116,7 +117,77 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
return p.repo.CreateSummary(userID, summary, &providerCfg.ID) return p.repo.CreateSummary(userID, summary, &providerCfg.ID)
} }
// GenerateForAll génère les résumés pour tous les utilisateurs ayant une watchlist // filterByRelevance demande à l'IA de sélectionner les articles les plus pertinents
// en ne lui envoyant que les titres (prompt très court = rapide).
// filterByRelevance asks the AI provider to select the most relevant articles,
// sending only the titles (very short prompt = fast). On any failure it falls
// back to simple truncation so a summary is always produced.
func (p *Pipeline) filterByRelevance(ctx context.Context, provider Provider, symbols []string, articles []models.Article, max int) []models.Article {
	// Defensive guard: without it, articles[:max] below would panic when the
	// caller passes max > len(articles). There is nothing to filter anyway.
	if len(articles) <= max {
		return articles
	}
	prompt := buildFilterPrompt(symbols, articles, max)
	response, err := provider.Summarize(ctx, prompt)
	if err != nil {
		fmt.Printf("pipeline: filter AI call failed (%v), falling back to truncation\n", err)
		return articles[:max]
	}
	// parseIndexArray already bounds-checks and deduplicates the indices.
	indices := parseIndexArray(response, len(articles))
	if len(indices) == 0 {
		fmt.Printf("pipeline: could not parse filter response, falling back to truncation\n")
		return articles[:max]
	}
	filtered := make([]models.Article, 0, max)
	for _, i := range indices {
		filtered = append(filtered, articles[i])
		if len(filtered) >= max {
			break
		}
	}
	return filtered
}
// buildFilterPrompt builds the short, title-only prompt for the relevance
// filtering pass. The AI is instructed to answer with nothing but a JSON
// array of article indices (base 0), which parseIndexArray then consumes.
func buildFilterPrompt(symbols []string, articles []models.Article, max int) string {
	var sb strings.Builder
	sb.WriteString("Tu es un assistant de trading financier. ")
	sb.WriteString(fmt.Sprintf("Parmi les %d articles ci-dessous, sélectionne les %d plus pertinents pour un trader actif.\n", len(articles), max))
	if len(symbols) > 0 {
		sb.WriteString("Actifs surveillés (priorité haute) : ")
		sb.WriteString(strings.Join(symbols, ", "))
		sb.WriteString("\n")
	}
	// Constant string: no fmt.Sprintf needed (the original wrapped it for no
	// reason, which is the kind of call go vet's printf check flags).
	sb.WriteString("\nRéponds UNIQUEMENT avec un tableau JSON des indices sélectionnés (base 0), exemple : [0, 3, 7, 12]\n")
	sb.WriteString("N'ajoute aucun texte avant ou après le tableau JSON.\n\n")
	sb.WriteString("Articles :\n")
	for i, a := range articles {
		sb.WriteString(fmt.Sprintf("[%d] %s (%s)\n", i, a.Title, a.SourceName))
	}
	return sb.String()
}
// jsonArrayRe matches a JSON-style array of integers, e.g. "[0, 3, 7, 12]".
// Compiled once at package scope, never in the hot path.
var jsonArrayRe = regexp.MustCompile(`\[[\d\s,]+\]`)

// parseIndexArray extracts the first JSON integer array found in response and
// returns its values as deduplicated indices restricted to [0, maxIndex).
// It returns nil when no array can be located, letting the caller fall back.
func parseIndexArray(response string, maxIndex int) []int {
	raw := jsonArrayRe.FindString(response)
	if raw == "" {
		return nil
	}
	fields := strings.Split(strings.Trim(raw, "[]"), ",")
	var (
		indices []int
		seen    = make(map[int]bool, len(fields))
	)
	for _, field := range fields {
		n, convErr := strconv.Atoi(strings.TrimSpace(field))
		// Skip anything unparsable, out of range, or already selected.
		if convErr != nil || n < 0 || n >= maxIndex || seen[n] {
			continue
		}
		seen[n] = true
		indices = append(indices, n)
	}
	return indices
}
func (p *Pipeline) GenerateForAll(ctx context.Context) error { func (p *Pipeline) GenerateForAll(ctx context.Context) error {
users, err := p.repo.ListUsers() users, err := p.repo.ListUsers()
if err != nil { if err != nil {

View File

@ -287,6 +287,45 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
httputil.OK(c, gin.H{"ok": true}) httputil.OK(c, gin.H{"ok": true})
} }
// ── Schedule ───────────────────────────────────────────────────────────────
// GetSchedule handles GET /admin/schedule: it returns every configured
// scraping time slot, ordered as the repository yields them.
func (h *Handler) GetSchedule(c *gin.Context) {
	schedule, err := h.repo.ListScheduleSlots()
	if err != nil {
		httputil.InternalError(c, err)
		return
	}
	httputil.OK(c, schedule)
}
// scheduleRequest is the JSON payload for PUT /admin/schedule: the complete
// desired schedule (the previous one is replaced wholesale, see UpdateSchedule).
type scheduleRequest struct {
	Slots []struct {
		DayOfWeek int `json:"day_of_week"` // 0=Sunday ... 6=Saturday (matches models.ScheduleSlot)
		Hour int `json:"hour"`
		Minute int `json:"minute"`
	} `json:"slots"`
}
// UpdateSchedule handles PUT /admin/schedule: it replaces the whole scraping
// schedule in the database, then asks the scheduler to pick up the new slots.
func (h *Handler) UpdateSchedule(c *gin.Context) {
	var req scheduleRequest
	if err := c.ShouldBindJSON(&req); err != nil {
		httputil.BadRequest(c, err)
		return
	}
	slots := make([]models.ScheduleSlot, 0, len(req.Slots))
	for _, in := range req.Slots {
		slots = append(slots, models.ScheduleSlot{DayOfWeek: in.DayOfWeek, Hour: in.Hour, Minute: in.Minute})
	}
	if err := h.repo.ReplaceSchedule(slots); err != nil {
		httputil.InternalError(c, err)
		return
	}
	// A reload failure is non-fatal: the DB already holds the new schedule
	// and it will be picked up on the next restart.
	if err := h.scheduler.Reload(); err != nil {
		fmt.Printf("schedule reload: %v\n", err)
	}
	httputil.OK(c, gin.H{"ok": true})
}
func (h *Handler) GetDefaultSystemPrompt(c *gin.Context) { func (h *Handler) GetDefaultSystemPrompt(c *gin.Context) {
httputil.OK(c, gin.H{"prompt": ai.DefaultSystemPrompt}) httputil.OK(c, gin.H{"prompt": ai.DefaultSystemPrompt})
} }

View File

@ -5,6 +5,7 @@ import (
"github.com/tradarr/backend/internal/config" "github.com/tradarr/backend/internal/config"
"github.com/tradarr/backend/internal/crypto" "github.com/tradarr/backend/internal/crypto"
"github.com/tradarr/backend/internal/models" "github.com/tradarr/backend/internal/models"
"github.com/tradarr/backend/internal/scheduler"
"github.com/tradarr/backend/internal/scraper" "github.com/tradarr/backend/internal/scraper"
) )
@ -14,6 +15,7 @@ type Handler struct {
enc *crypto.Encryptor enc *crypto.Encryptor
registry *scraper.Registry registry *scraper.Registry
pipeline *ai.Pipeline pipeline *ai.Pipeline
scheduler *scheduler.Scheduler
} }
func New( func New(
@ -22,6 +24,7 @@ func New(
enc *crypto.Encryptor, enc *crypto.Encryptor,
registry *scraper.Registry, registry *scraper.Registry,
pipeline *ai.Pipeline, pipeline *ai.Pipeline,
sched *scheduler.Scheduler,
) *Handler { ) *Handler {
return &Handler{ return &Handler{
repo: repo, repo: repo,
@ -29,5 +32,6 @@ func New(
enc: enc, enc: enc,
registry: registry, registry: registry,
pipeline: pipeline, pipeline: pipeline,
scheduler: sched,
} }
} }

View File

@ -65,6 +65,9 @@ func SetupRouter(h *handlers.Handler, jwtSecret string) *gin.Engine {
admin.PUT("/settings", h.UpdateSettings) admin.PUT("/settings", h.UpdateSettings)
admin.GET("/settings/default-prompt", h.GetDefaultSystemPrompt) admin.GET("/settings/default-prompt", h.GetDefaultSystemPrompt)
admin.GET("/schedule", h.GetSchedule)
admin.PUT("/schedule", h.UpdateSchedule)
admin.GET("/users", h.ListUsers) admin.GET("/users", h.ListUsers)
admin.PUT("/users/:id", h.UpdateAdminUser) admin.PUT("/users/:id", h.UpdateAdminUser)
admin.DELETE("/users/:id", h.DeleteAdminUser) admin.DELETE("/users/:id", h.DeleteAdminUser)

View File

@ -11,7 +11,7 @@ type Config struct {
JWTSecret string JWTSecret string
EncryptionKey []byte EncryptionKey []byte
Port string Port string
ChromePath string ScraperURL string
AdminEmail string AdminEmail string
AdminPassword string AdminPassword string
} }
@ -41,12 +41,17 @@ func Load() (*Config, error) {
port = "8080" port = "8080"
} }
scraperURL := os.Getenv("SCRAPER_URL")
if scraperURL == "" {
scraperURL = "http://scraper:3001"
}
return &Config{ return &Config{
DatabaseURL: dbURL, DatabaseURL: dbURL,
JWTSecret: jwtSecret, JWTSecret: jwtSecret,
EncryptionKey: encKey, EncryptionKey: encKey,
Port: port, Port: port,
ChromePath: os.Getenv("CHROME_PATH"), ScraperURL: scraperURL,
AdminEmail: os.Getenv("ADMIN_EMAIL"), AdminEmail: os.Getenv("ADMIN_EMAIL"),
AdminPassword: os.Getenv("ADMIN_PASSWORD"), AdminPassword: os.Getenv("ADMIN_PASSWORD"),
}, nil }, nil

View File

@ -21,7 +21,7 @@ CREATE TABLE user_assets (
CREATE TABLE sources ( CREATE TABLE sources (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(), id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
name TEXT NOT NULL, name TEXT NOT NULL,
type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits')), type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits', 'reuters', 'watcherguru')),
enabled BOOLEAN NOT NULL DEFAULT TRUE, enabled BOOLEAN NOT NULL DEFAULT TRUE,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
); );
@ -97,7 +97,7 @@ CREATE INDEX idx_user_assets_user_id ON user_assets(user_id);
-- Sources initiales -- Sources initiales
INSERT INTO sources (name, type, enabled) VALUES INSERT INTO sources (name, type, enabled) VALUES
('Bloomberg', 'bloomberg', TRUE), ('Bloomberg', 'bloomberg', TRUE),
('StockTwits', 'stocktwits', TRUE); ('Yahoo Finance', 'stocktwits', TRUE);
-- Paramètres par défaut -- Paramètres par défaut
INSERT INTO settings (key, value) VALUES INSERT INTO settings (key, value) VALUES

View File

@ -0,0 +1 @@
-- Down migration: remove the seeded Reuters and Watcher.Guru sources.
DELETE FROM sources WHERE type IN ('reuters', 'watcherguru');

View File

@ -0,0 +1,4 @@
-- Up migration: seed the two news sources added in this release.
-- NOTE(review): ON CONFLICT DO NOTHING only dedupes if sources has a unique
-- constraint covering these rows; the base schema shows none on (name, type)
-- — verify re-running this migration cannot insert duplicates.
INSERT INTO sources (name, type, enabled) VALUES
('Reuters', 'reuters', true),
('Watcher.Guru', 'watcherguru', true)
ON CONFLICT DO NOTHING;

View File

@ -0,0 +1 @@
DROP TABLE IF EXISTS scrape_schedules;

View File

@ -0,0 +1,17 @@
-- Weekly scraping schedule: one row per (day, hour, minute) slot.
-- day_of_week uses the cron convention: 0 = Sunday ... 6 = Saturday.
CREATE TABLE scrape_schedules (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
day_of_week SMALLINT NOT NULL CHECK (day_of_week BETWEEN 0 AND 6),
hour SMALLINT NOT NULL CHECK (hour BETWEEN 0 AND 23),
minute SMALLINT NOT NULL DEFAULT 0 CHECK (minute BETWEEN 0 AND 59),
UNIQUE (day_of_week, hour, minute)
);
-- Default schedule: Mon-Fri at 06:00 and 15:00, weekends at 06:00 only.
INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES
(1, 6, 0), (1, 15, 0),
(2, 6, 0), (2, 15, 0),
(3, 6, 0), (3, 15, 0),
(4, 6, 0), (4, 15, 0),
(5, 6, 0), (5, 15, 0),
(6, 6, 0),
(0, 6, 0);

View File

@ -97,3 +97,10 @@ type Setting struct {
Key string `json:"key"` Key string `json:"key"`
Value string `json:"value"` Value string `json:"value"`
} }
// ScheduleSlot is one weekly scraping slot, persisted in scrape_schedules.
type ScheduleSlot struct {
	ID string `json:"id"`
	DayOfWeek int `json:"day_of_week"` // 0=Sunday, 1=Monday ... 6=Saturday
	Hour int `json:"hour"` // 0-23 (enforced by the DB CHECK constraint)
	Minute int `json:"minute"` // 0-59 (enforced by the DB CHECK constraint)
}

View File

@ -520,6 +520,51 @@ func (r *Repository) SetSetting(key, value string) error {
return err return err
} }
// ── Schedule ───────────────────────────────────────────────────────────────
// ListScheduleSlots returns every scraping slot, ordered by day, hour, minute.
func (r *Repository) ListScheduleSlots() ([]ScheduleSlot, error) {
	rows, err := r.db.Query(`
		SELECT id, day_of_week, hour, minute FROM scrape_schedules
		ORDER BY day_of_week, hour, minute`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()
	var slots []ScheduleSlot
	for rows.Next() {
		var s ScheduleSlot
		if err := rows.Scan(&s.ID, &s.DayOfWeek, &s.Hour, &s.Minute); err != nil {
			return nil, err
		}
		slots = append(slots, s)
	}
	// The original dropped iteration errors: rows.Next() returning false can
	// mean an error, not just exhaustion. Surface it instead of swallowing.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return slots, nil
}
// ReplaceSchedule atomically swaps the stored schedule for the given slots:
// delete everything, re-insert, commit. Duplicate (day, hour, minute) triples
// in the input are collapsed by the table's unique constraint.
func (r *Repository) ReplaceSchedule(slots []ScheduleSlot) error {
	tx, err := r.db.Begin()
	if err != nil {
		return err
	}
	// No-op after a successful Commit; undoes everything on early return.
	defer tx.Rollback()

	if _, err = tx.Exec(`DELETE FROM scrape_schedules`); err != nil {
		return err
	}
	for _, slot := range slots {
		_, err = tx.Exec(
			`INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES ($1, $2, $3)
			ON CONFLICT (day_of_week, hour, minute) DO NOTHING`,
			slot.DayOfWeek, slot.Hour, slot.Minute,
		)
		if err != nil {
			return err
		}
	}
	return tx.Commit()
}
// ── Settings ───────────────────────────────────────────────────────────────
func (r *Repository) ListSettings() ([]Setting, error) { func (r *Repository) ListSettings() ([]Setting, error) {
rows, err := r.db.Query(`SELECT key, value FROM settings ORDER BY key`) rows, err := r.db.Query(`SELECT key, value FROM settings ORDER BY key`)
if err != nil { if err != nil {

View File

@ -3,7 +3,6 @@ package scheduler
import ( import (
"context" "context"
"fmt" "fmt"
"strconv"
"github.com/robfig/cron/v3" "github.com/robfig/cron/v3"
"github.com/tradarr/backend/internal/ai" "github.com/tradarr/backend/internal/ai"
@ -16,7 +15,7 @@ type Scheduler struct {
registry *scraper.Registry registry *scraper.Registry
pipeline *ai.Pipeline pipeline *ai.Pipeline
repo *models.Repository repo *models.Repository
entryID cron.EntryID entryIDs []cron.EntryID
} }
func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler { func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler {
@ -29,19 +28,10 @@ func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Reposit
} }
func (s *Scheduler) Start() error { func (s *Scheduler) Start() error {
interval, err := s.getInterval() if err := s.loadSchedule(); err != nil {
if err != nil {
return err return err
} }
spec := fmt.Sprintf("@every %dm", interval)
s.entryID, err = s.cron.AddFunc(spec, s.run)
if err != nil {
return fmt.Errorf("add cron: %w", err)
}
s.cron.Start() s.cron.Start()
fmt.Printf("scheduler started, running every %d minutes\n", interval)
return nil return nil
} }
@ -50,39 +40,46 @@ func (s *Scheduler) Stop() {
} }
func (s *Scheduler) Reload() error { func (s *Scheduler) Reload() error {
s.cron.Remove(s.entryID) for _, id := range s.entryIDs {
interval, err := s.getInterval() s.cron.Remove(id)
if err != nil {
return err
} }
spec := fmt.Sprintf("@every %dm", interval) s.entryIDs = nil
s.entryID, err = s.cron.AddFunc(spec, s.run) return s.loadSchedule()
return err }
func (s *Scheduler) loadSchedule() error {
slots, err := s.repo.ListScheduleSlots()
if err != nil {
return fmt.Errorf("load schedule: %w", err)
}
if len(slots) == 0 {
fmt.Println("scheduler: no schedule configured, scraping disabled")
return nil
}
for _, slot := range slots {
// Format cron: "minute hour * * day_of_week"
spec := fmt.Sprintf("%d %d * * %d", slot.Minute, slot.Hour, slot.DayOfWeek)
id, err := s.cron.AddFunc(spec, s.run)
if err != nil {
fmt.Printf("scheduler: invalid cron spec %q: %v\n", spec, err)
continue
}
s.entryIDs = append(s.entryIDs, id)
}
fmt.Printf("scheduler: %d time slots loaded\n", len(s.entryIDs))
return nil
} }
func (s *Scheduler) run() { func (s *Scheduler) run() {
fmt.Println("scheduler: running scraping cycle") fmt.Println("scheduler: starting scraping cycle")
if err := s.registry.RunAll(); err != nil { if err := s.registry.RunAll(); err != nil {
fmt.Printf("scheduler scrape error: %v\n", err) fmt.Printf("scheduler scrape error: %v\n", err)
return return
} }
fmt.Println("scheduler: running AI summaries") fmt.Println("scheduler: starting AI summaries")
if err := s.pipeline.GenerateForAll(context.Background()); err != nil { if err := s.pipeline.GenerateForAll(context.Background()); err != nil {
fmt.Printf("scheduler summary error: %v\n", err) fmt.Printf("scheduler summary error: %v\n", err)
} }
} }
func (s *Scheduler) getInterval() (int, error) {
v, err := s.repo.GetSetting("scrape_interval_minutes")
if err != nil {
return 60, nil
}
if v == "" {
return 60, nil
}
n, err := strconv.Atoi(v)
if err != nil || n < 1 {
return 60, nil
}
return n, nil
}

View File

@ -1,206 +1,94 @@
package bloomberg package bloomberg
import ( import (
"bytes"
"context" "context"
"encoding/json"
"fmt" "fmt"
"io"
"net/http"
"strings" "strings"
"time" "time"
"github.com/chromedp/chromedp"
"github.com/tradarr/backend/internal/scraper" "github.com/tradarr/backend/internal/scraper"
) )
type Bloomberg struct { type Bloomberg struct {
username string scraperURL string
password string client *http.Client
chromePath string
} }
func New(username, password, chromePath string) *Bloomberg { func New(scraperURL string) *Bloomberg {
return &Bloomberg{username: username, password: password, chromePath: chromePath} if scraperURL == "" {
scraperURL = "http://scraper:3001"
}
return &Bloomberg{
scraperURL: scraperURL,
client: &http.Client{Timeout: 10 * time.Minute},
}
} }
func (b *Bloomberg) Name() string { return "bloomberg" } func (b *Bloomberg) Name() string { return "bloomberg" }
func (b *Bloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) { type scraperRequest struct {
if b.username == "" || b.password == "" { Username string `json:"username"`
return nil, fmt.Errorf("bloomberg credentials not configured") Password string `json:"password"`
}
opts := []chromedp.ExecAllocatorOption{
chromedp.NoFirstRun,
chromedp.NoDefaultBrowserCheck,
chromedp.Headless,
chromedp.DisableGPU,
chromedp.Flag("no-sandbox", true),
chromedp.Flag("disable-setuid-sandbox", true),
chromedp.Flag("disable-dev-shm-usage", true),
chromedp.Flag("disable-blink-features", "AutomationControlled"),
chromedp.Flag("disable-infobars", true),
chromedp.Flag("window-size", "1920,1080"),
chromedp.Flag("ignore-certificate-errors", true),
chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"),
}
if b.chromePath != "" {
opts = append(opts, chromedp.ExecPath(b.chromePath))
}
allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
defer cancelAlloc()
chromeCtx, cancelChrome := chromedp.NewContext(allocCtx)
defer cancelChrome()
timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, 5*time.Minute)
defer cancelTimeout()
if err := b.login(timeoutCtx); err != nil {
return nil, fmt.Errorf("bloomberg login: %w", err)
}
var articles []scraper.Article
pages := []string{
"https://www.bloomberg.com/markets",
"https://www.bloomberg.com/technology",
"https://www.bloomberg.com/economics",
}
for _, u := range pages {
pageArticles, err := b.scrapePage(timeoutCtx, u, symbols)
if err != nil {
fmt.Printf("bloomberg scrape %s: %v\n", u, err)
continue
}
articles = append(articles, pageArticles...)
}
fmt.Printf("bloomberg: %d articles fetched total\n", len(articles))
return articles, nil
} }
func (b *Bloomberg) login(ctx context.Context) error { type scraperArticle struct {
loginCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) Title string `json:"title"`
defer cancel() URL string `json:"url"`
// Masquer la détection d'automation via JS
if err := chromedp.Run(loginCtx,
chromedp.ActionFunc(func(ctx context.Context) error {
return chromedp.Evaluate(`
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
window.chrome = { runtime: {} };
`, nil).Do(ctx)
}),
); err != nil {
fmt.Printf("bloomberg: could not inject stealth JS: %v\n", err)
}
err := chromedp.Run(loginCtx,
chromedp.Navigate("https://www.bloomberg.com/account/signin"),
chromedp.Sleep(2*time.Second),
// Essayer plusieurs sélecteurs pour l'email
chromedp.ActionFunc(func(ctx context.Context) error {
selectors := []string{
`input[name="email"]`,
`input[type="email"]`,
`input[data-type="email"]`,
`input[placeholder*="email" i]`,
`input[placeholder*="mail" i]`,
}
for _, sel := range selectors {
var count int
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
fmt.Printf("bloomberg: using email selector: %s\n", sel)
return chromedp.SendKeys(sel, b.username, chromedp.ByQuery).Do(ctx)
}
}
return fmt.Errorf("could not find email input — Bloomberg login page structure may have changed")
}),
chromedp.Sleep(500*time.Millisecond),
// Submit email
chromedp.ActionFunc(func(ctx context.Context) error {
selectors := []string{`button[type="submit"]`, `input[type="submit"]`, `button[data-testid*="submit"]`}
for _, sel := range selectors {
var count int
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
}
}
// Fallback: press Enter
return chromedp.KeyEvent("\r").Do(ctx)
}),
chromedp.Sleep(2*time.Second),
// Password
chromedp.ActionFunc(func(ctx context.Context) error {
selectors := []string{`input[type="password"]`, `input[name="password"]`}
for _, sel := range selectors {
var count int
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
fmt.Printf("bloomberg: using password selector: %s\n", sel)
return chromedp.SendKeys(sel, b.password, chromedp.ByQuery).Do(ctx)
}
}
return fmt.Errorf("could not find password input")
}),
chromedp.Sleep(500*time.Millisecond),
chromedp.ActionFunc(func(ctx context.Context) error {
selectors := []string{`button[type="submit"]`, `input[type="submit"]`}
for _, sel := range selectors {
var count int
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
}
}
return chromedp.KeyEvent("\r").Do(ctx)
}),
chromedp.Sleep(3*time.Second),
)
return err
} }
func (b *Bloomberg) scrapePage(ctx context.Context, pageURL string, symbols []string) ([]scraper.Article, error) { type scraperResponse struct {
pageCtx, cancel := context.WithTimeout(ctx, 60*time.Second) Articles []scraperArticle `json:"articles"`
defer cancel() Error string `json:"error,omitempty"`
}
var articleNodes []map[string]string func (b *Bloomberg) ScrapeWithCredentials(ctx context.Context, username, password string, symbols []string) ([]scraper.Article, error) {
err := chromedp.Run(pageCtx, payload, _ := json.Marshal(scraperRequest{Username: username, Password: password})
chromedp.Navigate(pageURL), req, err := http.NewRequestWithContext(ctx, http.MethodPost, b.scraperURL+"/bloomberg/scrape", bytes.NewReader(payload))
chromedp.Sleep(3*time.Second),
chromedp.Evaluate(`
(function() {
var items = [];
var seen = new Set();
var links = document.querySelectorAll('a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]');
links.forEach(function(a) {
if (seen.has(a.href)) return;
seen.add(a.href);
var title = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]');
var text = title ? title.innerText.trim() : a.innerText.trim();
if (text.length > 20 && a.href.includes('bloomberg.com')) {
items.push({title: text, url: a.href});
}
});
return items.slice(0, 25);
})()
`, &articleNodes),
)
if err != nil { if err != nil {
return nil, fmt.Errorf("navigate %s: %w", pageURL, err) return nil, err
}
req.Header.Set("Content-Type", "application/json")
resp, err := b.client.Do(req)
if err != nil {
return nil, fmt.Errorf("scraper service unreachable: %w", err)
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("scraper service HTTP %d: %s", resp.StatusCode, body)
}
var result scraperResponse
if err := json.Unmarshal(body, &result); err != nil {
return nil, fmt.Errorf("parse scraper response: %w", err)
}
if result.Error != "" {
return nil, fmt.Errorf("bloomberg: %s", result.Error)
} }
var articles []scraper.Article
now := time.Now() now := time.Now()
for _, node := range articleNodes { var articles []scraper.Article
title := strings.TrimSpace(node["title"]) for _, a := range result.Articles {
url := node["url"] title := strings.TrimSpace(a.Title)
if title == "" || url == "" || !strings.Contains(url, "bloomberg.com") { url := a.URL
if title == "" || url == "" {
continue continue
} }
syms := scraper.DetectSymbols(title, symbols) syms := scraper.DetectSymbols(title, symbols)
articles = append(articles, scraper.Article{ articles = append(articles, scraper.Article{
Title: title, Title: title,
Content: title, // contenu minimal — l'article complet nécessite un accès payant Content: title,
URL: url, URL: url,
PublishedAt: &now, PublishedAt: &now,
Symbols: syms, Symbols: syms,
}) })
} }
fmt.Printf("bloomberg: %d articles fetched\n", len(articles))
return articles, nil return articles, nil
} }

View File

@ -9,21 +9,19 @@ import (
"github.com/tradarr/backend/internal/scraper" "github.com/tradarr/backend/internal/scraper"
) )
// DynamicBloomberg charge les credentials depuis la DB avant chaque scraping
type DynamicBloomberg struct { type DynamicBloomberg struct {
repo *models.Repository repo *models.Repository
enc *crypto.Encryptor enc *crypto.Encryptor
chromePath string scraperURL string
} }
func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, chromePath string) *DynamicBloomberg { func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, scraperURL string) *DynamicBloomberg {
return &DynamicBloomberg{repo: repo, enc: enc, chromePath: chromePath} return &DynamicBloomberg{repo: repo, enc: enc, scraperURL: scraperURL}
} }
func (d *DynamicBloomberg) Name() string { return "bloomberg" } func (d *DynamicBloomberg) Name() string { return "bloomberg" }
func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) { func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
// Récupérer la source Bloomberg
source, err := d.repo.GetSourceByType("bloomberg") source, err := d.repo.GetSourceByType("bloomberg")
if err != nil || source == nil { if err != nil || source == nil {
return nil, fmt.Errorf("bloomberg source not found") return nil, fmt.Errorf("bloomberg source not found")
@ -34,7 +32,7 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra
return nil, fmt.Errorf("get bloomberg credentials: %w", err) return nil, fmt.Errorf("get bloomberg credentials: %w", err)
} }
if cred == nil || cred.Username == "" { if cred == nil || cred.Username == "" {
return nil, fmt.Errorf("bloomberg credentials not configured — please set them in the admin panel") return nil, fmt.Errorf("bloomberg credentials not configured — configure them in the admin panel")
} }
password := "" password := ""
@ -45,6 +43,6 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra
} }
} }
b := New(cred.Username, password, d.chromePath) b := New(d.scraperURL)
return b.Scrape(ctx, symbols) return b.ScrapeWithCredentials(ctx, cred.Username, password, symbols)
} }

View File

@ -0,0 +1,129 @@
package reuters
import (
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/tradarr/backend/internal/scraper"
)
// Reuters' own RSS is blocked by Cloudflare, so this scraper pulls from
// reliable public financial RSS feeds instead (MarketWatch and CNBC) while
// keeping the "reuters" source type for database compatibility.
// NOTE(review): the original comment also mentioned Seeking Alpha, but no
// such feed is listed below — confirm whether one was meant to be added.
var feeds = []struct {
	name string
	url string
}{
	{"MarketWatch Top Stories", "https://feeds.content.dowjones.io/public/rss/mw_topstories"},
	{"MarketWatch Markets", "https://feeds.content.dowjones.io/public/rss/mw_marketpulse"},
	{"CNBC Top News", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews"},
	{"CNBC Finance", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews?tag=Finance"},
}
// Reuters is a scraper that aggregates the public financial RSS feeds
// declared in the package-level feeds variable.
type Reuters struct {
	client *http.Client
}

// New returns a Reuters scraper with a 15-second HTTP timeout per request.
func New() *Reuters {
	return &Reuters{client: &http.Client{Timeout: 15 * time.Second}}
}

// Name returns the source type identifier used in the sources table.
func (r *Reuters) Name() string { return "reuters" }
// rssFeed maps the subset of an RSS 2.0 document this scraper reads:
// per-item title, link, description and publication date.
type rssFeed struct {
	Channel struct {
		Items []struct {
			Title string `xml:"title"`
			Link string `xml:"link"`
			Description string `xml:"description"`
			PubDate string `xml:"pubDate"`
		} `xml:"item"`
	} `xml:"channel"`
}
// Scrape fetches every configured feed, deduplicates articles by URL and
// returns the combined list. Individual feed failures are logged and skipped;
// a short pause between feeds avoids hammering the providers. The symbols
// argument is unused: these are general-market feeds.
func (r *Reuters) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
	var collected []scraper.Article
	known := make(map[string]bool)
	for idx, feed := range feeds {
		// Throttle between feeds, but bail out promptly on cancellation.
		if idx > 0 {
			select {
			case <-ctx.Done():
				return collected, ctx.Err()
			case <-time.After(300 * time.Millisecond):
			}
		}
		items, err := r.fetchFeed(ctx, feed.url)
		if err != nil {
			fmt.Printf("reuters/financial %s: %v\n", feed.name, err)
			continue
		}
		for _, item := range items {
			if known[item.URL] {
				continue
			}
			known[item.URL] = true
			collected = append(collected, item)
		}
		fmt.Printf("reuters/financial %s: %d articles\n", feed.name, len(items))
	}
	return collected, nil
}
// pubDateLayouts are the timestamp formats tried, in order, when parsing
// an RSS item's pubDate. The previous list contained a hand-written layout
// that was byte-identical to time.RFC1123Z (dead duplicate); it is replaced
// by time.RFC3339, which Atom-style feeds use.
var pubDateLayouts = []string{time.RFC1123Z, time.RFC1123, time.RFC3339}

// parsePubDate converts an RSS pubDate string to a *time.Time, returning
// nil when the value matches none of the known layouts.
func parsePubDate(raw string) *time.Time {
	for _, layout := range pubDateLayouts {
		if t, err := time.Parse(layout, raw); err == nil {
			return &t
		}
	}
	return nil
}

// fetchFeed downloads and parses one RSS feed, returning one Article per
// item that has both a title and a link. The item description is used as
// the article content, falling back to the title when it is empty.
func (r *Reuters) fetchFeed(ctx context.Context, feedURL string) ([]scraper.Article, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
	if err != nil {
		return nil, err
	}
	// Some providers reject requests without a browser-ish User-Agent.
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
	req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")

	resp, err := r.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Include a snippet of the body so upstream errors are diagnosable.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
	}

	var feed rssFeed
	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
		return nil, fmt.Errorf("parse RSS: %w", err)
	}

	var articles []scraper.Article
	for _, item := range feed.Channel.Items {
		title := strings.TrimSpace(item.Title)
		link := strings.TrimSpace(item.Link)
		if title == "" || link == "" {
			continue // skip malformed items
		}
		content := strings.TrimSpace(item.Description)
		if content == "" {
			content = title
		}
		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     content,
			URL:         link,
			PublishedAt: parsePubDate(item.PubDate),
		})
	}
	return articles, nil
}

View File

@ -0,0 +1,200 @@
package watcherguru
import (
"context"
"encoding/xml"
"fmt"
"io"
"net/http"
"strings"
"time"
"golang.org/x/net/html"
"github.com/tradarr/backend/internal/scraper"
)
const baseURL = "https://watcher.guru"
type WatcherGuru struct {
client *http.Client
}
func New() *WatcherGuru {
return &WatcherGuru{client: &http.Client{Timeout: 15 * time.Second}}
}
func (w *WatcherGuru) Name() string { return "watcherguru" }
// rssFeed mirrors the subset of the RSS 2.0 schema this scraper needs:
// only the per-item title, link, publication date and description.
type rssFeed struct {
	Channel struct {
		Items []struct {
			Title   string `xml:"title"`
			Link    string `xml:"link"`
			PubDate string `xml:"pubDate"`
			Desc    string `xml:"description"`
		} `xml:"item"`
	} `xml:"channel"`
}
// Scrape returns recent watcher.guru headlines. The symbols argument is
// ignored: the feeds are not symbol-specific. The RSS endpoints are tried
// first; if none of them yields articles, the news page is scraped as HTML.
func (w *WatcherGuru) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
	rssEndpoints := []string{baseURL + "/feed/", baseURL + "/news/feed/"}
	for _, endpoint := range rssEndpoints {
		arts, err := w.fetchRSS(ctx, endpoint)
		if err != nil || len(arts) == 0 {
			continue
		}
		fmt.Printf("watcherguru rss: %d articles\n", len(arts))
		return arts, nil
	}
	// RSS unavailable or empty: fall back to parsing the news page.
	arts, err := w.scrapeHTML(ctx)
	if err != nil {
		return nil, fmt.Errorf("watcherguru: %w", err)
	}
	fmt.Printf("watcherguru html: %d articles\n", len(arts))
	return arts, nil
}
// rssDateLayouts are the pubDate formats accepted when parsing feed items.
// The previous list contained a hand-written layout byte-identical to
// time.RFC1123Z (dead duplicate); it is replaced by time.RFC3339.
var rssDateLayouts = []string{time.RFC1123Z, time.RFC1123, time.RFC3339}

// fetchRSS downloads and parses a single RSS feed, returning one Article
// per item that has both a title and a link. The item description is used
// as content, falling back to the title when empty.
func (w *WatcherGuru) fetchRSS(ctx context.Context, feedURL string) ([]scraper.Article, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")

	resp, err := w.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
	}

	var feed rssFeed
	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
		return nil, fmt.Errorf("parse RSS: %w", err)
	}

	var articles []scraper.Article
	for _, item := range feed.Channel.Items {
		title := strings.TrimSpace(item.Title)
		link := strings.TrimSpace(item.Link)
		if title == "" || link == "" {
			continue // skip malformed items
		}
		var publishedAt *time.Time
		for _, layout := range rssDateLayouts {
			if t, err := time.Parse(layout, item.PubDate); err == nil {
				publishedAt = &t
				break
			}
		}
		content := strings.TrimSpace(item.Desc)
		if content == "" {
			content = title
		}
		articles = append(articles, scraper.Article{
			Title:       title,
			Content:     content,
			URL:         link,
			PublishedAt: publishedAt,
		})
	}
	return articles, nil
}
// maxHTMLArticles caps how many links the HTML fallback returns.
const maxHTMLArticles = 40

// scrapeHTML is the fallback path: it fetches the news landing page and
// extracts anchor tags that look like article links. The listing page
// carries no per-article timestamps, so PublishedAt is set to scrape time.
//
// Fixes over the previous version: the tree walk no longer dereferences a
// nil child node (the old `walk(n.FirstChild)` panicked on childless <a>
// tags), it visits every sibling instead of only the first child, and it
// deduplicates on the normalized absolute URL rather than the raw href.
func (w *WatcherGuru) scrapeHTML(ctx context.Context) ([]scraper.Article, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/news/", nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml")

	resp, err := w.client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("parse HTML: %w", err)
	}

	var articles []scraper.Article
	seen := make(map[string]bool)
	now := time.Now()

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n == nil {
			return
		}
		if n.Type == html.ElementNode && n.Data == "a" {
			if url := articleURL(n); url != "" && !seen[url] {
				text := strings.TrimSpace(nodeText(n))
				// Require a reasonably long link text so navigation
				// links ("Home", "More") are not mistaken for headlines.
				if len(text) > 20 {
					seen[url] = true
					articles = append(articles, scraper.Article{
						Title:       text,
						Content:     text,
						URL:         url,
						PublishedAt: &now,
					})
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	if len(articles) > maxHTMLArticles {
		articles = articles[:maxHTMLArticles]
	}
	return articles, nil
}

// articleURL returns the normalized absolute URL of an anchor that looks
// like a news article, or "" when the link is not of interest.
func articleURL(n *html.Node) string {
	href := attrVal(n, "href")
	if href == "" {
		return ""
	}
	if !strings.Contains(href, "/news/") && !strings.Contains(href, "watcher.guru") {
		return ""
	}
	if !strings.HasPrefix(href, "http") {
		return baseURL + href
	}
	return href
}
// attrVal returns the value of the named attribute on n, or "" if absent.
func attrVal(n *html.Node, key string) string {
	for i := range n.Attr {
		if n.Attr[i].Key == key {
			return n.Attr[i].Val
		}
	}
	return ""
}
// nodeText concatenates all text nodes in n's subtree, in document order.
func nodeText(n *html.Node) string {
	var b strings.Builder
	var collect func(*html.Node)
	collect = func(cur *html.Node) {
		if cur.Type == html.TextNode {
			b.WriteString(cur.Data)
			return
		}
		for c := cur.FirstChild; c != nil; c = c.NextSibling {
			collect(c)
		}
	}
	collect(n)
	return b.String()
}

View File

@ -86,8 +86,13 @@ func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scrape
return nil, fmt.Errorf("parse RSS: %w", err) return nil, fmt.Errorf("parse RSS: %w", err)
} }
const maxPerSymbol = 5
var articles []scraper.Article var articles []scraper.Article
for _, item := range feed.Channel.Items { for _, item := range feed.Channel.Items {
if len(articles) >= maxPerSymbol {
break
}
title := strings.TrimSpace(item.Title) title := strings.TrimSpace(item.Title)
link := strings.TrimSpace(item.Link) link := strings.TrimSpace(item.Link)
if title == "" || link == "" { if title == "" || link == "" {

View File

@ -14,6 +14,14 @@ services:
timeout: 5s timeout: 5s
retries: 5 retries: 5
scraper:
build:
context: ./scraper-service
dockerfile: Dockerfile
restart: unless-stopped
expose:
- "3001"
backend: backend:
build: build:
context: ./backend context: ./backend
@ -22,11 +30,14 @@ services:
depends_on: depends_on:
postgres: postgres:
condition: service_healthy condition: service_healthy
scraper:
condition: service_started
environment: environment:
DATABASE_URL: "host=postgres port=5432 user=${POSTGRES_USER:-tradarr} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB:-tradarr} sslmode=disable" DATABASE_URL: "host=postgres port=5432 user=${POSTGRES_USER:-tradarr} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB:-tradarr} sslmode=disable"
JWT_SECRET: ${JWT_SECRET:?JWT_SECRET is required} JWT_SECRET: ${JWT_SECRET:?JWT_SECRET is required}
ENCRYPTION_KEY: ${ENCRYPTION_KEY:?ENCRYPTION_KEY must be 32 bytes hex} ENCRYPTION_KEY: ${ENCRYPTION_KEY:?ENCRYPTION_KEY must be 32 bytes hex}
PORT: "8080" PORT: "8080"
SCRAPER_URL: "http://scraper:3001"
ADMIN_EMAIL: ${ADMIN_EMAIL:-admin@tradarr.local} ADMIN_EMAIL: ${ADMIN_EMAIL:-admin@tradarr.local}
ADMIN_PASSWORD: ${ADMIN_PASSWORD:-changeme} ADMIN_PASSWORD: ${ADMIN_PASSWORD:-changeme}
expose: expose:

View File

@ -12,6 +12,7 @@ export interface ScrapeJob {
articles_found: number; error_msg: string; created_at: string articles_found: number; error_msg: string; created_at: string
} }
export interface Setting { key: string; value: string } export interface Setting { key: string; value: string }
export interface ScheduleSlot { id?: string; day_of_week: number; hour: number; minute: number }
export interface AdminUser { id: string; email: string; role: string; created_at: string } export interface AdminUser { id: string; email: string; role: string; created_at: string }
export interface Credential { source_id: string; source_name: string; username: string; has_password: boolean } export interface Credential { source_id: string; source_name: string; username: string; has_password: boolean }
@ -44,6 +45,10 @@ export const adminApi = {
updateSettings: (settings: Setting[]) => api.put<void>('/admin/settings', { settings }), updateSettings: (settings: Setting[]) => api.put<void>('/admin/settings', { settings }),
getDefaultPrompt: () => api.get<{ prompt: string }>('/admin/settings/default-prompt'), getDefaultPrompt: () => api.get<{ prompt: string }>('/admin/settings/default-prompt'),
// Schedule
getSchedule: () => api.get<ScheduleSlot[]>('/admin/schedule'),
updateSchedule: (slots: ScheduleSlot[]) => api.put<void>('/admin/schedule', { slots }),
// Users // Users
listUsers: () => api.get<AdminUser[]>('/admin/users'), listUsers: () => api.get<AdminUser[]>('/admin/users'),
updateUser: (id: string, email: string, role: string) => updateUser: (id: string, email: string, role: string) =>

View File

@ -1,5 +1,5 @@
import { NavLink } from 'react-router-dom' import { NavLink } from 'react-router-dom'
import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp } from 'lucide-react' import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp, CalendarDays } from 'lucide-react'
import { useAuth } from '@/lib/auth' import { useAuth } from '@/lib/auth'
import { cn } from '@/lib/cn' import { cn } from '@/lib/cn'
@ -15,6 +15,7 @@ const adminItems = [
{ to: '/admin/sources', icon: Database, label: 'Sources' }, { to: '/admin/sources', icon: Database, label: 'Sources' },
{ to: '/admin/jobs', icon: ClipboardList, label: 'Jobs' }, { to: '/admin/jobs', icon: ClipboardList, label: 'Jobs' },
{ to: '/admin/users', icon: Users, label: 'Utilisateurs' }, { to: '/admin/users', icon: Users, label: 'Utilisateurs' },
{ to: '/admin/schedule', icon: CalendarDays, label: 'Planning' },
{ to: '/admin/settings', icon: Settings, label: 'Paramètres' }, { to: '/admin/settings', icon: Settings, label: 'Paramètres' },
] ]

View File

@ -11,6 +11,7 @@ import { Sources } from '@/pages/admin/Sources'
import { Jobs } from '@/pages/admin/Jobs' import { Jobs } from '@/pages/admin/Jobs'
import { AdminUsers } from '@/pages/admin/AdminUsers' import { AdminUsers } from '@/pages/admin/AdminUsers'
import { AdminSettings } from '@/pages/admin/AdminSettings' import { AdminSettings } from '@/pages/admin/AdminSettings'
import { Schedule } from '@/pages/admin/Schedule'
export const router = createBrowserRouter([ export const router = createBrowserRouter([
{ path: '/login', element: <Login /> }, { path: '/login', element: <Login /> },
@ -31,6 +32,7 @@ export const router = createBrowserRouter([
{ path: 'jobs', element: <Jobs /> }, { path: 'jobs', element: <Jobs /> },
{ path: 'users', element: <AdminUsers /> }, { path: 'users', element: <AdminUsers /> },
{ path: 'settings', element: <AdminSettings /> }, { path: 'settings', element: <AdminSettings /> },
{ path: 'schedule', element: <Schedule /> },
], ],
}, },
], ],

View File

@ -0,0 +1,145 @@
import { useState, useEffect } from 'react'
import { Plus, Trash2, Save } from 'lucide-react'
import { adminApi, type ScheduleSlot } from '@/api/admin'
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Spinner } from '@/components/ui/spinner'
// Week days in display order (Monday first). `value` is the backend's
// day_of_week encoding, where Sunday is 0 (see the Dimanche entry).
const DAYS = [
  { label: 'Lundi', short: 'MAR', value: 1 },
  { label: 'Mardi', short: 'MAR', value: 2 },
  { label: 'Mercredi', short: 'MER', value: 3 },
  { label: 'Jeudi', short: 'JEU', value: 4 },
  { label: 'Vendredi', short: 'VEN', value: 5 },
  { label: 'Samedi', short: 'SAM', value: 6 },
  { label: 'Dimanche', short: 'DIM', value: 0 },
]
// Canonical "day-hour-minute" key for a slot; used for dedup, removal
// matching, and React list keys.
type SlotKey = `${number}-${number}-${number}`

// toKey builds the unique key for a schedule slot.
function toKey(s: ScheduleSlot): SlotKey {
  const { day_of_week, hour, minute } = s
  return `${day_of_week}-${hour}-${minute}`
}

// fmt renders an hour/minute pair as zero-padded "HH:MM".
function fmt(h: number, m: number) {
  const pad = (n: number) => String(n).padStart(2, '0')
  return `${pad(h)}:${pad(m)}`
}
/**
 * Weekly schedule editor: one card per day of the week, each listing the
 * scrape + AI-summary time slots. Edits stay local until "Enregistrer"
 * persists the full list via the admin API.
 *
 * Fixes over the previous version:
 * - save() releases the `saving` flag in a `finally`, so a failed PUT no
 *   longer leaves the button disabled forever;
 * - addSlot() rejects malformed time values (NaN hour/minute from a
 *   cleared <input type="time">);
 * - load() catches fetch errors instead of raising an unhandled rejection.
 */
export function Schedule() {
  const [slots, setSlots] = useState<ScheduleSlot[]>([])
  const [loading, setLoading] = useState(true)
  const [saving, setSaving] = useState(false)
  const [saved, setSaved] = useState(false)
  // Pending "new slot" time input value per day (keyed by day_of_week).
  const [newTimes, setNewTimes] = useState<Record<number, string>>({})

  useEffect(() => { load() }, [])

  async function load() {
    setLoading(true)
    try {
      setSlots((await adminApi.getSchedule()) ?? [])
    } catch {
      setSlots([]) // keep the page usable even if the fetch fails
    } finally {
      setLoading(false)
    }
  }

  // Slots belonging to one day, sorted chronologically.
  function slotsForDay(day: number) {
    return slots
      .filter(s => s.day_of_week === day)
      .sort((a, b) => a.hour !== b.hour ? a.hour - b.hour : a.minute - b.minute)
  }

  function addSlot(day: number) {
    const time = newTimes[day] || '06:00'
    const [h, m] = time.split(':').map(Number)
    // Guard against a cleared or malformed time input value.
    if (!Number.isInteger(h) || !Number.isInteger(m)) return
    const newSlot: ScheduleSlot = { day_of_week: day, hour: h, minute: m }
    if (slots.some(s => toKey(s) === toKey(newSlot))) return // duplicate
    setSlots(prev => [...prev, newSlot])
    setNewTimes(p => ({ ...p, [day]: '06:00' }))
  }

  function removeSlot(slot: ScheduleSlot) {
    setSlots(prev => prev.filter(s => toKey(s) !== toKey(slot)))
  }

  async function save() {
    setSaving(true)
    setSaved(false)
    try {
      await adminApi.updateSchedule(slots)
      setSaved(true)
      setTimeout(() => setSaved(false), 2000)
    } finally {
      // Always re-enable the button, even when the PUT fails.
      setSaving(false)
    }
  }

  if (loading) return <div className="flex justify-center py-20"><Spinner /></div>
  return (
    <div className="space-y-6">
      <div className="flex items-center justify-between">
        <div>
          <h1 className="text-2xl font-bold">Planning hebdomadaire</h1>
          <p className="text-muted-foreground text-sm">
            Définissez les créneaux de scraping + résumé IA pour chaque jour
          </p>
        </div>
        <Button onClick={save} disabled={saving}>
          {saving ? <Spinner className="h-4 w-4" /> : <Save className="h-4 w-4" />}
          {saved ? 'Enregistré !' : 'Enregistrer'}
        </Button>
      </div>
      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 xl:grid-cols-7 gap-3">
        {DAYS.map(day => {
          const daySlots = slotsForDay(day.value)
          const isWeekend = day.value === 0 || day.value === 6
          return (
            <Card key={day.value} className={isWeekend ? 'border-muted' : ''}>
              <CardHeader className="pb-2 pt-4 px-4">
                <CardTitle className="text-sm font-semibold">
                  <span className="hidden xl:block">{day.label}</span>
                  <span className="xl:hidden">{day.short}</span>
                </CardTitle>
              </CardHeader>
              <CardContent className="px-4 pb-4 space-y-2">
                {/* Existing slots for this day */}
                {daySlots.length === 0 && (
                  <p className="text-xs text-muted-foreground italic">Aucun créneau</p>
                )}
                {daySlots.map(slot => (
                  <div
                    key={toKey(slot)}
                    className="flex items-center justify-between rounded bg-primary/10 px-2 py-1"
                  >
                    <span className="text-sm font-mono font-medium">
                      {fmt(slot.hour, slot.minute)}
                    </span>
                    <button
                      onClick={() => removeSlot(slot)}
                      className="text-muted-foreground hover:text-destructive transition-colors ml-2"
                    >
                      <Trash2 className="h-3 w-3" />
                    </button>
                  </div>
                ))}
                {/* New slot picker */}
                <div className="flex items-center gap-1 pt-1">
                  <input
                    type="time"
                    value={newTimes[day.value] ?? '06:00'}
                    onChange={e => setNewTimes(p => ({ ...p, [day.value]: e.target.value }))}
                    className="flex-1 min-w-0 rounded border border-input bg-background px-2 py-1 text-xs font-mono focus:outline-none focus:ring-1 focus:ring-ring"
                  />
                  <button
                    onClick={() => addSlot(day.value)}
                    className="rounded bg-primary/10 p-1 hover:bg-primary/20 transition-colors"
                  >
                    <Plus className="h-3 w-3" />
                  </button>
                </div>
              </CardContent>
            </Card>
          )
        })}
      </div>
      <p className="text-xs text-muted-foreground">
        À chaque créneau, le service lance le scraping de toutes les sources actives puis génère les résumés IA.
      </p>
    </div>
  )
}

View File

@ -15,8 +15,7 @@
"noUnusedLocals": true, "noUnusedLocals": true,
"noUnusedParameters": true, "noUnusedParameters": true,
"noFallthroughCasesInSwitch": true, "noFallthroughCasesInSwitch": true,
"baseUrl": ".", "paths": { "@/*": ["./src/*"] }
"paths": { "@/*": ["src/*"] }
}, },
"include": ["src"] "include": ["src"]
} }

View File

@ -0,0 +1,46 @@
# Puppeteer scraper service image.
# Uses Debian's chromium package instead of Puppeteer's bundled download so
# the browser binary and its shared-library dependencies stay in sync.
FROM node:20-slim

# Chromium plus the shared libraries headless Chrome needs at runtime.
RUN apt-get update && apt-get install -y \
    chromium \
    fonts-liberation \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcairo2 \
    libcups2 \
    libdbus-1-3 \
    libexpat1 \
    libfontconfig1 \
    libgbm1 \
    libglib2.0-0 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxcursor1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxi6 \
    libxrandr2 \
    libxrender1 \
    libxss1 \
    libxtst6 \
    --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

# Tell Puppeteer to skip its own Chromium download and use the system one.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium

WORKDIR /app

# Install production dependencies first for better layer caching.
COPY package*.json ./
RUN npm install --omit=dev

COPY . .

EXPOSE 3001
CMD ["node", "index.js"]

205
scraper-service/index.js Normal file
View File

@ -0,0 +1,205 @@
// Standalone Bloomberg scraping microservice.
// Runs headless Chromium via puppeteer-extra with the stealth plugin to
// reduce bot detection; exposes an HTTP API consumed by the Go backend.
const express = require('express')
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
puppeteer.use(StealthPlugin())

const app = express()
app.use(express.json())

// System Chromium path (set in the Dockerfile) and listen port.
const CHROME_PATH = process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium'
const PORT = process.env.PORT || 3001
// Launch a headless Chromium configured for running inside a container
// (no sandbox, no /dev/shm reliance) with automation fingerprints reduced.
function launchBrowser() {
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--window-size=1920,1080',
    '--disable-blink-features=AutomationControlled',
  ]
  return puppeteer.launch({
    executablePath: CHROME_PATH,
    headless: true,
    args: launchArgs,
  })
}
// Click the first selector that resolves to an element, returning true.
// When no selector matches, press Enter as a last-resort way to submit the
// current form and return false so the caller knows nothing was clicked.
async function tryClick(page, selectors) {
  for (const sel of selectors) {
    try {
      const handle = await page.$(sel)
      if (!handle) continue
      await handle.click()
      return true
    } catch {
      // Selector invalid or element detached; try the next candidate.
    }
  }
  await page.keyboard.press('Enter')
  return false
}
// Type `text` into the first selector that appears within 4 seconds.
// Returns true on success, false when none of the selectors ever showed up.
async function tryType(page, selectors, text) {
  for (const sel of selectors) {
    try {
      await page.waitForSelector(sel, { timeout: 4000 })
      await page.type(sel, text, { delay: 60 })
      return true
    } catch {
      // Not present (yet); fall through to the next candidate selector.
    }
  }
  return false
}
// Liveness probe used by docker-compose / the backend.
app.get('/health', (_, res) => res.json({ ok: true }))

// POST /bloomberg/scrape  body: { username, password }
// Logs into bloomberg.com with the supplied credentials, then harvests
// article links from a few section pages. Responds with { articles } on
// success, or { error } with status 400/500 on failure. The browser is
// always closed in the finally block, even on error.
app.post('/bloomberg/scrape', async (req, res) => {
  const { username, password } = req.body || {}
  if (!username || !password) {
    return res.status(400).json({ error: 'username and password required' })
  }
  let browser
  try {
    browser = await launchBrowser()
    const page = await browser.newPage()
    await page.setViewport({ width: 1920, height: 1080 })
    // Hide automation signals commonly checked by bot-detection scripts.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
      window.chrome = { runtime: {} }
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] })
    })
    console.log('[bloomberg] navigating to login page')
    await page.goto('https://www.bloomberg.com/account/signin', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    })
    await new Promise(r => setTimeout(r, 2000))
    // Debug: dump the page's <input> elements before hunting for the email
    // field, so selector failures are diagnosable from the logs.
    const pageInputs = await page.evaluate(() =>
      Array.from(document.querySelectorAll('input')).map(i => ({
        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder, visible: i.offsetParent !== null
      }))
    )
    console.log('[bloomberg] inputs on login page:', JSON.stringify(pageInputs))
    const pageTitle = await page.title()
    console.log('[bloomberg] page title:', pageTitle)
    console.log('[bloomberg] entering email')
    // Candidate selectors for the email field, most specific first.
    const emailSelectors = [
      '#email-form-input',
      'input[id="email-form-input"]',
      'input[type="email"]',
      'input[name="text-input"]',
      'input[placeholder*="email" i]',
    ]
    const emailOk = await tryType(page, emailSelectors, username)
    if (!emailOk) throw new Error('could not find email input')
    await new Promise(r => setTimeout(r, 800))
    // Click submit via in-page JS to get past buttons that appear disabled
    // to Puppeteer's native click.
    const submitted = await page.evaluate(() => {
      const btns = Array.from(document.querySelectorAll('button'))
      const btn = btns.find(b =>
        b.type === 'submit' ||
        /continue|next|sign.?in/i.test(b.textContent)
      )
      if (btn) { btn.click(); return true }
      const form = document.querySelector('form')
      if (form) { form.submit(); return true }
      return false
    })
    if (!submitted) await page.keyboard.press('Enter')
    // Wait for the page to react (password input appears or navigation);
    // fall back to a fixed delay when it never does.
    try {
      await page.waitForFunction(
        () => document.querySelector('input[type="password"]') !== null,
        { timeout: 10000 }
      )
    } catch {
      await new Promise(r => setTimeout(r, 3000))
    }
    console.log('[bloomberg] after email submit, url:', page.url())
    // Debug: list the inputs now available (the login flow varies).
    const allInputs = await page.evaluate(() =>
      Array.from(document.querySelectorAll('input')).map(i => ({
        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder
      }))
    )
    console.log('[bloomberg] inputs after email submit:', JSON.stringify(allInputs))
    console.log('[bloomberg] entering password')
    const pwdOk = await tryType(page, [
      'input[type="password"]',
      'input[name="password"]',
      'input[autocomplete="current-password"]',
      'input[autocomplete="password"]',
    ], password)
    if (!pwdOk) throw new Error('could not find password input — check logs above for available inputs')
    await new Promise(r => setTimeout(r, 500))
    await tryClick(page, ['button[type="submit"]', 'input[type="submit"]'])
    await new Promise(r => setTimeout(r, 3000))
    const currentURL = page.url()
    console.log('[bloomberg] after login, url:', currentURL)
    // Section pages to harvest article links from once logged in.
    const pages = [
      'https://www.bloomberg.com/markets',
      'https://www.bloomberg.com/technology',
      'https://www.bloomberg.com/economics',
    ]
    const articles = []
    const seen = new Set()
    for (const url of pages) {
      try {
        console.log('[bloomberg] scraping', url)
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 })
        await new Promise(r => setTimeout(r, 2000))
        // Collect headline links in-page; dedup locally and cap at 25
        // items per section page.
        const items = await page.evaluate(() => {
          const results = []
          const seen = new Set()
          const links = document.querySelectorAll(
            'a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]'
          )
          links.forEach(a => {
            if (seen.has(a.href)) return
            seen.add(a.href)
            const titleEl = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]')
            const text = titleEl ? titleEl.innerText.trim() : a.innerText.trim()
            if (text.length > 20 && a.href.includes('bloomberg.com')) {
              results.push({ title: text, url: a.href })
            }
          })
          return results.slice(0, 25)
        })
        for (const item of items) {
          if (!seen.has(item.url) && item.title && item.url) {
            seen.add(item.url)
            articles.push(item)
          }
        }
        console.log('[bloomberg]', url, '->', items.length, 'articles')
      } catch (e) {
        // A single section failing shouldn't abort the whole scrape.
        console.error('[bloomberg] error on', url, ':', e.message)
      }
    }
    console.log('[bloomberg] total:', articles.length, 'articles')
    res.json({ articles })
  } catch (e) {
    console.error('[bloomberg] scrape error:', e.message)
    res.status(500).json({ error: e.message })
  } finally {
    if (browser) await browser.close()
  }
})

app.listen(PORT, () => console.log(`scraper-service listening on :${PORT}`))

View File

@ -0,0 +1,14 @@
{
"name": "tradarr-scraper-service",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"start": "node index.js"
},
"dependencies": {
"express": "^4.19.2",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"puppeteer": "^22.0.0"
}
}