diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go index 9caadc4..011c66e 100644 --- a/backend/cmd/server/main.go +++ b/backend/cmd/server/main.go @@ -15,6 +15,8 @@ import ( "github.com/tradarr/backend/internal/scheduler" "github.com/tradarr/backend/internal/scraper" "github.com/tradarr/backend/internal/scraper/bloomberg" + "github.com/tradarr/backend/internal/scraper/reuters" + "github.com/tradarr/backend/internal/scraper/watcherguru" "github.com/tradarr/backend/internal/scraper/yahoofinance" ) @@ -38,30 +40,23 @@ func main() { enc := crypto.New(cfg.EncryptionKey) pipeline := ai.NewPipeline(repo, enc) - // Créer le compte admin initial si nécessaire if err := ensureAdmin(repo, cfg); err != nil { log.Printf("ensure admin: %v", err) } - // Configurer les scrapers registry := scraper.NewRegistry(repo) + registry.Register(bloomberg.NewDynamic(repo, enc, cfg.ScraperURL)) + registry.Register(yahoofinance.New()) + registry.Register(reuters.New()) + registry.Register(watcherguru.New()) - // Bloomberg (credentials chargés depuis la DB à chaque run) - bbScraper := bloomberg.NewDynamic(repo, enc, cfg.ChromePath) - registry.Register(bbScraper) - - stScraper := yahoofinance.New() - registry.Register(stScraper) - - // Scheduler sched := scheduler.New(registry, pipeline, repo) if err := sched.Start(); err != nil { log.Printf("scheduler: %v", err) } defer sched.Stop() - // API - h := handlers.New(repo, cfg, enc, registry, pipeline) + h := handlers.New(repo, cfg, enc, registry, pipeline, sched) r := api.SetupRouter(h, cfg.JWTSecret) addr := fmt.Sprintf(":%s", cfg.Port) diff --git a/backend/internal/ai/pipeline.go b/backend/internal/ai/pipeline.go index 31bd604..c1da501 100644 --- a/backend/internal/ai/pipeline.go +++ b/backend/internal/ai/pipeline.go @@ -3,6 +3,7 @@ package ai import ( "context" "fmt" + "regexp" "strconv" "strings" "time" @@ -31,7 +32,6 @@ func NewPipeline(repo *models.Repository, enc *crypto.Encryptor) *Pipeline { return &Pipeline{repo: repo, 
enc: enc} } -// BuildProvider instancie un provider à partir de ses paramètres func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error) { provider, err := p.repo.GetActiveAIProvider() if err != nil { @@ -44,9 +44,7 @@ func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error return NewProvider(name, apiKey, model, endpoint) } -// GenerateForUser génère un résumé personnalisé pour un utilisateur func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.Summary, error) { - // Récupérer le provider actif providerCfg, err := p.repo.GetActiveAIProvider() if err != nil { return nil, fmt.Errorf("get active provider: %w", err) @@ -68,7 +66,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models. return nil, fmt.Errorf("build provider: %w", err) } - // Récupérer la watchlist de l'utilisateur (pour le contexte IA uniquement) assets, err := p.repo.GetUserAssets(userID) if err != nil { return nil, fmt.Errorf("get user assets: %w", err) @@ -78,7 +75,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models. symbols[i] = a.Symbol } - // Récupérer TOUS les articles récents, toutes sources confondues hoursStr, _ := p.repo.GetSetting("articles_lookback_hours") hours, _ := strconv.Atoi(hoursStr) if hours == 0 { @@ -98,16 +94,21 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models. 
if maxArticles == 0 { maxArticles = 50 } + + // Passe 1 : filtrage par pertinence sur les titres si trop d'articles if len(articles) > maxArticles { - articles = articles[:maxArticles] + fmt.Printf("pipeline: %d articles → filtering to %d via AI\n", len(articles), maxArticles) + articles = p.filterByRelevance(ctx, provider, symbols, articles, maxArticles) + fmt.Printf("pipeline: %d articles retained after filtering\n", len(articles)) } systemPrompt, _ := p.repo.GetSetting("ai_system_prompt") if systemPrompt == "" { systemPrompt = DefaultSystemPrompt } - prompt := buildPrompt(systemPrompt, symbols, articles) + // Passe 2 : résumé complet + prompt := buildPrompt(systemPrompt, symbols, articles) summary, err := provider.Summarize(ctx, prompt) if err != nil { return nil, fmt.Errorf("AI summarize: %w", err) @@ -116,7 +117,77 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models. return p.repo.CreateSummary(userID, summary, &providerCfg.ID) } -// GenerateForAll génère les résumés pour tous les utilisateurs ayant une watchlist +// filterByRelevance demande à l'IA de sélectionner les articles les plus pertinents +// en ne lui envoyant que les titres (prompt très court = rapide). 
+func (p *Pipeline) filterByRelevance(ctx context.Context, provider Provider, symbols []string, articles []models.Article, max int) []models.Article { + prompt := buildFilterPrompt(symbols, articles, max) + response, err := provider.Summarize(ctx, prompt) + if err != nil { + fmt.Printf("pipeline: filter AI call failed (%v), falling back to truncation\n", err) + return articles[:max] + } + + indices := parseIndexArray(response, len(articles)) + if len(indices) == 0 { + fmt.Printf("pipeline: could not parse filter response, falling back to truncation\n") + return articles[:max] + } + + filtered := make([]models.Article, 0, len(indices)) + for _, i := range indices { + filtered = append(filtered, articles[i]) + if len(filtered) >= max { + break + } + } + return filtered +} + +func buildFilterPrompt(symbols []string, articles []models.Article, max int) string { + var sb strings.Builder + sb.WriteString("Tu es un assistant de trading financier. ") + sb.WriteString(fmt.Sprintf("Parmi les %d articles ci-dessous, sélectionne les %d plus pertinents pour un trader actif.\n", len(articles), max)) + + if len(symbols) > 0 { + sb.WriteString("Actifs surveillés (priorité haute) : ") + sb.WriteString(strings.Join(symbols, ", ")) + sb.WriteString("\n") + } + + sb.WriteString(fmt.Sprintf("\nRéponds UNIQUEMENT avec un tableau JSON des indices sélectionnés (base 0), exemple : [0, 3, 7, 12]\n")) + sb.WriteString("N'ajoute aucun texte avant ou après le tableau JSON.\n\n") + sb.WriteString("Articles :\n") + + for i, a := range articles { + sb.WriteString(fmt.Sprintf("[%d] %s (%s)\n", i, a.Title, a.SourceName)) + } + + return sb.String() +} + +var jsonArrayRe = regexp.MustCompile(`\[[\d\s,]+\]`) + +func parseIndexArray(response string, maxIndex int) []int { + match := jsonArrayRe.FindString(response) + if match == "" { + return nil + } + match = strings.Trim(match, "[]") + parts := strings.Split(match, ",") + + seen := make(map[int]bool) + var indices []int + for _, p := range parts { + 
n, err := strconv.Atoi(strings.TrimSpace(p)) + if err != nil || n < 0 || n >= maxIndex || seen[n] { + continue + } + seen[n] = true + indices = append(indices, n) + } + return indices +} + func (p *Pipeline) GenerateForAll(ctx context.Context) error { users, err := p.repo.ListUsers() if err != nil { diff --git a/backend/internal/api/handlers/admin.go b/backend/internal/api/handlers/admin.go index 2d6215f..e32c1d5 100644 --- a/backend/internal/api/handlers/admin.go +++ b/backend/internal/api/handlers/admin.go @@ -287,6 +287,45 @@ func (h *Handler) UpdateSettings(c *gin.Context) { httputil.OK(c, gin.H{"ok": true}) } +// ── Schedule ─────────────────────────────────────────────────────────────── + +func (h *Handler) GetSchedule(c *gin.Context) { + slots, err := h.repo.ListScheduleSlots() + if err != nil { + httputil.InternalError(c, err) + return + } + httputil.OK(c, slots) +} + +type scheduleRequest struct { + Slots []struct { + DayOfWeek int `json:"day_of_week"` + Hour int `json:"hour"` + Minute int `json:"minute"` + } `json:"slots"` +} + +func (h *Handler) UpdateSchedule(c *gin.Context) { + var req scheduleRequest + if err := c.ShouldBindJSON(&req); err != nil { + httputil.BadRequest(c, err) + return + } + slots := make([]models.ScheduleSlot, len(req.Slots)) + for i, s := range req.Slots { + slots[i] = models.ScheduleSlot{DayOfWeek: s.DayOfWeek, Hour: s.Hour, Minute: s.Minute} + } + if err := h.repo.ReplaceSchedule(slots); err != nil { + httputil.InternalError(c, err) + return + } + if err := h.scheduler.Reload(); err != nil { + fmt.Printf("schedule reload: %v\n", err) + } + httputil.OK(c, gin.H{"ok": true}) +} + func (h *Handler) GetDefaultSystemPrompt(c *gin.Context) { httputil.OK(c, gin.H{"prompt": ai.DefaultSystemPrompt}) } diff --git a/backend/internal/api/handlers/handler.go b/backend/internal/api/handlers/handler.go index 7443691..1035187 100644 --- a/backend/internal/api/handlers/handler.go +++ b/backend/internal/api/handlers/handler.go @@ -5,6 +5,7 @@ 
import ( "github.com/tradarr/backend/internal/config" "github.com/tradarr/backend/internal/crypto" "github.com/tradarr/backend/internal/models" + "github.com/tradarr/backend/internal/scheduler" "github.com/tradarr/backend/internal/scraper" ) @@ -14,6 +15,7 @@ type Handler struct { enc *crypto.Encryptor registry *scraper.Registry pipeline *ai.Pipeline + scheduler *scheduler.Scheduler } func New( @@ -22,12 +24,14 @@ func New( enc *crypto.Encryptor, registry *scraper.Registry, pipeline *ai.Pipeline, + sched *scheduler.Scheduler, ) *Handler { return &Handler{ - repo: repo, - cfg: cfg, - enc: enc, - registry: registry, - pipeline: pipeline, + repo: repo, + cfg: cfg, + enc: enc, + registry: registry, + pipeline: pipeline, + scheduler: sched, } } diff --git a/backend/internal/api/router.go b/backend/internal/api/router.go index 97524b5..a2adb7b 100644 --- a/backend/internal/api/router.go +++ b/backend/internal/api/router.go @@ -65,6 +65,9 @@ func SetupRouter(h *handlers.Handler, jwtSecret string) *gin.Engine { admin.PUT("/settings", h.UpdateSettings) admin.GET("/settings/default-prompt", h.GetDefaultSystemPrompt) + admin.GET("/schedule", h.GetSchedule) + admin.PUT("/schedule", h.UpdateSchedule) + admin.GET("/users", h.ListUsers) admin.PUT("/users/:id", h.UpdateAdminUser) admin.DELETE("/users/:id", h.DeleteAdminUser) diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 6300b98..e736435 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -7,13 +7,13 @@ import ( ) type Config struct { - DatabaseURL string - JWTSecret string - EncryptionKey []byte - Port string - ChromePath string - AdminEmail string - AdminPassword string + DatabaseURL string + JWTSecret string + EncryptionKey []byte + Port string + ScraperURL string + AdminEmail string + AdminPassword string } func Load() (*Config, error) { @@ -41,12 +41,17 @@ func Load() (*Config, error) { port = "8080" } + scraperURL := os.Getenv("SCRAPER_URL") + if 
scraperURL == "" { + scraperURL = "http://scraper:3001" + } + return &Config{ DatabaseURL: dbURL, JWTSecret: jwtSecret, EncryptionKey: encKey, Port: port, - ChromePath: os.Getenv("CHROME_PATH"), + ScraperURL: scraperURL, AdminEmail: os.Getenv("ADMIN_EMAIL"), AdminPassword: os.Getenv("ADMIN_PASSWORD"), }, nil diff --git a/backend/internal/database/migrations/000001_init.up.sql b/backend/internal/database/migrations/000001_init.up.sql index 01ec1a0..2f07aeb 100644 --- a/backend/internal/database/migrations/000001_init.up.sql +++ b/backend/internal/database/migrations/000001_init.up.sql @@ -21,7 +21,7 @@ CREATE TABLE user_assets ( CREATE TABLE sources ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), name TEXT NOT NULL, - type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits')), + type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits', 'reuters', 'watcherguru')), enabled BOOLEAN NOT NULL DEFAULT TRUE, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); @@ -97,7 +97,7 @@ CREATE INDEX idx_user_assets_user_id ON user_assets(user_id); -- Sources initiales INSERT INTO sources (name, type, enabled) VALUES ('Bloomberg', 'bloomberg', TRUE), - ('StockTwits', 'stocktwits', TRUE); + ('Yahoo Finance', 'stocktwits', TRUE); -- Paramètres par défaut INSERT INTO settings (key, value) VALUES diff --git a/backend/internal/database/migrations/000003_new_sources.down.sql b/backend/internal/database/migrations/000003_new_sources.down.sql new file mode 100644 index 0000000..bb4b405 --- /dev/null +++ b/backend/internal/database/migrations/000003_new_sources.down.sql @@ -0,0 +1 @@ +DELETE FROM sources WHERE type IN ('reuters', 'watcherguru'); diff --git a/backend/internal/database/migrations/000003_new_sources.up.sql b/backend/internal/database/migrations/000003_new_sources.up.sql new file mode 100644 index 0000000..b729049 --- /dev/null +++ b/backend/internal/database/migrations/000003_new_sources.up.sql @@ -0,0 +1,4 @@ +INSERT INTO sources (name, type, enabled) VALUES + ('Reuters', 
'reuters', true), + ('Watcher.Guru', 'watcherguru', true) +ON CONFLICT DO NOTHING; diff --git a/backend/internal/database/migrations/000004_schedule.down.sql b/backend/internal/database/migrations/000004_schedule.down.sql new file mode 100644 index 0000000..c8cb0d1 --- /dev/null +++ b/backend/internal/database/migrations/000004_schedule.down.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS scrape_schedules; diff --git a/backend/internal/database/migrations/000004_schedule.up.sql b/backend/internal/database/migrations/000004_schedule.up.sql new file mode 100644 index 0000000..cd9b8ea --- /dev/null +++ b/backend/internal/database/migrations/000004_schedule.up.sql @@ -0,0 +1,17 @@ +CREATE TABLE scrape_schedules ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + day_of_week SMALLINT NOT NULL CHECK (day_of_week BETWEEN 0 AND 6), + hour SMALLINT NOT NULL CHECK (hour BETWEEN 0 AND 23), + minute SMALLINT NOT NULL DEFAULT 0 CHECK (minute BETWEEN 0 AND 59), + UNIQUE (day_of_week, hour, minute) +); + +-- Planning par défaut : lun-ven à 6h et 15h, week-end à 6h uniquement +INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES + (1, 6, 0), (1, 15, 0), + (2, 6, 0), (2, 15, 0), + (3, 6, 0), (3, 15, 0), + (4, 6, 0), (4, 15, 0), + (5, 6, 0), (5, 15, 0), + (6, 6, 0), + (0, 6, 0); diff --git a/backend/internal/models/models.go b/backend/internal/models/models.go index b1281e4..f6b4bc7 100644 --- a/backend/internal/models/models.go +++ b/backend/internal/models/models.go @@ -97,3 +97,10 @@ type Setting struct { Key string `json:"key"` Value string `json:"value"` } + +type ScheduleSlot struct { + ID string `json:"id"` + DayOfWeek int `json:"day_of_week"` // 0=dimanche, 1=lundi ... 
6=samedi + Hour int `json:"hour"` + Minute int `json:"minute"` +} diff --git a/backend/internal/models/repository.go b/backend/internal/models/repository.go index 855d4ec..5af29a5 100644 --- a/backend/internal/models/repository.go +++ b/backend/internal/models/repository.go @@ -520,6 +520,51 @@ func (r *Repository) SetSetting(key, value string) error { return err } +// ── Schedule ─────────────────────────────────────────────────────────────── + +func (r *Repository) ListScheduleSlots() ([]ScheduleSlot, error) { + rows, err := r.db.Query(` + SELECT id, day_of_week, hour, minute FROM scrape_schedules + ORDER BY day_of_week, hour, minute`) + if err != nil { + return nil, err + } + defer rows.Close() + var slots []ScheduleSlot + for rows.Next() { + var s ScheduleSlot + if err := rows.Scan(&s.ID, &s.DayOfWeek, &s.Hour, &s.Minute); err != nil { + return nil, err + } + slots = append(slots, s) + } + return slots, nil +} + +func (r *Repository) ReplaceSchedule(slots []ScheduleSlot) error { + tx, err := r.db.Begin() + if err != nil { + return err + } + defer tx.Rollback() + + if _, err := tx.Exec(`DELETE FROM scrape_schedules`); err != nil { + return err + } + for _, s := range slots { + if _, err := tx.Exec( + `INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES ($1, $2, $3) + ON CONFLICT (day_of_week, hour, minute) DO NOTHING`, + s.DayOfWeek, s.Hour, s.Minute, + ); err != nil { + return err + } + } + return tx.Commit() +} + +// ── Settings ─────────────────────────────────────────────────────────────── + func (r *Repository) ListSettings() ([]Setting, error) { rows, err := r.db.Query(`SELECT key, value FROM settings ORDER BY key`) if err != nil { diff --git a/backend/internal/scheduler/scheduler.go b/backend/internal/scheduler/scheduler.go index bb4a27a..eee6077 100644 --- a/backend/internal/scheduler/scheduler.go +++ b/backend/internal/scheduler/scheduler.go @@ -3,7 +3,6 @@ package scheduler import ( "context" "fmt" - "strconv" "github.com/robfig/cron/v3" 
"github.com/tradarr/backend/internal/ai" @@ -16,7 +15,7 @@ type Scheduler struct { registry *scraper.Registry pipeline *ai.Pipeline repo *models.Repository - entryID cron.EntryID + entryIDs []cron.EntryID } func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler { @@ -29,19 +28,10 @@ func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Reposit } func (s *Scheduler) Start() error { - interval, err := s.getInterval() - if err != nil { + if err := s.loadSchedule(); err != nil { return err } - - spec := fmt.Sprintf("@every %dm", interval) - s.entryID, err = s.cron.AddFunc(spec, s.run) - if err != nil { - return fmt.Errorf("add cron: %w", err) - } - s.cron.Start() - fmt.Printf("scheduler started, running every %d minutes\n", interval) return nil } @@ -50,39 +40,46 @@ func (s *Scheduler) Stop() { } func (s *Scheduler) Reload() error { - s.cron.Remove(s.entryID) - interval, err := s.getInterval() - if err != nil { - return err + for _, id := range s.entryIDs { + s.cron.Remove(id) } - spec := fmt.Sprintf("@every %dm", interval) - s.entryID, err = s.cron.AddFunc(spec, s.run) - return err + s.entryIDs = nil + return s.loadSchedule() +} + +func (s *Scheduler) loadSchedule() error { + slots, err := s.repo.ListScheduleSlots() + if err != nil { + return fmt.Errorf("load schedule: %w", err) + } + if len(slots) == 0 { + fmt.Println("scheduler: no schedule configured, scraping disabled") + return nil + } + + for _, slot := range slots { + // Format cron: "minute hour * * day_of_week" + spec := fmt.Sprintf("%d %d * * %d", slot.Minute, slot.Hour, slot.DayOfWeek) + id, err := s.cron.AddFunc(spec, s.run) + if err != nil { + fmt.Printf("scheduler: invalid cron spec %q: %v\n", spec, err) + continue + } + s.entryIDs = append(s.entryIDs, id) + } + + fmt.Printf("scheduler: %d time slots loaded\n", len(s.entryIDs)) + return nil } func (s *Scheduler) run() { - fmt.Println("scheduler: running scraping cycle") + fmt.Println("scheduler: 
starting scraping cycle") if err := s.registry.RunAll(); err != nil { fmt.Printf("scheduler scrape error: %v\n", err) return } - fmt.Println("scheduler: running AI summaries") + fmt.Println("scheduler: starting AI summaries") if err := s.pipeline.GenerateForAll(context.Background()); err != nil { fmt.Printf("scheduler summary error: %v\n", err) } } - -func (s *Scheduler) getInterval() (int, error) { - v, err := s.repo.GetSetting("scrape_interval_minutes") - if err != nil { - return 60, nil - } - if v == "" { - return 60, nil - } - n, err := strconv.Atoi(v) - if err != nil || n < 1 { - return 60, nil - } - return n, nil -} diff --git a/backend/internal/scraper/bloomberg/bloomberg.go b/backend/internal/scraper/bloomberg/bloomberg.go index 4af981c..1455505 100644 --- a/backend/internal/scraper/bloomberg/bloomberg.go +++ b/backend/internal/scraper/bloomberg/bloomberg.go @@ -1,206 +1,94 @@ package bloomberg import ( + "bytes" "context" + "encoding/json" "fmt" + "io" + "net/http" "strings" "time" - "github.com/chromedp/chromedp" "github.com/tradarr/backend/internal/scraper" ) type Bloomberg struct { - username string - password string - chromePath string + scraperURL string + client *http.Client } -func New(username, password, chromePath string) *Bloomberg { - return &Bloomberg{username: username, password: password, chromePath: chromePath} +func New(scraperURL string) *Bloomberg { + if scraperURL == "" { + scraperURL = "http://scraper:3001" + } + return &Bloomberg{ + scraperURL: scraperURL, + client: &http.Client{Timeout: 10 * time.Minute}, + } } func (b *Bloomberg) Name() string { return "bloomberg" } -func (b *Bloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) { - if b.username == "" || b.password == "" { - return nil, fmt.Errorf("bloomberg credentials not configured") - } - - opts := []chromedp.ExecAllocatorOption{ - chromedp.NoFirstRun, - chromedp.NoDefaultBrowserCheck, - chromedp.Headless, - chromedp.DisableGPU, - 
chromedp.Flag("no-sandbox", true), - chromedp.Flag("disable-setuid-sandbox", true), - chromedp.Flag("disable-dev-shm-usage", true), - chromedp.Flag("disable-blink-features", "AutomationControlled"), - chromedp.Flag("disable-infobars", true), - chromedp.Flag("window-size", "1920,1080"), - chromedp.Flag("ignore-certificate-errors", true), - chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"), - } - if b.chromePath != "" { - opts = append(opts, chromedp.ExecPath(b.chromePath)) - } - - allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...) - defer cancelAlloc() - - chromeCtx, cancelChrome := chromedp.NewContext(allocCtx) - defer cancelChrome() - - timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, 5*time.Minute) - defer cancelTimeout() - - if err := b.login(timeoutCtx); err != nil { - return nil, fmt.Errorf("bloomberg login: %w", err) - } - - var articles []scraper.Article - pages := []string{ - "https://www.bloomberg.com/markets", - "https://www.bloomberg.com/technology", - "https://www.bloomberg.com/economics", - } - for _, u := range pages { - pageArticles, err := b.scrapePage(timeoutCtx, u, symbols) - if err != nil { - fmt.Printf("bloomberg scrape %s: %v\n", u, err) - continue - } - articles = append(articles, pageArticles...) 
- } - fmt.Printf("bloomberg: %d articles fetched total\n", len(articles)) - return articles, nil +type scraperRequest struct { + Username string `json:"username"` + Password string `json:"password"` } -func (b *Bloomberg) login(ctx context.Context) error { - loginCtx, cancel := context.WithTimeout(ctx, 2*time.Minute) - defer cancel() - - // Masquer la détection d'automation via JS - if err := chromedp.Run(loginCtx, - chromedp.ActionFunc(func(ctx context.Context) error { - return chromedp.Evaluate(` - Object.defineProperty(navigator, 'webdriver', {get: () => undefined}); - window.chrome = { runtime: {} }; - `, nil).Do(ctx) - }), - ); err != nil { - fmt.Printf("bloomberg: could not inject stealth JS: %v\n", err) - } - - err := chromedp.Run(loginCtx, - chromedp.Navigate("https://www.bloomberg.com/account/signin"), - chromedp.Sleep(2*time.Second), - // Essayer plusieurs sélecteurs pour l'email - chromedp.ActionFunc(func(ctx context.Context) error { - selectors := []string{ - `input[name="email"]`, - `input[type="email"]`, - `input[data-type="email"]`, - `input[placeholder*="email" i]`, - `input[placeholder*="mail" i]`, - } - for _, sel := range selectors { - var count int - if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 { - fmt.Printf("bloomberg: using email selector: %s\n", sel) - return chromedp.SendKeys(sel, b.username, chromedp.ByQuery).Do(ctx) - } - } - return fmt.Errorf("could not find email input — Bloomberg login page structure may have changed") - }), - chromedp.Sleep(500*time.Millisecond), - // Submit email - chromedp.ActionFunc(func(ctx context.Context) error { - selectors := []string{`button[type="submit"]`, `input[type="submit"]`, `button[data-testid*="submit"]`} - for _, sel := range selectors { - var count int - if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 { - return chromedp.Click(sel, 
chromedp.ByQuery).Do(ctx) - } - } - // Fallback: press Enter - return chromedp.KeyEvent("\r").Do(ctx) - }), - chromedp.Sleep(2*time.Second), - // Password - chromedp.ActionFunc(func(ctx context.Context) error { - selectors := []string{`input[type="password"]`, `input[name="password"]`} - for _, sel := range selectors { - var count int - if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 { - fmt.Printf("bloomberg: using password selector: %s\n", sel) - return chromedp.SendKeys(sel, b.password, chromedp.ByQuery).Do(ctx) - } - } - return fmt.Errorf("could not find password input") - }), - chromedp.Sleep(500*time.Millisecond), - chromedp.ActionFunc(func(ctx context.Context) error { - selectors := []string{`button[type="submit"]`, `input[type="submit"]`} - for _, sel := range selectors { - var count int - if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 { - return chromedp.Click(sel, chromedp.ByQuery).Do(ctx) - } - } - return chromedp.KeyEvent("\r").Do(ctx) - }), - chromedp.Sleep(3*time.Second), - ) - return err +type scraperArticle struct { + Title string `json:"title"` + URL string `json:"url"` } -func (b *Bloomberg) scrapePage(ctx context.Context, pageURL string, symbols []string) ([]scraper.Article, error) { - pageCtx, cancel := context.WithTimeout(ctx, 60*time.Second) - defer cancel() +type scraperResponse struct { + Articles []scraperArticle `json:"articles"` + Error string `json:"error,omitempty"` +} - var articleNodes []map[string]string - err := chromedp.Run(pageCtx, - chromedp.Navigate(pageURL), - chromedp.Sleep(3*time.Second), - chromedp.Evaluate(` - (function() { - var items = []; - var seen = new Set(); - var links = document.querySelectorAll('a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]'); - links.forEach(function(a) { - if (seen.has(a.href)) return; - seen.add(a.href); - var 
title = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]'); - var text = title ? title.innerText.trim() : a.innerText.trim(); - if (text.length > 20 && a.href.includes('bloomberg.com')) { - items.push({title: text, url: a.href}); - } - }); - return items.slice(0, 25); - })() - `, &articleNodes), - ) +func (b *Bloomberg) ScrapeWithCredentials(ctx context.Context, username, password string, symbols []string) ([]scraper.Article, error) { + payload, _ := json.Marshal(scraperRequest{Username: username, Password: password}) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, b.scraperURL+"/bloomberg/scrape", bytes.NewReader(payload)) if err != nil { - return nil, fmt.Errorf("navigate %s: %w", pageURL, err) + return nil, err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := b.client.Do(req) + if err != nil { + return nil, fmt.Errorf("scraper service unreachable: %w", err) + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("scraper service HTTP %d: %s", resp.StatusCode, body) + } + + var result scraperResponse + if err := json.Unmarshal(body, &result); err != nil { + return nil, fmt.Errorf("parse scraper response: %w", err) + } + if result.Error != "" { + return nil, fmt.Errorf("bloomberg: %s", result.Error) } - var articles []scraper.Article now := time.Now() - for _, node := range articleNodes { - title := strings.TrimSpace(node["title"]) - url := node["url"] - if title == "" || url == "" || !strings.Contains(url, "bloomberg.com") { + var articles []scraper.Article + for _, a := range result.Articles { + title := strings.TrimSpace(a.Title) + url := a.URL + if title == "" || url == "" { continue } syms := scraper.DetectSymbols(title, symbols) articles = append(articles, scraper.Article{ Title: title, - Content: title, // contenu minimal — l'article complet nécessite un accès payant + Content: title, URL: url, PublishedAt: &now, Symbols: 
syms, }) } + fmt.Printf("bloomberg: %d articles fetched\n", len(articles)) return articles, nil } diff --git a/backend/internal/scraper/bloomberg/dynamic.go b/backend/internal/scraper/bloomberg/dynamic.go index 7189732..e155cbe 100644 --- a/backend/internal/scraper/bloomberg/dynamic.go +++ b/backend/internal/scraper/bloomberg/dynamic.go @@ -9,21 +9,19 @@ import ( "github.com/tradarr/backend/internal/scraper" ) -// DynamicBloomberg charge les credentials depuis la DB avant chaque scraping type DynamicBloomberg struct { repo *models.Repository enc *crypto.Encryptor - chromePath string + scraperURL string } -func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, chromePath string) *DynamicBloomberg { - return &DynamicBloomberg{repo: repo, enc: enc, chromePath: chromePath} +func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, scraperURL string) *DynamicBloomberg { + return &DynamicBloomberg{repo: repo, enc: enc, scraperURL: scraperURL} } func (d *DynamicBloomberg) Name() string { return "bloomberg" } func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) { - // Récupérer la source Bloomberg source, err := d.repo.GetSourceByType("bloomberg") if err != nil || source == nil { return nil, fmt.Errorf("bloomberg source not found") @@ -34,7 +32,7 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra return nil, fmt.Errorf("get bloomberg credentials: %w", err) } if cred == nil || cred.Username == "" { - return nil, fmt.Errorf("bloomberg credentials not configured — please set them in the admin panel") + return nil, fmt.Errorf("bloomberg credentials not configured — configure them in the admin panel") } password := "" @@ -45,6 +43,6 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra } } - b := New(cred.Username, password, d.chromePath) - return b.Scrape(ctx, symbols) + b := New(d.scraperURL) + return b.ScrapeWithCredentials(ctx, cred.Username, 
password, symbols) }
diff --git a/backend/internal/scraper/reuters/reuters.go b/backend/internal/scraper/reuters/reuters.go
new file mode 100644
index 0000000..896e5e4
--- /dev/null
+++ b/backend/internal/scraper/reuters/reuters.go
@@ -0,0 +1,156 @@
+package reuters
+
+import (
+	"context"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/tradarr/backend/internal/scraper"
+)
+
+// feeds lists the RSS feeds polled by this scraper. Reuters' own RSS is
+// blocked by Cloudflare, so public financial feeds from MarketWatch and CNBC
+// are used instead.
+var feeds = []struct {
+	name string
+	url  string
+}{
+	{"MarketWatch Top Stories", "https://feeds.content.dowjones.io/public/rss/mw_topstories"},
+	{"MarketWatch Markets", "https://feeds.content.dowjones.io/public/rss/mw_marketpulse"},
+	{"CNBC Top News", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews"},
+	{"CNBC Finance", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews?tag=Finance"},
+}
+
+// pubDateLayouts are the RSS pubDate layouts tried in order. The "2" day
+// verb accepts both one- and two-digit days, which RFC 822 dates allow.
+var pubDateLayouts = []string{
+	"Mon, 2 Jan 2006 15:04:05 -0700",
+	"Mon, 2 Jan 2006 15:04:05 MST",
+}
+
+// Reuters scrapes general financial news from the package-level feeds list.
+type Reuters struct {
+	client *http.Client
+}
+
+// New returns a Reuters scraper whose HTTP client times out after 15 seconds.
+func New() *Reuters {
+	return &Reuters{client: &http.Client{Timeout: 15 * time.Second}}
+}
+
+// Name returns the scraper's name, "reuters".
+func (r *Reuters) Name() string { return "reuters" }
+
+// rssFeed mirrors the subset of an RSS 2.0 document read by fetchFeed.
+type rssFeed struct {
+	Channel struct {
+		Items []struct {
+			Title       string `xml:"title"`
+			Link        string `xml:"link"`
+			Description string `xml:"description"`
+			PubDate     string `xml:"pubDate"`
+		} `xml:"item"`
+	} `xml:"channel"`
+}
+
+// Scrape fetches every configured feed in turn and returns the articles
+// found, de-duplicated by URL. The symbols argument is ignored. A feed that
+// fails is logged and skipped. If ctx is cancelled between two feeds, the
+// articles collected so far are returned together with ctx.Err().
+func (r *Reuters) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
+	var articles []scraper.Article
+	seen := make(map[string]bool)
+
+	for i, feed := range feeds {
+		if i > 0 {
+			// Pause briefly between feeds while staying cancellable.
+			select {
+			case <-ctx.Done():
+				return articles, ctx.Err()
+			case <-time.After(300 * time.Millisecond):
+			}
+		}
+		items, err := r.fetchFeed(ctx, feed.url)
+		if err != nil {
+			log.Printf("reuters/financial %s: %v", feed.name, err)
+			continue
+		}
+		for _, a := range items {
+			if !seen[a.URL] {
+				seen[a.URL] = true
+				articles = append(articles, a)
+			}
+		}
+		log.Printf("reuters/financial %s: %d articles", feed.name, len(items))
+	}
+	return articles, nil
+}
+
+// fetchFeed downloads one RSS feed and converts its items to articles. Items
+// without a title or link are dropped; an empty description falls back to the
+// title. It returns an error on transport failure, a non-200 status, or
+// malformed XML.
+func (r *Reuters) fetchFeed(ctx context.Context, feedURL string) ([]scraper.Article, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
+	req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")
+
+	resp, err := r.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Keep a short excerpt of the body to make the error actionable.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
+		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
+	}
+
+	var feed rssFeed
+	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
+		return nil, fmt.Errorf("parse RSS: %w", err)
+	}
+
+	var articles []scraper.Article
+	for _, item := range feed.Channel.Items {
+		title := strings.TrimSpace(item.Title)
+		link := strings.TrimSpace(item.Link)
+		if title == "" || link == "" {
+			continue
+		}
+
+		content := strings.TrimSpace(item.Description)
+		if content == "" {
+			content = title
+		}
+
+		articles = append(articles, scraper.Article{
+			Title:       title,
+			Content:     content,
+			URL:         link,
+			PublishedAt: parsePubDate(item.PubDate),
+		})
+	}
+	return articles, nil
+}
+
+// parsePubDate parses an RSS pubDate value. It returns nil when the value
+// matches none of pubDateLayouts.
+func parsePubDate(raw string) *time.Time {
+	raw = strings.TrimSpace(raw)
+	for _, layout := range pubDateLayouts {
+		if t, err := time.Parse(layout, raw); err == nil {
+			return &t
+		}
+	}
+	return nil
+}
diff --git a/backend/internal/scraper/watcherguru/watcherguru.go b/backend/internal/scraper/watcherguru/watcherguru.go
new file mode 100644
index 0000000..1d34e25
--- /dev/null
+++ b/backend/internal/scraper/watcherguru/watcherguru.go
@@ -0,0 +1,244 @@
+package watcherguru
+
+import (
+	"context"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	"golang.org/x/net/html"
+
+	"github.com/tradarr/backend/internal/scraper"
+)
+
+const (
+	baseURL = "https://watcher.guru"
+
+	// maxHTMLArticles caps the number of links kept by the HTML fallback.
+	maxHTMLArticles = 40
+)
+
+// pubDateLayouts are the RSS pubDate layouts tried in order. The "2" day
+// verb accepts both one- and two-digit days, which RFC 822 dates allow.
+var pubDateLayouts = []string{
+	"Mon, 2 Jan 2006 15:04:05 -0700",
+	"Mon, 2 Jan 2006 15:04:05 MST",
+}
+
+// WatcherGuru scrapes news from watcher.guru, via RSS when available and by
+// parsing the HTML news page otherwise.
+type WatcherGuru struct {
+	client *http.Client
+}
+
+// New returns a WatcherGuru scraper whose HTTP client times out after 15 seconds.
+func New() *WatcherGuru {
+	return &WatcherGuru{client: &http.Client{Timeout: 15 * time.Second}}
+}
+
+// Name returns the scraper's name, "watcherguru".
+func (w *WatcherGuru) Name() string { return "watcherguru" }
+
+// rssFeed mirrors the subset of an RSS 2.0 document read by fetchRSS.
+type rssFeed struct {
+	Channel struct {
+		Items []struct {
+			Title   string `xml:"title"`
+			Link    string `xml:"link"`
+			PubDate string `xml:"pubDate"`
+			Desc    string `xml:"description"`
+		} `xml:"item"`
+	} `xml:"channel"`
+}
+
+// Scrape returns the articles of the first RSS feed that yields any. When
+// every feed fails or is empty it falls back to scraping the HTML news page.
+// The symbols argument is ignored. RSS failures are logged, not returned.
+func (w *WatcherGuru) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
+	for _, feedURL := range []string{
+		baseURL + "/feed/",
+		baseURL + "/news/feed/",
+	} {
+		articles, err := w.fetchRSS(ctx, feedURL)
+		if err != nil {
+			log.Printf("watcherguru rss %s: %v", feedURL, err)
+			continue
+		}
+		if len(articles) > 0 {
+			log.Printf("watcherguru rss: %d articles", len(articles))
+			return articles, nil
+		}
+	}
+
+	articles, err := w.scrapeHTML(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("watcherguru: %w", err)
+	}
+	log.Printf("watcherguru html: %d articles", len(articles))
+	return articles, nil
+}
+
+// fetchRSS downloads one RSS feed and converts its items to articles. Items
+// without a title or link are dropped; an empty description falls back to the
+// title. It returns an error on transport failure, a non-200 status, or
+// malformed XML.
+func (w *WatcherGuru) fetchRSS(ctx context.Context, feedURL string) ([]scraper.Article, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
+
+	resp, err := w.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
+	}
+
+	var feed rssFeed
+	if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
+		return nil, fmt.Errorf("parse RSS: %w", err)
+	}
+
+	var articles []scraper.Article
+	for _, item := range feed.Channel.Items {
+		title := strings.TrimSpace(item.Title)
+		link := strings.TrimSpace(item.Link)
+		if title == "" || link == "" {
+			continue
+		}
+		content := strings.TrimSpace(item.Desc)
+		if content == "" {
+			content = title
+		}
+		articles = append(articles, scraper.Article{
+			Title:       title,
+			Content:     content,
+			URL:         link,
+			PublishedAt: parsePubDate(item.PubDate),
+		})
+	}
+	return articles, nil
+}
+
+// parsePubDate parses an RSS pubDate value. It returns nil when the value
+// matches none of pubDateLayouts.
+func parsePubDate(raw string) *time.Time {
+	raw = strings.TrimSpace(raw)
+	for _, layout := range pubDateLayouts {
+		if t, err := time.Parse(layout, raw); err == nil {
+			return &t
+		}
+	}
+	return nil
+}
+
+// scrapeHTML is the fallback used when no RSS feed is usable. It collects
+// anchors of the news page that look like article links, using the anchor
+// text as both title and content. No publication date is extracted, so
+// every article is stamped with the scrape time.
+func (w *WatcherGuru) scrapeHTML(ctx context.Context) ([]scraper.Article, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/news/", nil)
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36")
+	req.Header.Set("Accept", "text/html,application/xhtml+xml")
+
+	resp, err := w.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
+		return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
+	}
+
+	doc, err := html.Parse(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("parse HTML: %w", err)
+	}
+
+	var articles []scraper.Article
+	seen := make(map[string]bool)
+	now := time.Now()
+
+	// collect records anchor n as an article when it looks like a news link.
+	collect := func(n *html.Node) {
+		href := attrVal(n, "href")
+		if !strings.Contains(href, "/news/") && !strings.Contains(href, "watcher.guru") {
+			return
+		}
+		// Short anchor texts are presumably navigation labels, not headlines.
+		text := strings.TrimSpace(nodeText(n))
+		if len(text) <= 20 {
+			return
+		}
+		link := href
+		switch {
+		case strings.HasPrefix(link, "//"):
+			link = "https:" + link
+		case !strings.HasPrefix(link, "http"):
+			link = baseURL + link
+		}
+		if seen[link] {
+			return
+		}
+		seen[link] = true
+		articles = append(articles, scraper.Article{
+			Title:       text,
+			Content:     text,
+			URL:         link,
+			PublishedAt: &now,
+		})
+	}
+
+	// Every node is visited exactly once; walk is never called with nil.
+	var walk func(*html.Node)
+	walk = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			collect(n)
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			walk(c)
+		}
+	}
+	walk(doc)
+
+	if len(articles) > maxHTMLArticles {
+		articles = articles[:maxHTMLArticles]
+	}
+	return articles, nil
+}
+
+// attrVal returns the value of attribute key on n, or "" when it is absent.
+func attrVal(n *html.Node, key string) string {
+	for _, a := range n.Attr {
+		if a.Key == key {
+			return a.Val
+		}
+	}
+	return ""
+}
+
+// nodeText returns the concatenated text of n and all of its descendants.
+func nodeText(n *html.Node) string {
+	if n.Type == html.TextNode {
+		return n.Data
+	}
+	var sb strings.Builder
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		sb.WriteString(nodeText(c))
+	}
+	return sb.String()
+}
diff --git a/backend/internal/scraper/yahoofinance/yahoofinance.go b/backend/internal/scraper/yahoofinance/yahoofinance.go
index 1e25911..9151b45 100644
--- a/backend/internal/scraper/yahoofinance/yahoofinance.go
+++ b/backend/internal/scraper/yahoofinance/yahoofinance.go
@@ -86,8 +86,13 @@ func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scrape
 		return nil, fmt.Errorf("parse RSS: %w", err)
 	}
 
+	const maxPerSymbol = 5
+
 	var articles []scraper.Article
 	for _, item := range feed.Channel.Items {
+		if len(articles) >= maxPerSymbol {
+			break
+		}
 		title := strings.TrimSpace(item.Title)
 		link := strings.TrimSpace(item.Link)
 		if title == "" || link == "" {
diff --git a/docker-compose.yml b/docker-compose.yml
index 1e2b8b6..5d55079 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,6 +14,14 @@ services:
       timeout: 5s
       retries: 5
 
+  scraper:
+    build:
+      context: ./scraper-service
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    expose:
+      - "3001"
+
   backend:
     build:
       context: ./backend
@@ -22,11 +30,14 @@ services:
     depends_on:
       postgres:
         condition: service_healthy
+      scraper:
+        condition: service_started
     environment:
       DATABASE_URL: "host=postgres port=5432 user=${POSTGRES_USER:-tradarr} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB:-tradarr} sslmode=disable"
       JWT_SECRET: ${JWT_SECRET:?JWT_SECRET is required}
       ENCRYPTION_KEY: ${ENCRYPTION_KEY:?ENCRYPTION_KEY must be 32 bytes hex}
       PORT: "8080"
+      SCRAPER_URL: "http://scraper:3001"
       ADMIN_EMAIL:
${ADMIN_EMAIL:-admin@tradarr.local} ADMIN_PASSWORD: ${ADMIN_PASSWORD:-changeme} expose: diff --git a/frontend/src/api/admin.ts b/frontend/src/api/admin.ts index e3bc65c..c73e0c6 100644 --- a/frontend/src/api/admin.ts +++ b/frontend/src/api/admin.ts @@ -12,6 +12,7 @@ export interface ScrapeJob { articles_found: number; error_msg: string; created_at: string } export interface Setting { key: string; value: string } +export interface ScheduleSlot { id?: string; day_of_week: number; hour: number; minute: number } export interface AdminUser { id: string; email: string; role: string; created_at: string } export interface Credential { source_id: string; source_name: string; username: string; has_password: boolean } @@ -44,6 +45,10 @@ export const adminApi = { updateSettings: (settings: Setting[]) => api.put('/admin/settings', { settings }), getDefaultPrompt: () => api.get<{ prompt: string }>('/admin/settings/default-prompt'), + // Schedule + getSchedule: () => api.get('/admin/schedule'), + updateSchedule: (slots: ScheduleSlot[]) => api.put('/admin/schedule', { slots }), + // Users listUsers: () => api.get('/admin/users'), updateUser: (id: string, email: string, role: string) => diff --git a/frontend/src/components/layout/Sidebar.tsx b/frontend/src/components/layout/Sidebar.tsx index 894dbf6..87af5e2 100644 --- a/frontend/src/components/layout/Sidebar.tsx +++ b/frontend/src/components/layout/Sidebar.tsx @@ -1,5 +1,5 @@ import { NavLink } from 'react-router-dom' -import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp } from 'lucide-react' +import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp, CalendarDays } from 'lucide-react' import { useAuth } from '@/lib/auth' import { cn } from '@/lib/cn' @@ -15,6 +15,7 @@ const adminItems = [ { to: '/admin/sources', icon: Database, label: 'Sources' }, { to: '/admin/jobs', icon: ClipboardList, label: 'Jobs' }, { to: 
'/admin/users', icon: Users, label: 'Utilisateurs' }, + { to: '/admin/schedule', icon: CalendarDays, label: 'Planning' }, { to: '/admin/settings', icon: Settings, label: 'Paramètres' }, ] diff --git a/frontend/src/lib/router.tsx b/frontend/src/lib/router.tsx index 6825f65..f76bf69 100644 --- a/frontend/src/lib/router.tsx +++ b/frontend/src/lib/router.tsx @@ -11,6 +11,7 @@ import { Sources } from '@/pages/admin/Sources' import { Jobs } from '@/pages/admin/Jobs' import { AdminUsers } from '@/pages/admin/AdminUsers' import { AdminSettings } from '@/pages/admin/AdminSettings' +import { Schedule } from '@/pages/admin/Schedule' export const router = createBrowserRouter([ { path: '/login', element: }, @@ -31,6 +32,7 @@ export const router = createBrowserRouter([ { path: 'jobs', element: }, { path: 'users', element: }, { path: 'settings', element: }, + { path: 'schedule', element: }, ], }, ], diff --git a/frontend/src/pages/admin/Schedule.tsx b/frontend/src/pages/admin/Schedule.tsx new file mode 100644 index 0000000..fc8f2d1 --- /dev/null +++ b/frontend/src/pages/admin/Schedule.tsx @@ -0,0 +1,145 @@ +import { useState, useEffect } from 'react' +import { Plus, Trash2, Save } from 'lucide-react' +import { adminApi, type ScheduleSlot } from '@/api/admin' +import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' +import { Button } from '@/components/ui/button' +import { Spinner } from '@/components/ui/spinner' + +const DAYS = [ + { label: 'Lundi', short: 'LUN', value: 1 }, + { label: 'Mardi', short: 'MAR', value: 2 }, + { label: 'Mercredi', short: 'MER', value: 3 }, + { label: 'Jeudi', short: 'JEU', value: 4 }, + { label: 'Vendredi', short: 'VEN', value: 5 }, + { label: 'Samedi', short: 'SAM', value: 6 }, + { label: 'Dimanche', short: 'DIM', value: 0 }, +] + +type SlotKey = `${number}-${number}-${number}` + +function toKey(s: ScheduleSlot): SlotKey { + return `${s.day_of_week}-${s.hour}-${s.minute}` +} + +function fmt(h: number, m: number) { + return 
`${String(h).padStart(2, '0')}:${String(m).padStart(2, '0')}` +} + +export function Schedule() { + const [slots, setSlots] = useState([]) + const [loading, setLoading] = useState(true) + const [saving, setSaving] = useState(false) + const [saved, setSaved] = useState(false) + const [newTimes, setNewTimes] = useState>({}) + + useEffect(() => { load() }, []) + + async function load() { + setLoading(true) + try { setSlots((await adminApi.getSchedule()) ?? []) } finally { setLoading(false) } + } + + function slotsForDay(day: number) { + return slots + .filter(s => s.day_of_week === day) + .sort((a, b) => a.hour !== b.hour ? a.hour - b.hour : a.minute - b.minute) + } + + function addSlot(day: number) { + const time = newTimes[day] || '06:00' + const [h, m] = time.split(':').map(Number) + const newSlot: ScheduleSlot = { day_of_week: day, hour: h, minute: m } + if (slots.some(s => toKey(s) === toKey(newSlot))) return + setSlots(prev => [...prev, newSlot]) + setNewTimes(p => ({ ...p, [day]: '06:00' })) + } + + function removeSlot(slot: ScheduleSlot) { + setSlots(prev => prev.filter(s => toKey(s) !== toKey(slot))) + } + + async function save() { + setSaving(true); setSaved(false) + await adminApi.updateSchedule(slots) + setSaving(false); setSaved(true) + setTimeout(() => setSaved(false), 2000) + } + + if (loading) return
+ + return ( +
+
+
+

Planning hebdomadaire

+

+ Définissez les créneaux de scraping + résumé IA pour chaque jour +

+
+ +
+ +
+ {DAYS.map(day => { + const daySlots = slotsForDay(day.value) + const isWeekend = day.value === 0 || day.value === 6 + return ( + + + + {day.label} + {day.short} + + + + {/* Créneaux existants */} + {daySlots.length === 0 && ( +

Aucun créneau

+ )} + {daySlots.map(slot => ( +
+ + {fmt(slot.hour, slot.minute)} + + +
+ ))} + + {/* Ajout d'un créneau */} +
+ setNewTimes(p => ({ ...p, [day.value]: e.target.value }))} + className="flex-1 min-w-0 rounded border border-input bg-background px-2 py-1 text-xs font-mono focus:outline-none focus:ring-1 focus:ring-ring" + /> + +
+
+
+ ) + })} +
+ +

+ À chaque créneau, le service lance le scraping de toutes les sources actives puis génère les résumés IA. +

+
+ ) +} diff --git a/frontend/tsconfig.json b/frontend/tsconfig.json index 1250ba5..1d6297d 100644 --- a/frontend/tsconfig.json +++ b/frontend/tsconfig.json @@ -15,8 +15,7 @@ "noUnusedLocals": true, "noUnusedParameters": true, "noFallthroughCasesInSwitch": true, - "baseUrl": ".", - "paths": { "@/*": ["src/*"] } + "paths": { "@/*": ["./src/*"] } }, "include": ["src"] } diff --git a/scraper-service/Dockerfile b/scraper-service/Dockerfile new file mode 100644 index 0000000..3c2cc2b --- /dev/null +++ b/scraper-service/Dockerfile @@ -0,0 +1,46 @@ +FROM node:20-slim + +RUN apt-get update && apt-get install -y \ + chromium \ + fonts-liberation \ + libasound2 \ + libatk-bridge2.0-0 \ + libatk1.0-0 \ + libcairo2 \ + libcups2 \ + libdbus-1-3 \ + libexpat1 \ + libfontconfig1 \ + libgbm1 \ + libglib2.0-0 \ + libgtk-3-0 \ + libnspr4 \ + libnss3 \ + libpango-1.0-0 \ + libpangocairo-1.0-0 \ + libx11-6 \ + libx11-xcb1 \ + libxcb1 \ + libxcomposite1 \ + libxcursor1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxi6 \ + libxrandr2 \ + libxrender1 \ + libxss1 \ + libxtst6 \ + --no-install-recommends \ + && rm -rf /var/lib/apt/lists/* + +ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium + +WORKDIR /app +COPY package*.json ./ +RUN npm install --omit=dev +COPY . . 
+
+EXPOSE 3001
+CMD ["node", "index.js"]
diff --git a/scraper-service/index.js b/scraper-service/index.js
new file mode 100644
index 0000000..ca048c8
--- /dev/null
+++ b/scraper-service/index.js
@@ -0,0 +1,224 @@
+const express = require('express')
+const puppeteer = require('puppeteer-extra')
+const StealthPlugin = require('puppeteer-extra-plugin-stealth')
+
+puppeteer.use(StealthPlugin())
+
+const app = express()
+app.use(express.json())
+
+const CHROME_PATH = process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium'
+const PORT = process.env.PORT || 3001
+
+// Launches a headless Chromium at CHROME_PATH with sandboxing disabled and a
+// 1920x1080 window. Resolves to the puppeteer Browser.
+function launchBrowser() {
+  return puppeteer.launch({
+    executablePath: CHROME_PATH,
+    headless: true,
+    args: [
+      '--no-sandbox',
+      '--disable-setuid-sandbox',
+      '--disable-dev-shm-usage',
+      '--disable-gpu',
+      '--window-size=1920,1080',
+      '--disable-blink-features=AutomationControlled',
+    ],
+  })
+}
+
+// Clicks the first selector that matches an element and resolves to true.
+// When none can be clicked it presses Enter as a fallback and resolves to
+// false. Errors from individual selectors are ignored on purpose.
+async function tryClick(page, selectors) {
+  for (const sel of selectors) {
+    try {
+      const el = await page.$(sel)
+      if (el) { await el.click(); return true }
+    } catch {}
+  }
+  await page.keyboard.press('Enter')
+  return false
+}
+
+// Types text into the first selector that appears within 4 seconds and
+// resolves to true; resolves to false when no selector could be used.
+// Errors from individual selectors are ignored on purpose.
+async function tryType(page, selectors, text) {
+  for (const sel of selectors) {
+    try {
+      await page.waitForSelector(sel, { timeout: 4000 })
+      await page.type(sel, text, { delay: 60 })
+      return true
+    } catch {}
+  }
+  return false
+}
+
+app.get('/health', (_, res) => res.json({ ok: true }))
+
+// POST /bloomberg/scrape — body: { username, password }.
+// Logs in to Bloomberg with a fresh browser, then collects article links from
+// a fixed list of section pages. Responds 200 { articles: [{ title, url }] },
+// 400 when credentials are missing, or 500 { error } on failure.
+app.post('/bloomberg/scrape', async (req, res) => {
+  const { username, password } = req.body || {}
+  if (!username || !password) {
+    return res.status(400).json({ error: 'username and password required' })
+  }
+
+  let browser
+  try {
+    browser = await launchBrowser()
+    const page = await browser.newPage()
+    await page.setViewport({ width: 1920, height: 1080 })
+
+    // Hide automation signals
+    await page.evaluateOnNewDocument(() => {
+      Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
+      window.chrome = { runtime: {} }
+      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] })
+    })
+
+    console.log('[bloomberg] navigating to login page')
+    await page.goto('https://www.bloomberg.com/account/signin', {
+      waitUntil: 'networkidle2',
+      timeout: 60000,
+    })
+    await new Promise(r => setTimeout(r, 2000))
+
+    // Debug: state of the page before looking for the email input
+    const pageInputs = await page.evaluate(() =>
+      Array.from(document.querySelectorAll('input')).map(i => ({
+        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder, visible: i.offsetParent !== null
+      }))
+    )
+    console.log('[bloomberg] inputs on login page:', JSON.stringify(pageInputs))
+    const pageTitle = await page.title()
+    console.log('[bloomberg] page title:', pageTitle)
+
+    console.log('[bloomberg] entering email')
+    const emailSelectors = [
+      '#email-form-input',
+      'input[id="email-form-input"]',
+      'input[type="email"]',
+      'input[name="text-input"]',
+      'input[placeholder*="email" i]',
+    ]
+    const emailOk = await tryType(page, emailSelectors, username)
+    if (!emailOk) throw new Error('could not find email input')
+
+    await new Promise(r => setTimeout(r, 800))
+
+    // Click submit from page JS to get around disabled buttons
+    const submitted = await page.evaluate(() => {
+      const btns = Array.from(document.querySelectorAll('button'))
+      const btn = btns.find(b =>
+        b.type === 'submit' ||
+        /continue|next|sign.?in/i.test(b.textContent)
+      )
+      if (btn) { btn.click(); return true }
+      const form = document.querySelector('form')
+      if (form) { form.submit(); return true }
+      return false
+    })
+    if (!submitted) await page.keyboard.press('Enter')
+
+    // Wait for the page to change (password input appears, or navigation)
+    try {
+      await page.waitForFunction(
+        () => document.querySelector('input[type="password"]') !== null,
+        { timeout: 10000 }
+      )
+    } catch {
+      await new Promise(r => setTimeout(r, 3000))
+    }
+    console.log('[bloomberg] after email submit, url:', page.url())
+
+    // Debug: inputs available at this point
+    const allInputs = await page.evaluate(() =>
+      Array.from(document.querySelectorAll('input')).map(i => ({
+        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder
+      }))
+    )
+    console.log('[bloomberg] inputs after email submit:', JSON.stringify(allInputs))
+
+    console.log('[bloomberg] entering password')
+    const pwdOk = await tryType(page, [
+      'input[type="password"]',
+      'input[name="password"]',
+      'input[autocomplete="current-password"]',
+      'input[autocomplete="password"]',
+    ], password)
+    if (!pwdOk) throw new Error('could not find password input — check logs above for available inputs')
+
+    await new Promise(r => setTimeout(r, 500))
+    await tryClick(page, ['button[type="submit"]', 'input[type="submit"]'])
+    await new Promise(r => setTimeout(r, 3000))
+
+    const currentURL = page.url()
+    console.log('[bloomberg] after login, url:', currentURL)
+    // NOTE(review): login success is not verified here; if sign-in failed the
+    // section pages below are scraped anonymously — confirm this is intended.
+
+    const pages = [
+      'https://www.bloomberg.com/markets',
+      'https://www.bloomberg.com/technology',
+      'https://www.bloomberg.com/economics',
+    ]
+
+    const articles = []
+    const seen = new Set()
+
+    for (const url of pages) {
+      try {
+        console.log('[bloomberg] scraping', url)
+        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 })
+        await new Promise(r => setTimeout(r, 2000))
+
+        const items = await page.evaluate(() => {
+          const results = []
+          // Runs in the page context: this `seen` is separate from the outer one.
+          const seen = new Set()
+          const links = document.querySelectorAll(
+            'a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]'
+          )
+          links.forEach(a => {
+            if (seen.has(a.href)) return
+            seen.add(a.href)
+            const titleEl = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]')
+            const text = titleEl ? titleEl.innerText.trim() : a.innerText.trim()
+            if (text.length > 20 && a.href.includes('bloomberg.com')) {
+              results.push({ title: text, url: a.href })
+            }
+          })
+          return results.slice(0, 25)
+        })
+
+        for (const item of items) {
+          if (!seen.has(item.url) && item.title && item.url) {
+            seen.add(item.url)
+            articles.push(item)
+          }
+        }
+        console.log('[bloomberg]', url, '->', items.length, 'articles')
+      } catch (e) {
+        console.error('[bloomberg] error on', url, ':', e.message)
+      }
+    }
+
+    console.log('[bloomberg] total:', articles.length, 'articles')
+    res.json({ articles })
+  } catch (e) {
+    console.error('[bloomberg] scrape error:', e.message)
+    res.status(500).json({ error: e.message })
+  } finally {
+    // The response has already been sent; a rejected close() must not escape
+    // this async handler, where Express 4 would leave it unhandled.
+    if (browser) {
+      await browser.close().catch(e => console.error('[bloomberg] browser close error:', e.message))
+    }
+  }
+})
+
+app.listen(PORT, () => console.log(`scraper-service listening on :${PORT}`))
diff --git a/scraper-service/package.json b/scraper-service/package.json
new file mode 100644
index 0000000..42602d8
--- /dev/null
+++ b/scraper-service/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "tradarr-scraper-service",
+  "version": "1.0.0",
+  "main": "index.js",
+  "scripts": {
+    "start": "node index.js"
+  },
+  "dependencies": {
+    "express": "^4.19.2",
+    "puppeteer-extra": "^3.3.6",
+    "puppeteer-extra-plugin-stealth": "^2.11.2",
+    "puppeteer": "^22.0.0"
+  }
+}