feat: add sources to retrieve news and divide the IA reflexions in 2 steps to limit the number of news
This commit is contained in:
@ -15,6 +15,8 @@ import (
|
|||||||
"github.com/tradarr/backend/internal/scheduler"
|
"github.com/tradarr/backend/internal/scheduler"
|
||||||
"github.com/tradarr/backend/internal/scraper"
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
"github.com/tradarr/backend/internal/scraper/bloomberg"
|
"github.com/tradarr/backend/internal/scraper/bloomberg"
|
||||||
|
"github.com/tradarr/backend/internal/scraper/reuters"
|
||||||
|
"github.com/tradarr/backend/internal/scraper/watcherguru"
|
||||||
"github.com/tradarr/backend/internal/scraper/yahoofinance"
|
"github.com/tradarr/backend/internal/scraper/yahoofinance"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -38,30 +40,23 @@ func main() {
|
|||||||
enc := crypto.New(cfg.EncryptionKey)
|
enc := crypto.New(cfg.EncryptionKey)
|
||||||
pipeline := ai.NewPipeline(repo, enc)
|
pipeline := ai.NewPipeline(repo, enc)
|
||||||
|
|
||||||
// Créer le compte admin initial si nécessaire
|
|
||||||
if err := ensureAdmin(repo, cfg); err != nil {
|
if err := ensureAdmin(repo, cfg); err != nil {
|
||||||
log.Printf("ensure admin: %v", err)
|
log.Printf("ensure admin: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configurer les scrapers
|
|
||||||
registry := scraper.NewRegistry(repo)
|
registry := scraper.NewRegistry(repo)
|
||||||
|
registry.Register(bloomberg.NewDynamic(repo, enc, cfg.ScraperURL))
|
||||||
|
registry.Register(yahoofinance.New())
|
||||||
|
registry.Register(reuters.New())
|
||||||
|
registry.Register(watcherguru.New())
|
||||||
|
|
||||||
// Bloomberg (credentials chargés depuis la DB à chaque run)
|
|
||||||
bbScraper := bloomberg.NewDynamic(repo, enc, cfg.ChromePath)
|
|
||||||
registry.Register(bbScraper)
|
|
||||||
|
|
||||||
stScraper := yahoofinance.New()
|
|
||||||
registry.Register(stScraper)
|
|
||||||
|
|
||||||
// Scheduler
|
|
||||||
sched := scheduler.New(registry, pipeline, repo)
|
sched := scheduler.New(registry, pipeline, repo)
|
||||||
if err := sched.Start(); err != nil {
|
if err := sched.Start(); err != nil {
|
||||||
log.Printf("scheduler: %v", err)
|
log.Printf("scheduler: %v", err)
|
||||||
}
|
}
|
||||||
defer sched.Stop()
|
defer sched.Stop()
|
||||||
|
|
||||||
// API
|
h := handlers.New(repo, cfg, enc, registry, pipeline, sched)
|
||||||
h := handlers.New(repo, cfg, enc, registry, pipeline)
|
|
||||||
r := api.SetupRouter(h, cfg.JWTSecret)
|
r := api.SetupRouter(h, cfg.JWTSecret)
|
||||||
|
|
||||||
addr := fmt.Sprintf(":%s", cfg.Port)
|
addr := fmt.Sprintf(":%s", cfg.Port)
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package ai
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@ -31,7 +32,6 @@ func NewPipeline(repo *models.Repository, enc *crypto.Encryptor) *Pipeline {
|
|||||||
return &Pipeline{repo: repo, enc: enc}
|
return &Pipeline{repo: repo, enc: enc}
|
||||||
}
|
}
|
||||||
|
|
||||||
// BuildProvider instancie un provider à partir de ses paramètres
|
|
||||||
func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error) {
|
func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error) {
|
||||||
provider, err := p.repo.GetActiveAIProvider()
|
provider, err := p.repo.GetActiveAIProvider()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -44,9 +44,7 @@ func (p *Pipeline) BuildProvider(name, apiKey, endpoint string) (Provider, error
|
|||||||
return NewProvider(name, apiKey, model, endpoint)
|
return NewProvider(name, apiKey, model, endpoint)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenerateForUser génère un résumé personnalisé pour un utilisateur
|
|
||||||
func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.Summary, error) {
|
func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.Summary, error) {
|
||||||
// Récupérer le provider actif
|
|
||||||
providerCfg, err := p.repo.GetActiveAIProvider()
|
providerCfg, err := p.repo.GetActiveAIProvider()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("get active provider: %w", err)
|
return nil, fmt.Errorf("get active provider: %w", err)
|
||||||
@ -68,7 +66,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
|
|||||||
return nil, fmt.Errorf("build provider: %w", err)
|
return nil, fmt.Errorf("build provider: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Récupérer la watchlist de l'utilisateur (pour le contexte IA uniquement)
|
|
||||||
assets, err := p.repo.GetUserAssets(userID)
|
assets, err := p.repo.GetUserAssets(userID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("get user assets: %w", err)
|
return nil, fmt.Errorf("get user assets: %w", err)
|
||||||
@ -78,7 +75,6 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
|
|||||||
symbols[i] = a.Symbol
|
symbols[i] = a.Symbol
|
||||||
}
|
}
|
||||||
|
|
||||||
// Récupérer TOUS les articles récents, toutes sources confondues
|
|
||||||
hoursStr, _ := p.repo.GetSetting("articles_lookback_hours")
|
hoursStr, _ := p.repo.GetSetting("articles_lookback_hours")
|
||||||
hours, _ := strconv.Atoi(hoursStr)
|
hours, _ := strconv.Atoi(hoursStr)
|
||||||
if hours == 0 {
|
if hours == 0 {
|
||||||
@ -98,16 +94,21 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
|
|||||||
if maxArticles == 0 {
|
if maxArticles == 0 {
|
||||||
maxArticles = 50
|
maxArticles = 50
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Passe 1 : filtrage par pertinence sur les titres si trop d'articles
|
||||||
if len(articles) > maxArticles {
|
if len(articles) > maxArticles {
|
||||||
articles = articles[:maxArticles]
|
fmt.Printf("pipeline: %d articles → filtering to %d via AI\n", len(articles), maxArticles)
|
||||||
|
articles = p.filterByRelevance(ctx, provider, symbols, articles, maxArticles)
|
||||||
|
fmt.Printf("pipeline: %d articles retained after filtering\n", len(articles))
|
||||||
}
|
}
|
||||||
|
|
||||||
systemPrompt, _ := p.repo.GetSetting("ai_system_prompt")
|
systemPrompt, _ := p.repo.GetSetting("ai_system_prompt")
|
||||||
if systemPrompt == "" {
|
if systemPrompt == "" {
|
||||||
systemPrompt = DefaultSystemPrompt
|
systemPrompt = DefaultSystemPrompt
|
||||||
}
|
}
|
||||||
prompt := buildPrompt(systemPrompt, symbols, articles)
|
|
||||||
|
|
||||||
|
// Passe 2 : résumé complet
|
||||||
|
prompt := buildPrompt(systemPrompt, symbols, articles)
|
||||||
summary, err := provider.Summarize(ctx, prompt)
|
summary, err := provider.Summarize(ctx, prompt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("AI summarize: %w", err)
|
return nil, fmt.Errorf("AI summarize: %w", err)
|
||||||
@ -116,7 +117,77 @@ func (p *Pipeline) GenerateForUser(ctx context.Context, userID string) (*models.
|
|||||||
return p.repo.CreateSummary(userID, summary, &providerCfg.ID)
|
return p.repo.CreateSummary(userID, summary, &providerCfg.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GenerateForAll génère les résumés pour tous les utilisateurs ayant une watchlist
|
// filterByRelevance demande à l'IA de sélectionner les articles les plus pertinents
|
||||||
|
// en ne lui envoyant que les titres (prompt très court = rapide).
|
||||||
|
func (p *Pipeline) filterByRelevance(ctx context.Context, provider Provider, symbols []string, articles []models.Article, max int) []models.Article {
|
||||||
|
prompt := buildFilterPrompt(symbols, articles, max)
|
||||||
|
response, err := provider.Summarize(ctx, prompt)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("pipeline: filter AI call failed (%v), falling back to truncation\n", err)
|
||||||
|
return articles[:max]
|
||||||
|
}
|
||||||
|
|
||||||
|
indices := parseIndexArray(response, len(articles))
|
||||||
|
if len(indices) == 0 {
|
||||||
|
fmt.Printf("pipeline: could not parse filter response, falling back to truncation\n")
|
||||||
|
return articles[:max]
|
||||||
|
}
|
||||||
|
|
||||||
|
filtered := make([]models.Article, 0, len(indices))
|
||||||
|
for _, i := range indices {
|
||||||
|
filtered = append(filtered, articles[i])
|
||||||
|
if len(filtered) >= max {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildFilterPrompt(symbols []string, articles []models.Article, max int) string {
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("Tu es un assistant de trading financier. ")
|
||||||
|
sb.WriteString(fmt.Sprintf("Parmi les %d articles ci-dessous, sélectionne les %d plus pertinents pour un trader actif.\n", len(articles), max))
|
||||||
|
|
||||||
|
if len(symbols) > 0 {
|
||||||
|
sb.WriteString("Actifs surveillés (priorité haute) : ")
|
||||||
|
sb.WriteString(strings.Join(symbols, ", "))
|
||||||
|
sb.WriteString("\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.WriteString(fmt.Sprintf("\nRéponds UNIQUEMENT avec un tableau JSON des indices sélectionnés (base 0), exemple : [0, 3, 7, 12]\n"))
|
||||||
|
sb.WriteString("N'ajoute aucun texte avant ou après le tableau JSON.\n\n")
|
||||||
|
sb.WriteString("Articles :\n")
|
||||||
|
|
||||||
|
for i, a := range articles {
|
||||||
|
sb.WriteString(fmt.Sprintf("[%d] %s (%s)\n", i, a.Title, a.SourceName))
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
var jsonArrayRe = regexp.MustCompile(`\[[\d\s,]+\]`)
|
||||||
|
|
||||||
|
func parseIndexArray(response string, maxIndex int) []int {
|
||||||
|
match := jsonArrayRe.FindString(response)
|
||||||
|
if match == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
match = strings.Trim(match, "[]")
|
||||||
|
parts := strings.Split(match, ",")
|
||||||
|
|
||||||
|
seen := make(map[int]bool)
|
||||||
|
var indices []int
|
||||||
|
for _, p := range parts {
|
||||||
|
n, err := strconv.Atoi(strings.TrimSpace(p))
|
||||||
|
if err != nil || n < 0 || n >= maxIndex || seen[n] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[n] = true
|
||||||
|
indices = append(indices, n)
|
||||||
|
}
|
||||||
|
return indices
|
||||||
|
}
|
||||||
|
|
||||||
func (p *Pipeline) GenerateForAll(ctx context.Context) error {
|
func (p *Pipeline) GenerateForAll(ctx context.Context) error {
|
||||||
users, err := p.repo.ListUsers()
|
users, err := p.repo.ListUsers()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -287,6 +287,45 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
|
|||||||
httputil.OK(c, gin.H{"ok": true})
|
httputil.OK(c, gin.H{"ok": true})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Schedule ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (h *Handler) GetSchedule(c *gin.Context) {
|
||||||
|
slots, err := h.repo.ListScheduleSlots()
|
||||||
|
if err != nil {
|
||||||
|
httputil.InternalError(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
httputil.OK(c, slots)
|
||||||
|
}
|
||||||
|
|
||||||
|
type scheduleRequest struct {
|
||||||
|
Slots []struct {
|
||||||
|
DayOfWeek int `json:"day_of_week"`
|
||||||
|
Hour int `json:"hour"`
|
||||||
|
Minute int `json:"minute"`
|
||||||
|
} `json:"slots"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *Handler) UpdateSchedule(c *gin.Context) {
|
||||||
|
var req scheduleRequest
|
||||||
|
if err := c.ShouldBindJSON(&req); err != nil {
|
||||||
|
httputil.BadRequest(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
slots := make([]models.ScheduleSlot, len(req.Slots))
|
||||||
|
for i, s := range req.Slots {
|
||||||
|
slots[i] = models.ScheduleSlot{DayOfWeek: s.DayOfWeek, Hour: s.Hour, Minute: s.Minute}
|
||||||
|
}
|
||||||
|
if err := h.repo.ReplaceSchedule(slots); err != nil {
|
||||||
|
httputil.InternalError(c, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := h.scheduler.Reload(); err != nil {
|
||||||
|
fmt.Printf("schedule reload: %v\n", err)
|
||||||
|
}
|
||||||
|
httputil.OK(c, gin.H{"ok": true})
|
||||||
|
}
|
||||||
|
|
||||||
func (h *Handler) GetDefaultSystemPrompt(c *gin.Context) {
|
func (h *Handler) GetDefaultSystemPrompt(c *gin.Context) {
|
||||||
httputil.OK(c, gin.H{"prompt": ai.DefaultSystemPrompt})
|
httputil.OK(c, gin.H{"prompt": ai.DefaultSystemPrompt})
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import (
|
|||||||
"github.com/tradarr/backend/internal/config"
|
"github.com/tradarr/backend/internal/config"
|
||||||
"github.com/tradarr/backend/internal/crypto"
|
"github.com/tradarr/backend/internal/crypto"
|
||||||
"github.com/tradarr/backend/internal/models"
|
"github.com/tradarr/backend/internal/models"
|
||||||
|
"github.com/tradarr/backend/internal/scheduler"
|
||||||
"github.com/tradarr/backend/internal/scraper"
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -14,6 +15,7 @@ type Handler struct {
|
|||||||
enc *crypto.Encryptor
|
enc *crypto.Encryptor
|
||||||
registry *scraper.Registry
|
registry *scraper.Registry
|
||||||
pipeline *ai.Pipeline
|
pipeline *ai.Pipeline
|
||||||
|
scheduler *scheduler.Scheduler
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(
|
func New(
|
||||||
@ -22,6 +24,7 @@ func New(
|
|||||||
enc *crypto.Encryptor,
|
enc *crypto.Encryptor,
|
||||||
registry *scraper.Registry,
|
registry *scraper.Registry,
|
||||||
pipeline *ai.Pipeline,
|
pipeline *ai.Pipeline,
|
||||||
|
sched *scheduler.Scheduler,
|
||||||
) *Handler {
|
) *Handler {
|
||||||
return &Handler{
|
return &Handler{
|
||||||
repo: repo,
|
repo: repo,
|
||||||
@ -29,5 +32,6 @@ func New(
|
|||||||
enc: enc,
|
enc: enc,
|
||||||
registry: registry,
|
registry: registry,
|
||||||
pipeline: pipeline,
|
pipeline: pipeline,
|
||||||
|
scheduler: sched,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -65,6 +65,9 @@ func SetupRouter(h *handlers.Handler, jwtSecret string) *gin.Engine {
|
|||||||
admin.PUT("/settings", h.UpdateSettings)
|
admin.PUT("/settings", h.UpdateSettings)
|
||||||
admin.GET("/settings/default-prompt", h.GetDefaultSystemPrompt)
|
admin.GET("/settings/default-prompt", h.GetDefaultSystemPrompt)
|
||||||
|
|
||||||
|
admin.GET("/schedule", h.GetSchedule)
|
||||||
|
admin.PUT("/schedule", h.UpdateSchedule)
|
||||||
|
|
||||||
admin.GET("/users", h.ListUsers)
|
admin.GET("/users", h.ListUsers)
|
||||||
admin.PUT("/users/:id", h.UpdateAdminUser)
|
admin.PUT("/users/:id", h.UpdateAdminUser)
|
||||||
admin.DELETE("/users/:id", h.DeleteAdminUser)
|
admin.DELETE("/users/:id", h.DeleteAdminUser)
|
||||||
|
|||||||
@ -11,7 +11,7 @@ type Config struct {
|
|||||||
JWTSecret string
|
JWTSecret string
|
||||||
EncryptionKey []byte
|
EncryptionKey []byte
|
||||||
Port string
|
Port string
|
||||||
ChromePath string
|
ScraperURL string
|
||||||
AdminEmail string
|
AdminEmail string
|
||||||
AdminPassword string
|
AdminPassword string
|
||||||
}
|
}
|
||||||
@ -41,12 +41,17 @@ func Load() (*Config, error) {
|
|||||||
port = "8080"
|
port = "8080"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scraperURL := os.Getenv("SCRAPER_URL")
|
||||||
|
if scraperURL == "" {
|
||||||
|
scraperURL = "http://scraper:3001"
|
||||||
|
}
|
||||||
|
|
||||||
return &Config{
|
return &Config{
|
||||||
DatabaseURL: dbURL,
|
DatabaseURL: dbURL,
|
||||||
JWTSecret: jwtSecret,
|
JWTSecret: jwtSecret,
|
||||||
EncryptionKey: encKey,
|
EncryptionKey: encKey,
|
||||||
Port: port,
|
Port: port,
|
||||||
ChromePath: os.Getenv("CHROME_PATH"),
|
ScraperURL: scraperURL,
|
||||||
AdminEmail: os.Getenv("ADMIN_EMAIL"),
|
AdminEmail: os.Getenv("ADMIN_EMAIL"),
|
||||||
AdminPassword: os.Getenv("ADMIN_PASSWORD"),
|
AdminPassword: os.Getenv("ADMIN_PASSWORD"),
|
||||||
}, nil
|
}, nil
|
||||||
|
|||||||
@ -21,7 +21,7 @@ CREATE TABLE user_assets (
|
|||||||
CREATE TABLE sources (
|
CREATE TABLE sources (
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
name TEXT NOT NULL,
|
name TEXT NOT NULL,
|
||||||
type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits')),
|
type TEXT NOT NULL CHECK (type IN ('bloomberg', 'stocktwits', 'reuters', 'watcherguru')),
|
||||||
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||||
);
|
);
|
||||||
@ -97,7 +97,7 @@ CREATE INDEX idx_user_assets_user_id ON user_assets(user_id);
|
|||||||
-- Sources initiales
|
-- Sources initiales
|
||||||
INSERT INTO sources (name, type, enabled) VALUES
|
INSERT INTO sources (name, type, enabled) VALUES
|
||||||
('Bloomberg', 'bloomberg', TRUE),
|
('Bloomberg', 'bloomberg', TRUE),
|
||||||
('StockTwits', 'stocktwits', TRUE);
|
('Yahoo Finance', 'stocktwits', TRUE);
|
||||||
|
|
||||||
-- Paramètres par défaut
|
-- Paramètres par défaut
|
||||||
INSERT INTO settings (key, value) VALUES
|
INSERT INTO settings (key, value) VALUES
|
||||||
|
|||||||
@ -0,0 +1 @@
|
|||||||
|
DELETE FROM sources WHERE type IN ('reuters', 'watcherguru');
|
||||||
@ -0,0 +1,4 @@
|
|||||||
|
INSERT INTO sources (name, type, enabled) VALUES
|
||||||
|
('Reuters', 'reuters', true),
|
||||||
|
('Watcher.Guru', 'watcherguru', true)
|
||||||
|
ON CONFLICT DO NOTHING;
|
||||||
@ -0,0 +1 @@
|
|||||||
|
DROP TABLE IF EXISTS scrape_schedules;
|
||||||
17
backend/internal/database/migrations/000004_schedule.up.sql
Normal file
17
backend/internal/database/migrations/000004_schedule.up.sql
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
CREATE TABLE scrape_schedules (
|
||||||
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
day_of_week SMALLINT NOT NULL CHECK (day_of_week BETWEEN 0 AND 6),
|
||||||
|
hour SMALLINT NOT NULL CHECK (hour BETWEEN 0 AND 23),
|
||||||
|
minute SMALLINT NOT NULL DEFAULT 0 CHECK (minute BETWEEN 0 AND 59),
|
||||||
|
UNIQUE (day_of_week, hour, minute)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Planning par défaut : lun-ven à 6h et 15h, week-end à 6h uniquement
|
||||||
|
INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES
|
||||||
|
(1, 6, 0), (1, 15, 0),
|
||||||
|
(2, 6, 0), (2, 15, 0),
|
||||||
|
(3, 6, 0), (3, 15, 0),
|
||||||
|
(4, 6, 0), (4, 15, 0),
|
||||||
|
(5, 6, 0), (5, 15, 0),
|
||||||
|
(6, 6, 0),
|
||||||
|
(0, 6, 0);
|
||||||
@ -97,3 +97,10 @@ type Setting struct {
|
|||||||
Key string `json:"key"`
|
Key string `json:"key"`
|
||||||
Value string `json:"value"`
|
Value string `json:"value"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ScheduleSlot struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
DayOfWeek int `json:"day_of_week"` // 0=dimanche, 1=lundi ... 6=samedi
|
||||||
|
Hour int `json:"hour"`
|
||||||
|
Minute int `json:"minute"`
|
||||||
|
}
|
||||||
|
|||||||
@ -520,6 +520,51 @@ func (r *Repository) SetSetting(key, value string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Schedule ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
func (r *Repository) ListScheduleSlots() ([]ScheduleSlot, error) {
|
||||||
|
rows, err := r.db.Query(`
|
||||||
|
SELECT id, day_of_week, hour, minute FROM scrape_schedules
|
||||||
|
ORDER BY day_of_week, hour, minute`)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var slots []ScheduleSlot
|
||||||
|
for rows.Next() {
|
||||||
|
var s ScheduleSlot
|
||||||
|
if err := rows.Scan(&s.ID, &s.DayOfWeek, &s.Hour, &s.Minute); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
slots = append(slots, s)
|
||||||
|
}
|
||||||
|
return slots, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Repository) ReplaceSchedule(slots []ScheduleSlot) error {
|
||||||
|
tx, err := r.db.Begin()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer tx.Rollback()
|
||||||
|
|
||||||
|
if _, err := tx.Exec(`DELETE FROM scrape_schedules`); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
for _, s := range slots {
|
||||||
|
if _, err := tx.Exec(
|
||||||
|
`INSERT INTO scrape_schedules (day_of_week, hour, minute) VALUES ($1, $2, $3)
|
||||||
|
ON CONFLICT (day_of_week, hour, minute) DO NOTHING`,
|
||||||
|
s.DayOfWeek, s.Hour, s.Minute,
|
||||||
|
); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tx.Commit()
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Settings ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
func (r *Repository) ListSettings() ([]Setting, error) {
|
func (r *Repository) ListSettings() ([]Setting, error) {
|
||||||
rows, err := r.db.Query(`SELECT key, value FROM settings ORDER BY key`)
|
rows, err := r.db.Query(`SELECT key, value FROM settings ORDER BY key`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@ -3,7 +3,6 @@ package scheduler
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/robfig/cron/v3"
|
"github.com/robfig/cron/v3"
|
||||||
"github.com/tradarr/backend/internal/ai"
|
"github.com/tradarr/backend/internal/ai"
|
||||||
@ -16,7 +15,7 @@ type Scheduler struct {
|
|||||||
registry *scraper.Registry
|
registry *scraper.Registry
|
||||||
pipeline *ai.Pipeline
|
pipeline *ai.Pipeline
|
||||||
repo *models.Repository
|
repo *models.Repository
|
||||||
entryID cron.EntryID
|
entryIDs []cron.EntryID
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler {
|
func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler {
|
||||||
@ -29,19 +28,10 @@ func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Reposit
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scheduler) Start() error {
|
func (s *Scheduler) Start() error {
|
||||||
interval, err := s.getInterval()
|
if err := s.loadSchedule(); err != nil {
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
spec := fmt.Sprintf("@every %dm", interval)
|
|
||||||
s.entryID, err = s.cron.AddFunc(spec, s.run)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("add cron: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
s.cron.Start()
|
s.cron.Start()
|
||||||
fmt.Printf("scheduler started, running every %d minutes\n", interval)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,39 +40,46 @@ func (s *Scheduler) Stop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scheduler) Reload() error {
|
func (s *Scheduler) Reload() error {
|
||||||
s.cron.Remove(s.entryID)
|
for _, id := range s.entryIDs {
|
||||||
interval, err := s.getInterval()
|
s.cron.Remove(id)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
spec := fmt.Sprintf("@every %dm", interval)
|
s.entryIDs = nil
|
||||||
s.entryID, err = s.cron.AddFunc(spec, s.run)
|
return s.loadSchedule()
|
||||||
return err
|
}
|
||||||
|
|
||||||
|
func (s *Scheduler) loadSchedule() error {
|
||||||
|
slots, err := s.repo.ListScheduleSlots()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("load schedule: %w", err)
|
||||||
|
}
|
||||||
|
if len(slots) == 0 {
|
||||||
|
fmt.Println("scheduler: no schedule configured, scraping disabled")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, slot := range slots {
|
||||||
|
// Format cron: "minute hour * * day_of_week"
|
||||||
|
spec := fmt.Sprintf("%d %d * * %d", slot.Minute, slot.Hour, slot.DayOfWeek)
|
||||||
|
id, err := s.cron.AddFunc(spec, s.run)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("scheduler: invalid cron spec %q: %v\n", spec, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.entryIDs = append(s.entryIDs, id)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("scheduler: %d time slots loaded\n", len(s.entryIDs))
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scheduler) run() {
|
func (s *Scheduler) run() {
|
||||||
fmt.Println("scheduler: running scraping cycle")
|
fmt.Println("scheduler: starting scraping cycle")
|
||||||
if err := s.registry.RunAll(); err != nil {
|
if err := s.registry.RunAll(); err != nil {
|
||||||
fmt.Printf("scheduler scrape error: %v\n", err)
|
fmt.Printf("scheduler scrape error: %v\n", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
fmt.Println("scheduler: running AI summaries")
|
fmt.Println("scheduler: starting AI summaries")
|
||||||
if err := s.pipeline.GenerateForAll(context.Background()); err != nil {
|
if err := s.pipeline.GenerateForAll(context.Background()); err != nil {
|
||||||
fmt.Printf("scheduler summary error: %v\n", err)
|
fmt.Printf("scheduler summary error: %v\n", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scheduler) getInterval() (int, error) {
|
|
||||||
v, err := s.repo.GetSetting("scrape_interval_minutes")
|
|
||||||
if err != nil {
|
|
||||||
return 60, nil
|
|
||||||
}
|
|
||||||
if v == "" {
|
|
||||||
return 60, nil
|
|
||||||
}
|
|
||||||
n, err := strconv.Atoi(v)
|
|
||||||
if err != nil || n < 1 {
|
|
||||||
return 60, nil
|
|
||||||
}
|
|
||||||
return n, nil
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,206 +1,94 @@
|
|||||||
package bloomberg
|
package bloomberg
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/chromedp/chromedp"
|
|
||||||
"github.com/tradarr/backend/internal/scraper"
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Bloomberg struct {
|
type Bloomberg struct {
|
||||||
username string
|
scraperURL string
|
||||||
password string
|
client *http.Client
|
||||||
chromePath string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func New(username, password, chromePath string) *Bloomberg {
|
func New(scraperURL string) *Bloomberg {
|
||||||
return &Bloomberg{username: username, password: password, chromePath: chromePath}
|
if scraperURL == "" {
|
||||||
|
scraperURL = "http://scraper:3001"
|
||||||
|
}
|
||||||
|
return &Bloomberg{
|
||||||
|
scraperURL: scraperURL,
|
||||||
|
client: &http.Client{Timeout: 10 * time.Minute},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bloomberg) Name() string { return "bloomberg" }
|
func (b *Bloomberg) Name() string { return "bloomberg" }
|
||||||
|
|
||||||
func (b *Bloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
type scraperRequest struct {
|
||||||
if b.username == "" || b.password == "" {
|
Username string `json:"username"`
|
||||||
return nil, fmt.Errorf("bloomberg credentials not configured")
|
Password string `json:"password"`
|
||||||
}
|
|
||||||
|
|
||||||
opts := []chromedp.ExecAllocatorOption{
|
|
||||||
chromedp.NoFirstRun,
|
|
||||||
chromedp.NoDefaultBrowserCheck,
|
|
||||||
chromedp.Headless,
|
|
||||||
chromedp.DisableGPU,
|
|
||||||
chromedp.Flag("no-sandbox", true),
|
|
||||||
chromedp.Flag("disable-setuid-sandbox", true),
|
|
||||||
chromedp.Flag("disable-dev-shm-usage", true),
|
|
||||||
chromedp.Flag("disable-blink-features", "AutomationControlled"),
|
|
||||||
chromedp.Flag("disable-infobars", true),
|
|
||||||
chromedp.Flag("window-size", "1920,1080"),
|
|
||||||
chromedp.Flag("ignore-certificate-errors", true),
|
|
||||||
chromedp.UserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"),
|
|
||||||
}
|
|
||||||
if b.chromePath != "" {
|
|
||||||
opts = append(opts, chromedp.ExecPath(b.chromePath))
|
|
||||||
}
|
|
||||||
|
|
||||||
allocCtx, cancelAlloc := chromedp.NewExecAllocator(ctx, opts...)
|
|
||||||
defer cancelAlloc()
|
|
||||||
|
|
||||||
chromeCtx, cancelChrome := chromedp.NewContext(allocCtx)
|
|
||||||
defer cancelChrome()
|
|
||||||
|
|
||||||
timeoutCtx, cancelTimeout := context.WithTimeout(chromeCtx, 5*time.Minute)
|
|
||||||
defer cancelTimeout()
|
|
||||||
|
|
||||||
if err := b.login(timeoutCtx); err != nil {
|
|
||||||
return nil, fmt.Errorf("bloomberg login: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var articles []scraper.Article
|
|
||||||
pages := []string{
|
|
||||||
"https://www.bloomberg.com/markets",
|
|
||||||
"https://www.bloomberg.com/technology",
|
|
||||||
"https://www.bloomberg.com/economics",
|
|
||||||
}
|
|
||||||
for _, u := range pages {
|
|
||||||
pageArticles, err := b.scrapePage(timeoutCtx, u, symbols)
|
|
||||||
if err != nil {
|
|
||||||
fmt.Printf("bloomberg scrape %s: %v\n", u, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
articles = append(articles, pageArticles...)
|
|
||||||
}
|
|
||||||
fmt.Printf("bloomberg: %d articles fetched total\n", len(articles))
|
|
||||||
return articles, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bloomberg) login(ctx context.Context) error {
|
type scraperArticle struct {
|
||||||
loginCtx, cancel := context.WithTimeout(ctx, 2*time.Minute)
|
Title string `json:"title"`
|
||||||
defer cancel()
|
URL string `json:"url"`
|
||||||
|
|
||||||
// Masquer la détection d'automation via JS
|
|
||||||
if err := chromedp.Run(loginCtx,
|
|
||||||
chromedp.ActionFunc(func(ctx context.Context) error {
|
|
||||||
return chromedp.Evaluate(`
|
|
||||||
Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
|
|
||||||
window.chrome = { runtime: {} };
|
|
||||||
`, nil).Do(ctx)
|
|
||||||
}),
|
|
||||||
); err != nil {
|
|
||||||
fmt.Printf("bloomberg: could not inject stealth JS: %v\n", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err := chromedp.Run(loginCtx,
|
|
||||||
chromedp.Navigate("https://www.bloomberg.com/account/signin"),
|
|
||||||
chromedp.Sleep(2*time.Second),
|
|
||||||
// Essayer plusieurs sélecteurs pour l'email
|
|
||||||
chromedp.ActionFunc(func(ctx context.Context) error {
|
|
||||||
selectors := []string{
|
|
||||||
`input[name="email"]`,
|
|
||||||
`input[type="email"]`,
|
|
||||||
`input[data-type="email"]`,
|
|
||||||
`input[placeholder*="email" i]`,
|
|
||||||
`input[placeholder*="mail" i]`,
|
|
||||||
}
|
|
||||||
for _, sel := range selectors {
|
|
||||||
var count int
|
|
||||||
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
|
|
||||||
fmt.Printf("bloomberg: using email selector: %s\n", sel)
|
|
||||||
return chromedp.SendKeys(sel, b.username, chromedp.ByQuery).Do(ctx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("could not find email input — Bloomberg login page structure may have changed")
|
|
||||||
}),
|
|
||||||
chromedp.Sleep(500*time.Millisecond),
|
|
||||||
// Submit email
|
|
||||||
chromedp.ActionFunc(func(ctx context.Context) error {
|
|
||||||
selectors := []string{`button[type="submit"]`, `input[type="submit"]`, `button[data-testid*="submit"]`}
|
|
||||||
for _, sel := range selectors {
|
|
||||||
var count int
|
|
||||||
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
|
|
||||||
return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Fallback: press Enter
|
|
||||||
return chromedp.KeyEvent("\r").Do(ctx)
|
|
||||||
}),
|
|
||||||
chromedp.Sleep(2*time.Second),
|
|
||||||
// Password
|
|
||||||
chromedp.ActionFunc(func(ctx context.Context) error {
|
|
||||||
selectors := []string{`input[type="password"]`, `input[name="password"]`}
|
|
||||||
for _, sel := range selectors {
|
|
||||||
var count int
|
|
||||||
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
|
|
||||||
fmt.Printf("bloomberg: using password selector: %s\n", sel)
|
|
||||||
return chromedp.SendKeys(sel, b.password, chromedp.ByQuery).Do(ctx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("could not find password input")
|
|
||||||
}),
|
|
||||||
chromedp.Sleep(500*time.Millisecond),
|
|
||||||
chromedp.ActionFunc(func(ctx context.Context) error {
|
|
||||||
selectors := []string{`button[type="submit"]`, `input[type="submit"]`}
|
|
||||||
for _, sel := range selectors {
|
|
||||||
var count int
|
|
||||||
if err := chromedp.Evaluate(fmt.Sprintf(`document.querySelectorAll('%s').length`, sel), &count).Do(ctx); err == nil && count > 0 {
|
|
||||||
return chromedp.Click(sel, chromedp.ByQuery).Do(ctx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return chromedp.KeyEvent("\r").Do(ctx)
|
|
||||||
}),
|
|
||||||
chromedp.Sleep(3*time.Second),
|
|
||||||
)
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bloomberg) scrapePage(ctx context.Context, pageURL string, symbols []string) ([]scraper.Article, error) {
|
type scraperResponse struct {
|
||||||
pageCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
Articles []scraperArticle `json:"articles"`
|
||||||
defer cancel()
|
Error string `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
var articleNodes []map[string]string
|
func (b *Bloomberg) ScrapeWithCredentials(ctx context.Context, username, password string, symbols []string) ([]scraper.Article, error) {
|
||||||
err := chromedp.Run(pageCtx,
|
payload, _ := json.Marshal(scraperRequest{Username: username, Password: password})
|
||||||
chromedp.Navigate(pageURL),
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, b.scraperURL+"/bloomberg/scrape", bytes.NewReader(payload))
|
||||||
chromedp.Sleep(3*time.Second),
|
|
||||||
chromedp.Evaluate(`
|
|
||||||
(function() {
|
|
||||||
var items = [];
|
|
||||||
var seen = new Set();
|
|
||||||
var links = document.querySelectorAll('a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]');
|
|
||||||
links.forEach(function(a) {
|
|
||||||
if (seen.has(a.href)) return;
|
|
||||||
seen.add(a.href);
|
|
||||||
var title = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]');
|
|
||||||
var text = title ? title.innerText.trim() : a.innerText.trim();
|
|
||||||
if (text.length > 20 && a.href.includes('bloomberg.com')) {
|
|
||||||
items.push({title: text, url: a.href});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return items.slice(0, 25);
|
|
||||||
})()
|
|
||||||
`, &articleNodes),
|
|
||||||
)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("navigate %s: %w", pageURL, err)
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
resp, err := b.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("scraper service unreachable: %w", err)
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, _ := io.ReadAll(resp.Body)
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("scraper service HTTP %d: %s", resp.StatusCode, body)
|
||||||
|
}
|
||||||
|
|
||||||
|
var result scraperResponse
|
||||||
|
if err := json.Unmarshal(body, &result); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse scraper response: %w", err)
|
||||||
|
}
|
||||||
|
if result.Error != "" {
|
||||||
|
return nil, fmt.Errorf("bloomberg: %s", result.Error)
|
||||||
}
|
}
|
||||||
|
|
||||||
var articles []scraper.Article
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
for _, node := range articleNodes {
|
var articles []scraper.Article
|
||||||
title := strings.TrimSpace(node["title"])
|
for _, a := range result.Articles {
|
||||||
url := node["url"]
|
title := strings.TrimSpace(a.Title)
|
||||||
if title == "" || url == "" || !strings.Contains(url, "bloomberg.com") {
|
url := a.URL
|
||||||
|
if title == "" || url == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
syms := scraper.DetectSymbols(title, symbols)
|
syms := scraper.DetectSymbols(title, symbols)
|
||||||
articles = append(articles, scraper.Article{
|
articles = append(articles, scraper.Article{
|
||||||
Title: title,
|
Title: title,
|
||||||
Content: title, // contenu minimal — l'article complet nécessite un accès payant
|
Content: title,
|
||||||
URL: url,
|
URL: url,
|
||||||
PublishedAt: &now,
|
PublishedAt: &now,
|
||||||
Symbols: syms,
|
Symbols: syms,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
fmt.Printf("bloomberg: %d articles fetched\n", len(articles))
|
||||||
return articles, nil
|
return articles, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,21 +9,19 @@ import (
|
|||||||
"github.com/tradarr/backend/internal/scraper"
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DynamicBloomberg charge les credentials depuis la DB avant chaque scraping
|
|
||||||
type DynamicBloomberg struct {
|
type DynamicBloomberg struct {
|
||||||
repo *models.Repository
|
repo *models.Repository
|
||||||
enc *crypto.Encryptor
|
enc *crypto.Encryptor
|
||||||
chromePath string
|
scraperURL string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, chromePath string) *DynamicBloomberg {
|
func NewDynamic(repo *models.Repository, enc *crypto.Encryptor, scraperURL string) *DynamicBloomberg {
|
||||||
return &DynamicBloomberg{repo: repo, enc: enc, chromePath: chromePath}
|
return &DynamicBloomberg{repo: repo, enc: enc, scraperURL: scraperURL}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *DynamicBloomberg) Name() string { return "bloomberg" }
|
func (d *DynamicBloomberg) Name() string { return "bloomberg" }
|
||||||
|
|
||||||
func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scraper.Article, error) {
|
||||||
// Récupérer la source Bloomberg
|
|
||||||
source, err := d.repo.GetSourceByType("bloomberg")
|
source, err := d.repo.GetSourceByType("bloomberg")
|
||||||
if err != nil || source == nil {
|
if err != nil || source == nil {
|
||||||
return nil, fmt.Errorf("bloomberg source not found")
|
return nil, fmt.Errorf("bloomberg source not found")
|
||||||
@ -34,7 +32,7 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra
|
|||||||
return nil, fmt.Errorf("get bloomberg credentials: %w", err)
|
return nil, fmt.Errorf("get bloomberg credentials: %w", err)
|
||||||
}
|
}
|
||||||
if cred == nil || cred.Username == "" {
|
if cred == nil || cred.Username == "" {
|
||||||
return nil, fmt.Errorf("bloomberg credentials not configured — please set them in the admin panel")
|
return nil, fmt.Errorf("bloomberg credentials not configured — configure them in the admin panel")
|
||||||
}
|
}
|
||||||
|
|
||||||
password := ""
|
password := ""
|
||||||
@ -45,6 +43,6 @@ func (d *DynamicBloomberg) Scrape(ctx context.Context, symbols []string) ([]scra
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
b := New(cred.Username, password, d.chromePath)
|
b := New(d.scraperURL)
|
||||||
return b.Scrape(ctx, symbols)
|
return b.ScrapeWithCredentials(ctx, cred.Username, password, symbols)
|
||||||
}
|
}
|
||||||
|
|||||||
129
backend/internal/scraper/reuters/reuters.go
Normal file
129
backend/internal/scraper/reuters/reuters.go
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
package reuters
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Reuters RSS est bloqué par Cloudflare. On utilise des flux RSS financiers
|
||||||
|
// publics fiables à la place : MarketWatch, CNBC, Seeking Alpha.
|
||||||
|
var feeds = []struct {
|
||||||
|
name string
|
||||||
|
url string
|
||||||
|
}{
|
||||||
|
{"MarketWatch Top Stories", "https://feeds.content.dowjones.io/public/rss/mw_topstories"},
|
||||||
|
{"MarketWatch Markets", "https://feeds.content.dowjones.io/public/rss/mw_marketpulse"},
|
||||||
|
{"CNBC Top News", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews"},
|
||||||
|
{"CNBC Finance", "https://search.cnbc.com/rs/search/combinedcombined/rss/topNews?tag=Finance"},
|
||||||
|
}
|
||||||
|
|
||||||
|
type Reuters struct {
|
||||||
|
client *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
func New() *Reuters {
|
||||||
|
return &Reuters{client: &http.Client{Timeout: 15 * time.Second}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reuters) Name() string { return "reuters" }
|
||||||
|
|
||||||
|
type rssFeed struct {
|
||||||
|
Channel struct {
|
||||||
|
Items []struct {
|
||||||
|
Title string `xml:"title"`
|
||||||
|
Link string `xml:"link"`
|
||||||
|
Description string `xml:"description"`
|
||||||
|
PubDate string `xml:"pubDate"`
|
||||||
|
} `xml:"item"`
|
||||||
|
} `xml:"channel"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reuters) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
|
||||||
|
var articles []scraper.Article
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
|
||||||
|
for i, feed := range feeds {
|
||||||
|
if i > 0 {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return articles, ctx.Err()
|
||||||
|
case <-time.After(300 * time.Millisecond):
|
||||||
|
}
|
||||||
|
}
|
||||||
|
items, err := r.fetchFeed(ctx, feed.url)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("reuters/financial %s: %v\n", feed.name, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, a := range items {
|
||||||
|
if !seen[a.URL] {
|
||||||
|
seen[a.URL] = true
|
||||||
|
articles = append(articles, a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fmt.Printf("reuters/financial %s: %d articles\n", feed.name, len(items))
|
||||||
|
}
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *Reuters) fetchFeed(ctx context.Context, feedURL string) ([]scraper.Article, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
|
||||||
|
req.Header.Set("Accept", "application/rss+xml, application/xml, text/xml")
|
||||||
|
|
||||||
|
resp, err := r.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 256))
|
||||||
|
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
|
||||||
|
}
|
||||||
|
|
||||||
|
var feed rssFeed
|
||||||
|
if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse RSS: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var articles []scraper.Article
|
||||||
|
for _, item := range feed.Channel.Items {
|
||||||
|
title := strings.TrimSpace(item.Title)
|
||||||
|
link := strings.TrimSpace(item.Link)
|
||||||
|
if title == "" || link == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var publishedAt *time.Time
|
||||||
|
for _, f := range []string{time.RFC1123Z, time.RFC1123, "Mon, 02 Jan 2006 15:04:05 -0700"} {
|
||||||
|
if t, err := time.Parse(f, item.PubDate); err == nil {
|
||||||
|
publishedAt = &t
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
content := strings.TrimSpace(item.Description)
|
||||||
|
if content == "" {
|
||||||
|
content = title
|
||||||
|
}
|
||||||
|
|
||||||
|
articles = append(articles, scraper.Article{
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: link,
|
||||||
|
PublishedAt: publishedAt,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
200
backend/internal/scraper/watcherguru/watcherguru.go
Normal file
200
backend/internal/scraper/watcherguru/watcherguru.go
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
package watcherguru
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
|
||||||
|
"github.com/tradarr/backend/internal/scraper"
|
||||||
|
)
|
||||||
|
|
||||||
|
const baseURL = "https://watcher.guru"
|
||||||
|
|
||||||
|
type WatcherGuru struct {
|
||||||
|
client *http.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
func New() *WatcherGuru {
|
||||||
|
return &WatcherGuru{client: &http.Client{Timeout: 15 * time.Second}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WatcherGuru) Name() string { return "watcherguru" }
|
||||||
|
|
||||||
|
type rssFeed struct {
|
||||||
|
Channel struct {
|
||||||
|
Items []struct {
|
||||||
|
Title string `xml:"title"`
|
||||||
|
Link string `xml:"link"`
|
||||||
|
PubDate string `xml:"pubDate"`
|
||||||
|
Desc string `xml:"description"`
|
||||||
|
} `xml:"item"`
|
||||||
|
} `xml:"channel"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WatcherGuru) Scrape(ctx context.Context, _ []string) ([]scraper.Article, error) {
|
||||||
|
// Try RSS feeds first
|
||||||
|
for _, feedURL := range []string{
|
||||||
|
baseURL + "/feed/",
|
||||||
|
baseURL + "/news/feed/",
|
||||||
|
} {
|
||||||
|
articles, err := w.fetchRSS(ctx, feedURL)
|
||||||
|
if err == nil && len(articles) > 0 {
|
||||||
|
fmt.Printf("watcherguru rss: %d articles\n", len(articles))
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: HTML scraping
|
||||||
|
articles, err := w.scrapeHTML(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("watcherguru: %w", err)
|
||||||
|
}
|
||||||
|
fmt.Printf("watcherguru html: %d articles\n", len(articles))
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WatcherGuru) fetchRSS(ctx context.Context, feedURL string) ([]scraper.Article, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, feedURL, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Tradarr/1.0)")
|
||||||
|
|
||||||
|
resp, err := w.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
var feed rssFeed
|
||||||
|
if err := xml.NewDecoder(resp.Body).Decode(&feed); err != nil {
|
||||||
|
return nil, fmt.Errorf("parse RSS: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var articles []scraper.Article
|
||||||
|
for _, item := range feed.Channel.Items {
|
||||||
|
title := strings.TrimSpace(item.Title)
|
||||||
|
link := strings.TrimSpace(item.Link)
|
||||||
|
if title == "" || link == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var publishedAt *time.Time
|
||||||
|
for _, f := range []string{time.RFC1123Z, time.RFC1123, "Mon, 02 Jan 2006 15:04:05 -0700"} {
|
||||||
|
if t, err := time.Parse(f, item.PubDate); err == nil {
|
||||||
|
publishedAt = &t
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
content := strings.TrimSpace(item.Desc)
|
||||||
|
if content == "" {
|
||||||
|
content = title
|
||||||
|
}
|
||||||
|
articles = append(articles, scraper.Article{
|
||||||
|
Title: title,
|
||||||
|
Content: content,
|
||||||
|
URL: link,
|
||||||
|
PublishedAt: publishedAt,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (w *WatcherGuru) scrapeHTML(ctx context.Context) ([]scraper.Article, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL+"/news/", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 Chrome/122.0.0.0 Safari/537.36")
|
||||||
|
req.Header.Set("Accept", "text/html,application/xhtml+xml")
|
||||||
|
|
||||||
|
resp, err := w.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
|
||||||
|
return nil, fmt.Errorf("HTTP %d: %s", resp.StatusCode, body)
|
||||||
|
}
|
||||||
|
|
||||||
|
doc, err := html.Parse(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("parse HTML: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var articles []scraper.Article
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
var walk func(*html.Node)
|
||||||
|
walk = func(n *html.Node) {
|
||||||
|
if n.Type == html.ElementNode && (n.Data == "a" || n.Data == "article") {
|
||||||
|
if n.Data == "a" {
|
||||||
|
href := attrVal(n, "href")
|
||||||
|
if href == "" || seen[href] {
|
||||||
|
walk(n.FirstChild)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Collect links that look like news articles
|
||||||
|
if strings.Contains(href, "/news/") || strings.Contains(href, "watcher.guru") {
|
||||||
|
text := strings.TrimSpace(nodeText(n))
|
||||||
|
if len(text) > 20 {
|
||||||
|
url := href
|
||||||
|
if !strings.HasPrefix(url, "http") {
|
||||||
|
url = baseURL + url
|
||||||
|
}
|
||||||
|
if !seen[url] {
|
||||||
|
seen[url] = true
|
||||||
|
articles = append(articles, scraper.Article{
|
||||||
|
Title: text,
|
||||||
|
Content: text,
|
||||||
|
URL: url,
|
||||||
|
PublishedAt: &now,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
walk(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
walk(doc)
|
||||||
|
|
||||||
|
if len(articles) > 40 {
|
||||||
|
articles = articles[:40]
|
||||||
|
}
|
||||||
|
return articles, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func attrVal(n *html.Node, key string) string {
|
||||||
|
for _, a := range n.Attr {
|
||||||
|
if a.Key == key {
|
||||||
|
return a.Val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func nodeText(n *html.Node) string {
|
||||||
|
if n.Type == html.TextNode {
|
||||||
|
return n.Data
|
||||||
|
}
|
||||||
|
var sb strings.Builder
|
||||||
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
sb.WriteString(nodeText(c))
|
||||||
|
}
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
@ -86,8 +86,13 @@ func (y *YahooFinance) fetchSymbol(ctx context.Context, symbol string) ([]scrape
|
|||||||
return nil, fmt.Errorf("parse RSS: %w", err)
|
return nil, fmt.Errorf("parse RSS: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const maxPerSymbol = 5
|
||||||
|
|
||||||
var articles []scraper.Article
|
var articles []scraper.Article
|
||||||
for _, item := range feed.Channel.Items {
|
for _, item := range feed.Channel.Items {
|
||||||
|
if len(articles) >= maxPerSymbol {
|
||||||
|
break
|
||||||
|
}
|
||||||
title := strings.TrimSpace(item.Title)
|
title := strings.TrimSpace(item.Title)
|
||||||
link := strings.TrimSpace(item.Link)
|
link := strings.TrimSpace(item.Link)
|
||||||
if title == "" || link == "" {
|
if title == "" || link == "" {
|
||||||
|
|||||||
@ -14,6 +14,14 @@ services:
|
|||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
|
||||||
|
scraper:
|
||||||
|
build:
|
||||||
|
context: ./scraper-service
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
restart: unless-stopped
|
||||||
|
expose:
|
||||||
|
- "3001"
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
build:
|
build:
|
||||||
context: ./backend
|
context: ./backend
|
||||||
@ -22,11 +30,14 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
scraper:
|
||||||
|
condition: service_started
|
||||||
environment:
|
environment:
|
||||||
DATABASE_URL: "host=postgres port=5432 user=${POSTGRES_USER:-tradarr} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB:-tradarr} sslmode=disable"
|
DATABASE_URL: "host=postgres port=5432 user=${POSTGRES_USER:-tradarr} password=${POSTGRES_PASSWORD} dbname=${POSTGRES_DB:-tradarr} sslmode=disable"
|
||||||
JWT_SECRET: ${JWT_SECRET:?JWT_SECRET is required}
|
JWT_SECRET: ${JWT_SECRET:?JWT_SECRET is required}
|
||||||
ENCRYPTION_KEY: ${ENCRYPTION_KEY:?ENCRYPTION_KEY must be 32 bytes hex}
|
ENCRYPTION_KEY: ${ENCRYPTION_KEY:?ENCRYPTION_KEY must be 32 bytes hex}
|
||||||
PORT: "8080"
|
PORT: "8080"
|
||||||
|
SCRAPER_URL: "http://scraper:3001"
|
||||||
ADMIN_EMAIL: ${ADMIN_EMAIL:-admin@tradarr.local}
|
ADMIN_EMAIL: ${ADMIN_EMAIL:-admin@tradarr.local}
|
||||||
ADMIN_PASSWORD: ${ADMIN_PASSWORD:-changeme}
|
ADMIN_PASSWORD: ${ADMIN_PASSWORD:-changeme}
|
||||||
expose:
|
expose:
|
||||||
|
|||||||
@ -12,6 +12,7 @@ export interface ScrapeJob {
|
|||||||
articles_found: number; error_msg: string; created_at: string
|
articles_found: number; error_msg: string; created_at: string
|
||||||
}
|
}
|
||||||
export interface Setting { key: string; value: string }
|
export interface Setting { key: string; value: string }
|
||||||
|
export interface ScheduleSlot { id?: string; day_of_week: number; hour: number; minute: number }
|
||||||
export interface AdminUser { id: string; email: string; role: string; created_at: string }
|
export interface AdminUser { id: string; email: string; role: string; created_at: string }
|
||||||
export interface Credential { source_id: string; source_name: string; username: string; has_password: boolean }
|
export interface Credential { source_id: string; source_name: string; username: string; has_password: boolean }
|
||||||
|
|
||||||
@ -44,6 +45,10 @@ export const adminApi = {
|
|||||||
updateSettings: (settings: Setting[]) => api.put<void>('/admin/settings', { settings }),
|
updateSettings: (settings: Setting[]) => api.put<void>('/admin/settings', { settings }),
|
||||||
getDefaultPrompt: () => api.get<{ prompt: string }>('/admin/settings/default-prompt'),
|
getDefaultPrompt: () => api.get<{ prompt: string }>('/admin/settings/default-prompt'),
|
||||||
|
|
||||||
|
// Schedule
|
||||||
|
getSchedule: () => api.get<ScheduleSlot[]>('/admin/schedule'),
|
||||||
|
updateSchedule: (slots: ScheduleSlot[]) => api.put<void>('/admin/schedule', { slots }),
|
||||||
|
|
||||||
// Users
|
// Users
|
||||||
listUsers: () => api.get<AdminUser[]>('/admin/users'),
|
listUsers: () => api.get<AdminUser[]>('/admin/users'),
|
||||||
updateUser: (id: string, email: string, role: string) =>
|
updateUser: (id: string, email: string, role: string) =>
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
import { NavLink } from 'react-router-dom'
|
import { NavLink } from 'react-router-dom'
|
||||||
import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp } from 'lucide-react'
|
import { LayoutDashboard, Newspaper, Star, Settings, Key, Cpu, Database, ClipboardList, Users, LogOut, TrendingUp, CalendarDays } from 'lucide-react'
|
||||||
import { useAuth } from '@/lib/auth'
|
import { useAuth } from '@/lib/auth'
|
||||||
import { cn } from '@/lib/cn'
|
import { cn } from '@/lib/cn'
|
||||||
|
|
||||||
@ -15,6 +15,7 @@ const adminItems = [
|
|||||||
{ to: '/admin/sources', icon: Database, label: 'Sources' },
|
{ to: '/admin/sources', icon: Database, label: 'Sources' },
|
||||||
{ to: '/admin/jobs', icon: ClipboardList, label: 'Jobs' },
|
{ to: '/admin/jobs', icon: ClipboardList, label: 'Jobs' },
|
||||||
{ to: '/admin/users', icon: Users, label: 'Utilisateurs' },
|
{ to: '/admin/users', icon: Users, label: 'Utilisateurs' },
|
||||||
|
{ to: '/admin/schedule', icon: CalendarDays, label: 'Planning' },
|
||||||
{ to: '/admin/settings', icon: Settings, label: 'Paramètres' },
|
{ to: '/admin/settings', icon: Settings, label: 'Paramètres' },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,7 @@ import { Sources } from '@/pages/admin/Sources'
|
|||||||
import { Jobs } from '@/pages/admin/Jobs'
|
import { Jobs } from '@/pages/admin/Jobs'
|
||||||
import { AdminUsers } from '@/pages/admin/AdminUsers'
|
import { AdminUsers } from '@/pages/admin/AdminUsers'
|
||||||
import { AdminSettings } from '@/pages/admin/AdminSettings'
|
import { AdminSettings } from '@/pages/admin/AdminSettings'
|
||||||
|
import { Schedule } from '@/pages/admin/Schedule'
|
||||||
|
|
||||||
export const router = createBrowserRouter([
|
export const router = createBrowserRouter([
|
||||||
{ path: '/login', element: <Login /> },
|
{ path: '/login', element: <Login /> },
|
||||||
@ -31,6 +32,7 @@ export const router = createBrowserRouter([
|
|||||||
{ path: 'jobs', element: <Jobs /> },
|
{ path: 'jobs', element: <Jobs /> },
|
||||||
{ path: 'users', element: <AdminUsers /> },
|
{ path: 'users', element: <AdminUsers /> },
|
||||||
{ path: 'settings', element: <AdminSettings /> },
|
{ path: 'settings', element: <AdminSettings /> },
|
||||||
|
{ path: 'schedule', element: <Schedule /> },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|||||||
145
frontend/src/pages/admin/Schedule.tsx
Normal file
145
frontend/src/pages/admin/Schedule.tsx
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
import { useState, useEffect } from 'react'
|
||||||
|
import { Plus, Trash2, Save } from 'lucide-react'
|
||||||
|
import { adminApi, type ScheduleSlot } from '@/api/admin'
|
||||||
|
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||||
|
import { Button } from '@/components/ui/button'
|
||||||
|
import { Spinner } from '@/components/ui/spinner'
|
||||||
|
|
||||||
|
const DAYS = [
|
||||||
|
{ label: 'Lundi', short: 'LUN', value: 1 },
|
||||||
|
{ label: 'Mardi', short: 'MAR', value: 2 },
|
||||||
|
{ label: 'Mercredi', short: 'MER', value: 3 },
|
||||||
|
{ label: 'Jeudi', short: 'JEU', value: 4 },
|
||||||
|
{ label: 'Vendredi', short: 'VEN', value: 5 },
|
||||||
|
{ label: 'Samedi', short: 'SAM', value: 6 },
|
||||||
|
{ label: 'Dimanche', short: 'DIM', value: 0 },
|
||||||
|
]
|
||||||
|
|
||||||
|
type SlotKey = `${number}-${number}-${number}`
|
||||||
|
|
||||||
|
function toKey(s: ScheduleSlot): SlotKey {
|
||||||
|
return `${s.day_of_week}-${s.hour}-${s.minute}`
|
||||||
|
}
|
||||||
|
|
||||||
|
function fmt(h: number, m: number) {
|
||||||
|
return `${String(h).padStart(2, '0')}:${String(m).padStart(2, '0')}`
|
||||||
|
}
|
||||||
|
|
||||||
|
export function Schedule() {
|
||||||
|
const [slots, setSlots] = useState<ScheduleSlot[]>([])
|
||||||
|
const [loading, setLoading] = useState(true)
|
||||||
|
const [saving, setSaving] = useState(false)
|
||||||
|
const [saved, setSaved] = useState(false)
|
||||||
|
const [newTimes, setNewTimes] = useState<Record<number, string>>({})
|
||||||
|
|
||||||
|
useEffect(() => { load() }, [])
|
||||||
|
|
||||||
|
async function load() {
|
||||||
|
setLoading(true)
|
||||||
|
try { setSlots((await adminApi.getSchedule()) ?? []) } finally { setLoading(false) }
|
||||||
|
}
|
||||||
|
|
||||||
|
function slotsForDay(day: number) {
|
||||||
|
return slots
|
||||||
|
.filter(s => s.day_of_week === day)
|
||||||
|
.sort((a, b) => a.hour !== b.hour ? a.hour - b.hour : a.minute - b.minute)
|
||||||
|
}
|
||||||
|
|
||||||
|
function addSlot(day: number) {
|
||||||
|
const time = newTimes[day] || '06:00'
|
||||||
|
const [h, m] = time.split(':').map(Number)
|
||||||
|
const newSlot: ScheduleSlot = { day_of_week: day, hour: h, minute: m }
|
||||||
|
if (slots.some(s => toKey(s) === toKey(newSlot))) return
|
||||||
|
setSlots(prev => [...prev, newSlot])
|
||||||
|
setNewTimes(p => ({ ...p, [day]: '06:00' }))
|
||||||
|
}
|
||||||
|
|
||||||
|
function removeSlot(slot: ScheduleSlot) {
|
||||||
|
setSlots(prev => prev.filter(s => toKey(s) !== toKey(slot)))
|
||||||
|
}
|
||||||
|
|
||||||
|
async function save() {
|
||||||
|
setSaving(true); setSaved(false)
|
||||||
|
await adminApi.updateSchedule(slots)
|
||||||
|
setSaving(false); setSaved(true)
|
||||||
|
setTimeout(() => setSaved(false), 2000)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (loading) return <div className="flex justify-center py-20"><Spinner /></div>
|
||||||
|
|
||||||
|
return (
|
||||||
|
<div className="space-y-6">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<h1 className="text-2xl font-bold">Planning hebdomadaire</h1>
|
||||||
|
<p className="text-muted-foreground text-sm">
|
||||||
|
Définissez les créneaux de scraping + résumé IA pour chaque jour
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Button onClick={save} disabled={saving}>
|
||||||
|
{saving ? <Spinner className="h-4 w-4" /> : <Save className="h-4 w-4" />}
|
||||||
|
{saved ? 'Enregistré !' : 'Enregistrer'}
|
||||||
|
</Button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 xl:grid-cols-7 gap-3">
|
||||||
|
{DAYS.map(day => {
|
||||||
|
const daySlots = slotsForDay(day.value)
|
||||||
|
const isWeekend = day.value === 0 || day.value === 6
|
||||||
|
return (
|
||||||
|
<Card key={day.value} className={isWeekend ? 'border-muted' : ''}>
|
||||||
|
<CardHeader className="pb-2 pt-4 px-4">
|
||||||
|
<CardTitle className="text-sm font-semibold">
|
||||||
|
<span className="hidden xl:block">{day.label}</span>
|
||||||
|
<span className="xl:hidden">{day.short}</span>
|
||||||
|
</CardTitle>
|
||||||
|
</CardHeader>
|
||||||
|
<CardContent className="px-4 pb-4 space-y-2">
|
||||||
|
{/* Créneaux existants */}
|
||||||
|
{daySlots.length === 0 && (
|
||||||
|
<p className="text-xs text-muted-foreground italic">Aucun créneau</p>
|
||||||
|
)}
|
||||||
|
{daySlots.map(slot => (
|
||||||
|
<div
|
||||||
|
key={toKey(slot)}
|
||||||
|
className="flex items-center justify-between rounded bg-primary/10 px-2 py-1"
|
||||||
|
>
|
||||||
|
<span className="text-sm font-mono font-medium">
|
||||||
|
{fmt(slot.hour, slot.minute)}
|
||||||
|
</span>
|
||||||
|
<button
|
||||||
|
onClick={() => removeSlot(slot)}
|
||||||
|
className="text-muted-foreground hover:text-destructive transition-colors ml-2"
|
||||||
|
>
|
||||||
|
<Trash2 className="h-3 w-3" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
))}
|
||||||
|
|
||||||
|
{/* Ajout d'un créneau */}
|
||||||
|
<div className="flex items-center gap-1 pt-1">
|
||||||
|
<input
|
||||||
|
type="time"
|
||||||
|
value={newTimes[day.value] ?? '06:00'}
|
||||||
|
onChange={e => setNewTimes(p => ({ ...p, [day.value]: e.target.value }))}
|
||||||
|
className="flex-1 min-w-0 rounded border border-input bg-background px-2 py-1 text-xs font-mono focus:outline-none focus:ring-1 focus:ring-ring"
|
||||||
|
/>
|
||||||
|
<button
|
||||||
|
onClick={() => addSlot(day.value)}
|
||||||
|
className="rounded bg-primary/10 p-1 hover:bg-primary/20 transition-colors"
|
||||||
|
>
|
||||||
|
<Plus className="h-3 w-3" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</CardContent>
|
||||||
|
</Card>
|
||||||
|
)
|
||||||
|
})}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p className="text-xs text-muted-foreground">
|
||||||
|
À chaque créneau, le service lance le scraping de toutes les sources actives puis génère les résumés IA.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
)
|
||||||
|
}
|
||||||
@ -15,8 +15,7 @@
|
|||||||
"noUnusedLocals": true,
|
"noUnusedLocals": true,
|
||||||
"noUnusedParameters": true,
|
"noUnusedParameters": true,
|
||||||
"noFallthroughCasesInSwitch": true,
|
"noFallthroughCasesInSwitch": true,
|
||||||
"baseUrl": ".",
|
"paths": { "@/*": ["./src/*"] }
|
||||||
"paths": { "@/*": ["src/*"] }
|
|
||||||
},
|
},
|
||||||
"include": ["src"]
|
"include": ["src"]
|
||||||
}
|
}
|
||||||
|
|||||||
46
scraper-service/Dockerfile
Normal file
46
scraper-service/Dockerfile
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
FROM node:20-slim
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
chromium \
|
||||||
|
fonts-liberation \
|
||||||
|
libasound2 \
|
||||||
|
libatk-bridge2.0-0 \
|
||||||
|
libatk1.0-0 \
|
||||||
|
libcairo2 \
|
||||||
|
libcups2 \
|
||||||
|
libdbus-1-3 \
|
||||||
|
libexpat1 \
|
||||||
|
libfontconfig1 \
|
||||||
|
libgbm1 \
|
||||||
|
libglib2.0-0 \
|
||||||
|
libgtk-3-0 \
|
||||||
|
libnspr4 \
|
||||||
|
libnss3 \
|
||||||
|
libpango-1.0-0 \
|
||||||
|
libpangocairo-1.0-0 \
|
||||||
|
libx11-6 \
|
||||||
|
libx11-xcb1 \
|
||||||
|
libxcb1 \
|
||||||
|
libxcomposite1 \
|
||||||
|
libxcursor1 \
|
||||||
|
libxdamage1 \
|
||||||
|
libxext6 \
|
||||||
|
libxfixes3 \
|
||||||
|
libxi6 \
|
||||||
|
libxrandr2 \
|
||||||
|
libxrender1 \
|
||||||
|
libxss1 \
|
||||||
|
libxtst6 \
|
||||||
|
--no-install-recommends \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
|
||||||
|
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
COPY package*.json ./
|
||||||
|
RUN npm install --omit=dev
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
EXPOSE 3001
|
||||||
|
CMD ["node", "index.js"]
|
||||||
205
scraper-service/index.js
Normal file
205
scraper-service/index.js
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
// Standalone scraping microservice: exposes HTTP endpoints that drive a
// headless Chromium (puppeteer-extra + stealth plugin) to log in to news
// sites and collect article headlines for the main backend.
const express = require('express')
const puppeteer = require('puppeteer-extra')
const StealthPlugin = require('puppeteer-extra-plugin-stealth')

// The stealth plugin patches common headless-browser detection vectors.
puppeteer.use(StealthPlugin())

const app = express()
app.use(express.json())

// Chromium binary path (set by the Dockerfile) and HTTP listen port.
const CHROME_PATH = process.env.PUPPETEER_EXECUTABLE_PATH || '/usr/bin/chromium'
const PORT = process.env.PORT || 3001
|
||||||
|
|
||||||
|
// Launch a headless Chromium instance configured for scraping inside a
// container: sandbox disabled, /dev/shm workaround, fixed window size, and
// the Blink automation flag disabled to reduce bot detection.
// Returns the promise from puppeteer.launch (a Browser once resolved).
function launchBrowser() {
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
    '--window-size=1920,1080',
    '--disable-blink-features=AutomationControlled',
  ]
  return puppeteer.launch({
    executablePath: CHROME_PATH,
    headless: true,
    args: launchArgs,
  })
}
|
||||||
|
|
||||||
|
// Click the first element matching any of `selectors`, in order.
// Returns true as soon as an element was found and clicked. If nothing
// matched (or every attempt threw), presses Enter as a last-resort submit
// and returns false.
async function tryClick(page, selectors) {
  for (const selector of selectors) {
    try {
      const handle = await page.$(selector)
      if (!handle) continue
      await handle.click()
      return true
    } catch {
      // selector lookup or click failed — fall through to the next candidate
    }
  }
  await page.keyboard.press('Enter')
  return false
}
|
||||||
|
|
||||||
|
// Type `text` into the first of `selectors` that appears within 4 seconds,
// using a 60ms per-keystroke delay to look human. Returns true on success,
// false when no candidate selector ever showed up.
async function tryType(page, selectors, text) {
  for (const selector of selectors) {
    try {
      await page.waitForSelector(selector, { timeout: 4000 })
      await page.type(selector, text, { delay: 60 })
      return true
    } catch {
      // selector never appeared (or typing failed) — try the next candidate
    }
  }
  return false
}
|
||||||
|
|
||||||
|
// Liveness probe used by container orchestration / the Go backend.
app.get('/health', (_req, res) => {
  res.json({ ok: true })
})
|
||||||
|
|
||||||
|
// POST /bloomberg/scrape
// Body: { username, password } — Bloomberg credentials (sent by the backend,
// which stores them encrypted).
// Logs in to bloomberg.com with a stealth browser, then crawls three section
// pages and returns { articles: [{ title, url }, ...] } (deduplicated by URL,
// up to 25 links per section). On failure responds 500 with { error }.
// NOTE(review): login success is not verified before scraping — if the login
// silently fails, the response may contain only publicly visible headlines.
app.post('/bloomberg/scrape', async (req, res) => {
  const { username, password } = req.body || {}
  if (!username || !password) {
    return res.status(400).json({ error: 'username and password required' })
  }

  let browser
  try {
    browser = await launchBrowser()
    const page = await browser.newPage()
    await page.setViewport({ width: 1920, height: 1080 })

    // Hide automation signals
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined })
      window.chrome = { runtime: {} }
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] })
    })

    console.log('[bloomberg] navigating to login page')
    await page.goto('https://www.bloomberg.com/account/signin', {
      waitUntil: 'networkidle2',
      timeout: 60000,
    })
    // Give client-side rendering a moment to settle.
    await new Promise(r => setTimeout(r, 2000))

    // Debug: dump the page's inputs before looking for the email field,
    // so selector breakage can be diagnosed from the logs.
    const pageInputs = await page.evaluate(() =>
      Array.from(document.querySelectorAll('input')).map(i => ({
        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder, visible: i.offsetParent !== null
      }))
    )
    console.log('[bloomberg] inputs on login page:', JSON.stringify(pageInputs))
    const pageTitle = await page.title()
    console.log('[bloomberg] page title:', pageTitle)

    console.log('[bloomberg] entering email')
    // Candidate selectors for the email field, most specific first.
    const emailSelectors = [
      '#email-form-input',
      'input[id="email-form-input"]',
      'input[type="email"]',
      'input[name="text-input"]',
      'input[placeholder*="email" i]',
    ]
    const emailOk = await tryType(page, emailSelectors, username)
    if (!emailOk) throw new Error('could not find email input')

    await new Promise(r => setTimeout(r, 800))

    // Click submit via in-page JS to work around disabled buttons:
    // first a submit-looking button, then form.submit() as fallback.
    const submitted = await page.evaluate(() => {
      const btns = Array.from(document.querySelectorAll('button'))
      const btn = btns.find(b =>
        b.type === 'submit' ||
        /continue|next|sign.?in/i.test(b.textContent)
      )
      if (btn) { btn.click(); return true }
      const form = document.querySelector('form')
      if (form) { form.submit(); return true }
      return false
    })
    if (!submitted) await page.keyboard.press('Enter')

    // Wait for the page to change (password input appears or navigation);
    // on timeout, just sleep and hope the page settled anyway.
    try {
      await page.waitForFunction(
        () => document.querySelector('input[type="password"]') !== null,
        { timeout: 10000 }
      )
    } catch {
      await new Promise(r => setTimeout(r, 3000))
    }
    console.log('[bloomberg] after email submit, url:', page.url())

    // Debug: inputs available after the email step.
    const allInputs = await page.evaluate(() =>
      Array.from(document.querySelectorAll('input')).map(i => ({
        type: i.type, name: i.name, id: i.id, placeholder: i.placeholder
      }))
    )
    console.log('[bloomberg] inputs after email submit:', JSON.stringify(allInputs))

    console.log('[bloomberg] entering password')
    const pwdOk = await tryType(page, [
      'input[type="password"]',
      'input[name="password"]',
      'input[autocomplete="current-password"]',
      'input[autocomplete="password"]',
    ], password)
    if (!pwdOk) throw new Error('could not find password input — check logs above for available inputs')

    await new Promise(r => setTimeout(r, 500))
    // Submit the password form; tryClick presses Enter as a fallback.
    await tryClick(page, ['button[type="submit"]', 'input[type="submit"]'])
    await new Promise(r => setTimeout(r, 3000))

    const currentURL = page.url()
    console.log('[bloomberg] after login, url:', currentURL)

    // Section pages to crawl for headlines.
    const pages = [
      'https://www.bloomberg.com/markets',
      'https://www.bloomberg.com/technology',
      'https://www.bloomberg.com/economics',
    ]

    const articles = []
    // Cross-page URL dedup (a distinct `seen` set runs inside the browser
    // context below for per-page dedup).
    const seen = new Set()

    for (const url of pages) {
      try {
        console.log('[bloomberg] scraping', url)
        await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 })
        await new Promise(r => setTimeout(r, 2000))

        // Collect article-looking links; prefer headline text inside the
        // anchor, keep only plausible titles (>20 chars) on bloomberg.com.
        const items = await page.evaluate(() => {
          const results = []
          const seen = new Set()
          const links = document.querySelectorAll(
            'a[href*="/news/articles"], a[href*="/opinion/"], a[href*="/markets/"]'
          )
          links.forEach(a => {
            if (seen.has(a.href)) return
            seen.add(a.href)
            const titleEl = a.querySelector('h1,h2,h3,h4,[class*="headline"],[class*="title"]')
            const text = titleEl ? titleEl.innerText.trim() : a.innerText.trim()
            if (text.length > 20 && a.href.includes('bloomberg.com')) {
              results.push({ title: text, url: a.href })
            }
          })
          return results.slice(0, 25)
        })

        // Merge into the global list, skipping URLs seen on earlier pages.
        for (const item of items) {
          if (!seen.has(item.url) && item.title && item.url) {
            seen.add(item.url)
            articles.push(item)
          }
        }
        console.log('[bloomberg]', url, '->', items.length, 'articles')
      } catch (e) {
        // One bad section page must not abort the whole scrape.
        console.error('[bloomberg] error on', url, ':', e.message)
      }
    }

    console.log('[bloomberg] total:', articles.length, 'articles')
    res.json({ articles })
  } catch (e) {
    console.error('[bloomberg] scrape error:', e.message)
    res.status(500).json({ error: e.message })
  } finally {
    // Always release the browser, even on error paths.
    if (browser) await browser.close()
  }
})
|
||||||
|
|
||||||
|
// Start the HTTP server (port 3001 by default; see the Dockerfile EXPOSE).
app.listen(PORT, () => console.log(`scraper-service listening on :${PORT}`))
|
||||||
14
scraper-service/package.json
Normal file
14
scraper-service/package.json
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"name": "tradarr-scraper-service",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"start": "node index.js"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"express": "^4.19.2",
|
||||||
|
"puppeteer-extra": "^3.3.6",
|
||||||
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||||
|
"puppeteer": "^22.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user