package scraper

import (
	"context"
	"fmt"
	"time"

	"github.com/tradarr/backend/internal/models"
)

// Registry maps source types to their Scraper implementations and
// persists scrape results through the repository.
type Registry struct {
	scrapers map[string]Scraper
	repo     *models.Repository
}

func NewRegistry(repo *models.Repository) *Registry {
	return &Registry{
		scrapers: map[string]Scraper{},
		repo:     repo,
	}
}

// Register adds a scraper, keyed by its Name().
func (r *Registry) Register(s Scraper) {
	r.scrapers[s.Name()] = s
}

// Run executes the scraper associated with sourceID and persists the articles.
func (r *Registry) Run(sourceID string) error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}

	var source *models.Source
	for i := range sources {
		if sources[i].ID == sourceID {
			source = &sources[i]
			break
		}
	}
	if source == nil {
		return fmt.Errorf("source %s not found", sourceID)
	}

	scrpr, ok := r.scrapers[source.Type]
	if !ok {
		return fmt.Errorf("no scraper for type %s", source.Type)
	}

	// Create the job and mark it as running.
	job, err := r.repo.CreateScrapeJob(sourceID)
	if err != nil {
		return err
	}
	if err := r.repo.UpdateScrapeJob(job.ID, "running", 0, ""); err != nil {
		return err
	}

	// Fetch the watched symbols.
	symbols, err := r.repo.GetAllWatchedSymbols()
	if err != nil {
		// Don't leave the job stuck in "running" on an early failure.
		_ = r.repo.UpdateScrapeJob(job.ID, "error", 0, err.Error())
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	articles, scrapeErr := scrpr.Scrape(ctx, symbols)
	if scrapeErr != nil {
		_ = r.repo.UpdateScrapeJob(job.ID, "error", 0, scrapeErr.Error())
		return scrapeErr
	}

	// Persist the articles, skipping any that fail to upsert.
	count := 0
	for _, a := range articles {
		saved, err := r.repo.UpsertArticle(sourceID, a.Title, a.Content, a.URL, a.PublishedAt)
		if err != nil {
			continue
		}
		count++
		for _, sym := range a.Symbols {
			_ = r.repo.AddArticleSymbol(saved.ID, sym)
		}
	}

	return r.repo.UpdateScrapeJob(job.ID, "done", count, "")
}

// RunAll executes every enabled scraper, continuing past individual failures.
func (r *Registry) RunAll() error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}
	for _, src := range sources {
		if !src.Enabled {
			continue
		}
		if err := r.Run(src.ID); err != nil {
			fmt.Printf("scraper %s error: %v\n", src.Name, err)
		}
	}
	return nil
}
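
// Example wiring: a minimal sketch of how a caller might register a scraper
// and drive RunAll on a schedule. FeedScraper, the 15-minute interval, and
// the log import are hypothetical assumptions for illustration, not part of
// this package:
//
//	reg := scraper.NewRegistry(repo)
//	reg.Register(NewFeedScraper()) // any type satisfying the Scraper interface
//
//	ticker := time.NewTicker(15 * time.Minute)
//	defer ticker.Stop()
//	for range ticker.C {
//		if err := reg.RunAll(); err != nil {
//			log.Printf("scrape cycle: %v", err)
//		}
//	}
//
// RunAll only returns an error when listing sources fails; per-source
// failures are printed and skipped, so one bad source does not stall the loop.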