107 lines
2.1 KiB
Go
107 lines
2.1 KiB
Go
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/tradarr/backend/internal/models"
|
|
)
|
|
|
|
// Registry maps scraper type names to Scraper implementations and
// persists scrape results through the repository.
type Registry struct {
	// scrapers is keyed by Scraper.Name(); Run matches it against Source.Type.
	scrapers map[string]Scraper
	// repo provides source listing, scrape-job tracking, and article persistence.
	repo *models.Repository
}
|
|
|
|
func NewRegistry(repo *models.Repository) *Registry {
|
|
return &Registry{
|
|
scrapers: map[string]Scraper{},
|
|
repo: repo,
|
|
}
|
|
}
|
|
|
|
func (r *Registry) Register(s Scraper) {
|
|
r.scrapers[s.Name()] = s
|
|
}
|
|
|
|
// Run exécute le scraper associé à sourceID et persiste les articles
|
|
func (r *Registry) Run(sourceID string) error {
|
|
sources, err := r.repo.ListSources()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
var source *models.Source
|
|
for i := range sources {
|
|
if sources[i].ID == sourceID {
|
|
source = &sources[i]
|
|
break
|
|
}
|
|
}
|
|
if source == nil {
|
|
return fmt.Errorf("source %s not found", sourceID)
|
|
}
|
|
|
|
scrpr, ok := r.scrapers[source.Type]
|
|
if !ok {
|
|
return fmt.Errorf("no scraper for type %s", source.Type)
|
|
}
|
|
|
|
// Créer le job
|
|
job, err := r.repo.CreateScrapeJob(sourceID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := r.repo.UpdateScrapeJob(job.ID, "running", 0, ""); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Récupérer les symboles surveillés
|
|
symbols, err := r.repo.GetAllWatchedSymbols()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
|
defer cancel()
|
|
|
|
articles, scrapeErr := scrpr.Scrape(ctx, symbols)
|
|
if scrapeErr != nil {
|
|
_ = r.repo.UpdateScrapeJob(job.ID, "error", 0, scrapeErr.Error())
|
|
return scrapeErr
|
|
}
|
|
|
|
// Persister uniquement les nouveaux articles
|
|
count := 0
|
|
for _, a := range articles {
|
|
saved, isNew, err := r.repo.InsertArticleIfNew(sourceID, a.Title, a.Content, a.URL, a.PublishedAt)
|
|
if err != nil || !isNew {
|
|
continue
|
|
}
|
|
count++
|
|
for _, sym := range a.Symbols {
|
|
_ = r.repo.AddArticleSymbol(saved.ID, sym)
|
|
}
|
|
}
|
|
|
|
return r.repo.UpdateScrapeJob(job.ID, "done", count, "")
|
|
}
|
|
|
|
// RunAll exécute tous les scrapers activés
|
|
func (r *Registry) RunAll() error {
|
|
sources, err := r.repo.ListSources()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, src := range sources {
|
|
if !src.Enabled {
|
|
continue
|
|
}
|
|
if err := r.Run(src.ID); err != nil {
|
|
fmt.Printf("scraper %s error: %v\n", src.Name, err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|