feat: add frontend + backend + database to retrieve and process news from Yahoo
backend/internal/scraper/registry.go (new file, 106 lines)
@@ -0,0 +1,106 @@
package scraper

import (
	"context"
	"fmt"
	"time"

	"github.com/tradarr/backend/internal/models"
)

type Registry struct {
	scrapers map[string]Scraper
	repo     *models.Repository
}

func NewRegistry(repo *models.Repository) *Registry {
	return &Registry{
		scrapers: map[string]Scraper{},
		repo:     repo,
	}
}

// Register indexes a scraper by its name, which must match the source Type it handles.
func (r *Registry) Register(s Scraper) {
	r.scrapers[s.Name()] = s
}

// Run executes the scraper associated with sourceID and persists the articles.
func (r *Registry) Run(sourceID string) error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}

	var source *models.Source
	for i := range sources {
		if sources[i].ID == sourceID {
			source = &sources[i]
			break
		}
	}
	if source == nil {
		return fmt.Errorf("source %s not found", sourceID)
	}

	scrpr, ok := r.scrapers[source.Type]
	if !ok {
		return fmt.Errorf("no scraper for type %s", source.Type)
	}

	// Create the job
	job, err := r.repo.CreateScrapeJob(sourceID)
	if err != nil {
		return err
	}

	if err := r.repo.UpdateScrapeJob(job.ID, "running", 0, ""); err != nil {
		return err
	}

	// Fetch the watched symbols
	symbols, err := r.repo.GetAllWatchedSymbols()
	if err != nil {
		return err
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
	defer cancel()

	articles, scrapeErr := scrpr.Scrape(ctx, symbols)
	if scrapeErr != nil {
		_ = r.repo.UpdateScrapeJob(job.ID, "error", 0, scrapeErr.Error())
		return scrapeErr
	}

	// Persist the articles; a failed upsert skips that article rather
	// than aborting the whole job.
	count := 0
	for _, a := range articles {
		saved, err := r.repo.UpsertArticle(sourceID, a.Title, a.Content, a.URL, a.PublishedAt)
		if err != nil {
			continue
		}
		count++
		for _, sym := range a.Symbols {
			_ = r.repo.AddArticleSymbol(saved.ID, sym)
		}
	}

	return r.repo.UpdateScrapeJob(job.ID, "done", count, "")
}

// RunAll runs every enabled scraper.
func (r *Registry) RunAll() error {
	sources, err := r.repo.ListSources()
	if err != nil {
		return err
	}
	for _, src := range sources {
		if !src.Enabled {
			continue
		}
		if err := r.Run(src.ID); err != nil {
			fmt.Printf("scraper %s error: %v\n", src.Name, err)
		}
	}
	return nil
}
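The Scraper interface and the article type are not defined in this file. Inferred from the calls above (s.Name(), scrpr.Scrape(ctx, symbols), and the fields read in the persist loop), they presumably look something like the following sketch; the field and parameter types are assumptions, only the names appear in registry.go:

package scraper

import (
	"context"
	"time"
)

// Article carries the fields the registry reads when persisting results.
// The field types are assumed; only the names are visible in registry.go.
type Article struct {
	Title       string
	Content     string
	URL         string
	PublishedAt time.Time
	Symbols     []string
}

// Scraper is inferred from registry usage: Name() keys the scrapers map
// by source type, and Scrape fetches articles for the watched symbols
// within the caller's deadline.
type Scraper interface {
	Name() string
	Scrape(ctx context.Context, symbols []string) ([]Article, error)
}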
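For context, wiring the registry into the backend at startup might look roughly like this; the yahoo package, its New constructor, and models.OpenRepository are hypothetical names used for illustration, not part of this commit:

package main

import (
	"log"

	"github.com/tradarr/backend/internal/models"
	"github.com/tradarr/backend/internal/scraper"
	"github.com/tradarr/backend/internal/scraper/yahoo" // hypothetical package
)

func main() {
	// models.OpenRepository stands in for however the repository is
	// actually constructed elsewhere in the backend.
	repo, err := models.OpenRepository("tradarr.db")
	if err != nil {
		log.Fatal(err)
	}

	r := scraper.NewRegistry(repo)
	// Name() must return the source Type stored in the database
	// (e.g. "yahoo") so Run can resolve the scraper for each source.
	r.Register(yahoo.New())

	// Run every enabled source once; per-source errors are printed by
	// RunAll and do not abort the loop.
	if err := r.RunAll(); err != nil {
		log.Fatal(err)
	}
}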