diff --git a/.claude/settings.local.json b/.claude/settings.local.json index e0eb0a4..5478d86 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -26,7 +26,9 @@ "WebFetch(domain:docs.ollama.com)", "WebFetch(domain:github.com)", "Bash(docker compose *)", - "Bash(sudo docker *)" + "Bash(sudo docker *)", + "Bash(xargs ls *)", + "Bash(xargs cat *)" ] } } diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..4915dbc --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,94 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +# Start all services (rebuilds images) +make up # docker compose up --build + +# Run standalone (requires env vars) +make dev-backend # cd backend && go run ./cmd/server +make dev-frontend # cd frontend && npm run dev (proxies /api → :8080) + +# Build +cd backend && go build ./cmd/server +cd frontend && npm run build # tsc -b && vite build + +# Lint / type-check +cd frontend && npm run lint # eslint +cd backend && go vet ./... + +# Tests (no test files exist yet) +cd backend && go test ./... 
+ +# Production release +./build-push.sh v1.0.0 # builds and pushes all three images to Gitea registry +docker compose -f docker-compose.prod.yml up -d +``` + +## Architecture + +Three Docker services behind nginx: + +``` +Browser → nginx:80 + ├── serves React SPA (static build) + └── proxies /api/* → backend:8080 + +backend (Go/Gin) → PostgreSQL:5432 + → scraper-service:3001 (Bloomberg only) + → ollama:11434 (optional) + +scraper-service (Node.js/Puppeteer) — headless Chromium for Bloomberg login +``` + +### Backend layout + +- `cmd/server/main.go` — entry point, wires all dependencies +- `internal/models/` — all DB structs (`models.go`) and every SQL query (`repository.go`) +- `internal/api/router.go` — all Gin routes; `handlers/` has one file per domain +- `internal/ai/` — AI providers, two-pass pipeline, async report manager +- `internal/scraper/` — scraper interface, registry, per-source Go implementations +- `internal/scheduler/` — `robfig/cron` v3 scheduler that runs scrape → summarize on each tick +- `internal/database/migrations/` — numbered SQL migrations (auto-applied at startup via golang-migrate) + +### Frontend layout + +- `src/api/client.ts` — fetch wrapper: reads `token` from `localStorage`, sets `Authorization: Bearer`, auto-redirects on 401, unwraps `{"data": ...}` envelopes +- `src/lib/auth.tsx` — `AuthProvider`/`useAuth` React context with `login()`/`logout()` +- `src/lib/router.tsx` — `createBrowserRouter`; authenticated pages nested under `AppLayout`; admin pages under `/admin` +- UI: Radix UI primitives + Tailwind CSS + CVA; path alias `@` → `src/` + +### AI pipeline + +1. **Filter pass** (optional): if article count > `2 × summary_max_articles`, batches articles in groups of `filter_batch_size` (default 20) and asks AI for relevant indices. Falls back to summary provider if no filter role is configured. +2. **Summary pass**: structured French-language prompt with watchlist symbols + truncated article bodies (max 1000 chars). 
Called with `Think: true, NumCtx: 32768`. +3. **Report generation**: async goroutine, 30-minute context. DB row created immediately with `status=generating`; frontend polls `GET /reports` — there is no WebSocket/SSE. + +Three AI roles (`summary`, `report`, `filter`) resolve their provider via settings keys `ai_role_<role>_provider` / `ai_role_<role>_model`, falling back to the single `is_active=TRUE` provider. + +Supported providers: `openai`, `anthropic`, `gemini`, `ollama`, `claudecode` (shells out to `claude -p`). + +### Key non-obvious patterns + +- **`httputil.OK`** uses reflection to convert nil slices to `[]` so the frontend never receives JSON `null` for lists. +- **Bloomberg scraping is split**: the Go backend calls `POST http://scraper:3001/bloomberg/scrape` — it does not run Chromium in-process. +- **API keys are never stored raw**: `ListAIProviders` replaces the encrypted key with a boolean `has_key` field. +- **Scheduler timezone**: cron specs are prefixed with `TZ=<timezone>` so schedules respect the app timezone, not UTC. +- **Nginx resolver trick**: `resolver 127.0.0.11` + `set $backend http://backend:8080` (variable) prevents startup failures when the backend container isn't ready yet — Docker DNS is queried per-request. +- **`claudecode` provider model list** is hardcoded in `backend/internal/ai/claudecode.go` — update it when new Claude models are released. +- **UUID primary keys** require the `pgcrypto` Postgres extension (`gen_random_uuid()`). Migrations handle this automatically. +- **`scrape_jobs`** status values: `pending | running | done | error`. **`reports`** status values: `generating | done | error`. 
+ +## Environment variables + +| Variable | Required | Description | +|----------|----------|-------------| +| `DATABASE_URL` | Yes | Postgres DSN | +| `JWT_SECRET` | Yes | JWT signing secret | +| `ENCRYPTION_KEY` | Yes | 32-byte hex for AES-256-GCM (`openssl rand -hex 32`) | +| `PORT` | No | HTTP port, default `8080` | +| `SCRAPER_URL` | No | Scraper service URL, default `http://scraper:3001` | +| `ADMIN_EMAIL` / `ADMIN_PASSWORD` | No | Bootstrap admin account created on startup | diff --git a/backend/internal/ai/pipeline.go b/backend/internal/ai/pipeline.go index d14c758..716208e 100644 --- a/backend/internal/ai/pipeline.go +++ b/backend/internal/ai/pipeline.go @@ -249,8 +249,9 @@ func parseIndexArray(response string, maxIndex int) []int { func (p *Pipeline) GenerateForAll(ctx context.Context) error { users, err := p.repo.ListUsers() if err != nil { - return err + return fmt.Errorf("GenerateForAll: list users: %w", err) } + fmt.Printf("[pipeline] GenerateForAll: %d user(s) to process\n", len(users)) for _, user := range users { if _, err := p.GenerateForUser(ctx, user.ID); err != nil { fmt.Printf("summary for user %s: %v\n", user.Email, err) diff --git a/backend/internal/scheduler/scheduler.go b/backend/internal/scheduler/scheduler.go index 50b3adf..d1075e1 100644 --- a/backend/internal/scheduler/scheduler.go +++ b/backend/internal/scheduler/scheduler.go @@ -3,6 +3,7 @@ package scheduler import ( "context" "fmt" + "sync" "github.com/robfig/cron/v3" "github.com/tradarr/backend/internal/ai" @@ -16,6 +17,7 @@ type Scheduler struct { pipeline *ai.Pipeline repo *models.Repository entryIDs []cron.EntryID + running sync.Mutex } func New(registry *scraper.Registry, pipeline *ai.Pipeline, repo *models.Repository) *Scheduler { @@ -78,6 +80,17 @@ func (s *Scheduler) loadSchedule() error { } func (s *Scheduler) run() { + if !s.running.TryLock() { + fmt.Println("scheduler: previous cycle still running, skipping") + return + } + defer s.running.Unlock() + defer func() { + if r 
:= recover(); r != nil { + fmt.Printf("scheduler: panic recovered: %v\n", r) + } + }() + fmt.Println("scheduler: starting scraping cycle") if err := s.registry.RunAll(); err != nil { fmt.Printf("scheduler scrape error: %v\n", err) @@ -87,4 +100,5 @@ func (s *Scheduler) run() { if err := s.pipeline.GenerateForAll(context.Background()); err != nil { fmt.Printf("scheduler summary error: %v\n", err) } + fmt.Println("scheduler: cycle complete") }