INSIGHT-MVP/docker-compose.observability.yml
Thomas Reitz 10f291cdda feat: implement Sprint 1 Alpha - full stack with Docker, NestJS, React
Docker Infrastructure:
- docker-compose.yml with Traefik 3, PostgreSQL 16, PgBouncer, Redis 7, step-ca
- docker-compose.observability.yml with Prometheus, Grafana, Loki, Tempo, Promtail
- Traefik dynamic config (TLS, security headers, CORS, compression)
- PostgreSQL init script (uuid-ossp, pgcrypto, pg_trgm extensions)
- Grafana auto-provisioned datasources (Prometheus, Loki, Tempo)

NestJS Core-Service:
- Auth module: Login (email/password), TOTP 2FA, JWT RS256, token refresh/revocation
- Users module: CRUD, bcrypt cost 12, pagination, role-based access
- Tenants module: CRUD, member management, slug validation
- Prisma schemas: core (Users, AuthProviders, Tenants, Modules, AuditLog)
                  tenant (Contacts, Activities - CRM reference for Sprint 2)
- TenantPrismaService: Dynamic per-tenant DB connections with caching
- RedisService: Token blocklist, refresh token families, generic cache
- Global JwtAuthGuard with @Public() decorator, RolesGuard, GlobalExceptionFilter
- Health endpoint with DB + Redis status checks
- Swagger API documentation (dev only)
- Multi-stage Dockerfile (dev + production)

React Frontend:
- Vite 6 + React 18 + TypeScript strict
- AuthContext with silent refresh (access token in memory, NOT localStorage)
- Login page with TOTP 2FA support
- App shell with sidebar navigation
- Admin pages: Users + Tenants management tables
- API client with automatic token refresh interceptor
- Multi-stage Dockerfile (dev + nginx production)

CI/CD Pipelines:
- ci.yml: Lint, type-check, test, build on all branches
- deploy.yml: Docker build, push to Forgejo registry, SSH deploy

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 15:33:36 +01:00

185 lines
5.7 KiB
YAML

# ============================================================
# INSIGHT MVP - Docker Compose (Observability-Stack)
# ============================================================
# Extends docker-compose.yml with monitoring, logging & tracing.
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.observability.yml up -d
#
# Grafana (reachable via SSH tunnel only):
#   ssh -L 3001:localhost:3001 -i .keys/deploy_ed25519 deploy@172.20.10.59
#   Browser: http://localhost:3001
# ============================================================
networks:
  # Both networks are marked external: this file only references them and
  # does not create them, so they must already exist when the stack starts.
  insight-web:
    external: true
  insight-db:
    external: true
volumes:
  # Persistent data volumes for the observability services. Each one carries
  # an explicit `name`, so the volume name on the host is exactly the value
  # given here.
  prometheus-data:
    name: insight-prometheus-data
  grafana-data:
    name: insight-grafana-data
  loki-data:
    name: insight-loki-data
  tempo-data:
    name: insight-tempo-data
services:
  # NOTE(review): every image below uses the floating `latest` tag, so the
  # versions that actually run depend on when the image was pulled - consider
  # pinning explicit versions.

  # --------------------------------------------------------
  # Prometheus - metrics collection & storage
  # --------------------------------------------------------
  prometheus:
    image: prom/prometheus:latest
    container_name: insight-prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=30d"
      - "--web.enable-lifecycle"
    volumes:
      - ./config/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - insight-web
    ports:
      # Published on the host loopback interface only.
      - "127.0.0.1:9090:9090"
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Grafana - dashboards & alerting
  # --------------------------------------------------------
  grafana:
    image: grafana/grafana:latest
    container_name: insight-grafana
    restart: unless-stopped
    environment:
      GF_SECURITY_ADMIN_USER: ${GRAFANA_ADMIN_USER:-admin}
      # `:?` makes interpolation abort with this message when the variable
      # is unset or empty, so Grafana never starts without a password.
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD muss gesetzt sein}
      GF_SERVER_ROOT_URL: "http://localhost:3001"
      # Quoted so the environment value is a string, not a YAML integer.
      GF_SERVER_HTTP_PORT: "3001"
      # Data sources via provisioning
      GF_PATHS_PROVISIONING: /etc/grafana/provisioning
      # No anonymous access
      GF_AUTH_ANONYMOUS_ENABLED: "false"
      # Logging
      GF_LOG_LEVEL: info
    volumes:
      - grafana-data:/var/lib/grafana
      - ./config/grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - insight-web
    ports:
      # Loopback only; matches GF_SERVER_HTTP_PORT above.
      - "127.0.0.1:3001:3001"
    depends_on:
      # Wait until the Prometheus healthcheck reports healthy.
      prometheus:
        condition: service_healthy
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:3001/api/health || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Loki - log aggregation
  # --------------------------------------------------------
  loki:
    image: grafana/loki:latest
    container_name: insight-loki
    restart: unless-stopped
    command: -config.file=/etc/loki/loki.yml
    volumes:
      - ./config/loki/loki.yml:/etc/loki/loki.yml:ro
      - loki-data:/loki
    networks:
      - insight-web
    ports:
      - "127.0.0.1:3100:3100"
    healthcheck:
      # NOTE(review): CMD-SHELL + wget needs a shell and wget inside the
      # image - confirm both exist for the image tag actually deployed.
      test: ["CMD-SHELL", "wget --spider -q http://localhost:3100/ready || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Promtail - log collector (reads Docker logs)
  # --------------------------------------------------------
  promtail:
    image: grafana/promtail:latest
    container_name: insight-promtail
    restart: unless-stopped
    command: -config.file=/etc/promtail/promtail.yml
    volumes:
      - ./config/promtail/promtail.yml:/etc/promtail/promtail.yml:ro
      # Host log directories and the Docker socket, all mounted read-only.
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - insight-web
    depends_on:
      # Short form: start-order dependency only; it does not wait for
      # Loki's healthcheck to pass.
      - loki

  # --------------------------------------------------------
  # Tempo - distributed tracing
  # --------------------------------------------------------
  tempo:
    image: grafana/tempo:latest
    container_name: insight-tempo
    restart: unless-stopped
    command: -config.file=/etc/tempo/tempo.yml
    volumes:
      - ./config/tempo/tempo.yml:/etc/tempo/tempo.yml:ro
      - tempo-data:/var/tempo
    networks:
      - insight-web
    ports:
      - "127.0.0.1:3200:3200"  # Tempo API
      - "127.0.0.1:4317:4317"  # OTLP gRPC
    healthcheck:
      # NOTE(review): CMD-SHELL + wget needs a shell and wget inside the
      # image - confirm both exist for the image tag actually deployed.
      test: ["CMD-SHELL", "wget --spider -q http://localhost:3200/ready || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # cAdvisor - container metrics
  # --------------------------------------------------------
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    container_name: insight-cadvisor
    restart: unless-stopped
    privileged: true
    volumes:
      # Host paths, all mounted read-only.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - insight-web
    ports:
      # Host port 8081 (loopback only) maps to container port 8080.
      - "127.0.0.1:8081:8080"

  # --------------------------------------------------------
  # PostgreSQL exporter - DB metrics for Prometheus
  # --------------------------------------------------------
  postgres-exporter:
    image: prometheuscommunity/postgres-exporter:latest
    container_name: insight-postgres-exporter
    restart: unless-stopped
    environment:
      # Credentials are handed over through the exporter's separate
      # DATA_SOURCE_URI / DATA_SOURCE_USER / DATA_SOURCE_PASS variables
      # instead of being spliced into a connection URL by hand, where
      # characters such as '@', '/' or ':' in the password would break it.
      DATA_SOURCE_URI: "postgres:5432/${DB_NAME:-platform_core}?sslmode=disable"
      DATA_SOURCE_USER: "${DB_USER:-insight}"
      # Fail at interpolation time instead of starting with an empty
      # password (same pattern as GRAFANA_ADMIN_PASSWORD).
      DATA_SOURCE_PASS: "${DB_PASSWORD:?DB_PASSWORD muss gesetzt sein}"
    networks:
      - insight-web
      # presumably the network on which `postgres` is reachable - verify
      # against docker-compose.yml
      - insight-db
    ports:
      - "127.0.0.1:9187:9187"
    depends_on:
      # `postgres` is not defined in this file; it has to come from the
      # docker-compose.yml this file is combined with.
      - postgres