# ============================================================
# INSIGHT MVP - Docker Compose (observability stack)
# ============================================================
# Extends docker-compose.yml with monitoring, logging & tracing.
#
# Usage:
#   docker compose -f docker-compose.yml -f docker-compose.observability.yml up -d
#
# Grafana (reachable only through an SSH tunnel):
#   ssh -L 3001:localhost:3001 -i .keys/deploy_ed25519 deploy@172.20.10.59
#   Browser: http://localhost:3001
#
# Every published port below is bound to 127.0.0.1 only.
#
# Image tags default to "latest" but can be pinned per service through
# the *_IMAGE_TAG variables (e.g. PROMETHEUS_IMAGE_TAG=v2.53.0) without
# editing this file.
# ============================================================

# Both networks are declared external: this file does not create them.
networks:
  insight-web:
    external: true
  insight-db:
    external: true

# Named volumes with fixed names for the stateful services.
volumes:
  prometheus-data:
    name: insight-prometheus-data
  grafana-data:
    name: insight-grafana-data
  loki-data:
    name: insight-loki-data
  tempo-data:
    name: insight-tempo-data

services:
  # --------------------------------------------------------
  # Prometheus - metrics collection & storage
  # --------------------------------------------------------
  prometheus:
    image: "prom/prometheus:${PROMETHEUS_IMAGE_TAG:-latest}"
    container_name: insight-prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      # Keep 30 days of metrics.
      - "--storage.tsdb.retention.time=30d"
      # Enables the HTTP lifecycle endpoints (reload / quit).
      - "--web.enable-lifecycle"
    volumes:
      - ./config/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus-data:/prometheus
    networks:
      - insight-web
    ports:
      - "127.0.0.1:9090:9090"
    healthcheck:
      test: ["CMD", "wget", "--spider", "-q", "http://localhost:9090/-/ready"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Grafana - dashboards & alerting
  # --------------------------------------------------------
  grafana:
    image: "grafana/grafana:${GRAFANA_IMAGE_TAG:-latest}"
    container_name: insight-grafana
    restart: unless-stopped
    environment:
      GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
      # Compose aborts with this message when the variable is unset or empty.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD muss gesetzt sein}"
      GF_SERVER_ROOT_URL: "http://localhost:3001"
      # Quoted so the value is passed as a string, not a YAML integer.
      GF_SERVER_HTTP_PORT: "3001"
      # Data sources are provisioned from files
      GF_PATHS_PROVISIONING: /etc/grafana/provisioning
      # No anonymous access
      GF_AUTH_ANONYMOUS_ENABLED: "false"
      # Logging
      GF_LOG_LEVEL: info
    volumes:
      - grafana-data:/var/lib/grafana
      - ./config/grafana/provisioning:/etc/grafana/provisioning:ro
    networks:
      - insight-web
    ports:
      - "127.0.0.1:3001:3001"
    # Grafana starts only after Prometheus reports healthy.
    depends_on:
      prometheus:
        condition: service_healthy
    healthcheck:
      # NOTE(review): requires curl inside the image - confirm for the
      # tag in use.
      test: ["CMD-SHELL", "curl -f http://localhost:3001/api/health || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Loki - log aggregation
  # --------------------------------------------------------
  loki:
    image: "grafana/loki:${LOKI_IMAGE_TAG:-latest}"
    container_name: insight-loki
    restart: unless-stopped
    command: "-config.file=/etc/loki/loki.yml"
    volumes:
      - ./config/loki/loki.yml:/etc/loki/loki.yml:ro
      - loki-data:/loki
    networks:
      - insight-web
    ports:
      - "127.0.0.1:3100:3100"
    healthcheck:
      # NOTE(review): requires a shell and wget inside the image - confirm
      # for the tag in use.
      test: ["CMD-SHELL", "wget --spider -q http://localhost:3100/ready || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # Promtail - log collector (reads Docker logs)
  # --------------------------------------------------------
  promtail:
    image: "grafana/promtail:${PROMTAIL_IMAGE_TAG:-latest}"
    container_name: insight-promtail
    restart: unless-stopped
    command: "-config.file=/etc/promtail/promtail.yml"
    volumes:
      - ./config/promtail/promtail.yml:/etc/promtail/promtail.yml:ro
      # Host log locations and the Docker socket, all mounted read-only.
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    networks:
      - insight-web
    depends_on:
      - loki

  # --------------------------------------------------------
  # Tempo - distributed tracing
  # --------------------------------------------------------
  tempo:
    image: "grafana/tempo:${TEMPO_IMAGE_TAG:-latest}"
    container_name: insight-tempo
    restart: unless-stopped
    command: "-config.file=/etc/tempo/tempo.yml"
    volumes:
      - ./config/tempo/tempo.yml:/etc/tempo/tempo.yml:ro
      - tempo-data:/var/tempo
    networks:
      - insight-web
    ports:
      - "127.0.0.1:3200:3200"  # Tempo API
      - "127.0.0.1:4317:4317"  # OTLP gRPC
    healthcheck:
      # NOTE(review): requires a shell and wget inside the image - confirm
      # for the tag in use.
      test: ["CMD-SHELL", "wget --spider -q http://localhost:3200/ready || exit 1"]
      interval: 30s
      timeout: 5s
      retries: 3

  # --------------------------------------------------------
  # cAdvisor - container metrics
  # --------------------------------------------------------
  cadvisor:
    image: "gcr.io/cadvisor/cadvisor:${CADVISOR_IMAGE_TAG:-latest}"
    container_name: insight-cadvisor
    restart: unless-stopped
    privileged: true
    # Host paths are mounted read-only.
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    networks:
      - insight-web
    ports:
      # Container port 8080 is published on host port 8081.
      - "127.0.0.1:8081:8080"

  # --------------------------------------------------------
  # PostgreSQL exporter - database metrics for Prometheus
  # --------------------------------------------------------
  postgres-exporter:
    image: "prometheuscommunity/postgres-exporter:${POSTGRES_EXPORTER_IMAGE_TAG:-latest}"
    container_name: insight-postgres-exporter
    restart: unless-stopped
    environment:
      # Credentials are passed separately from the connection URI so that
      # a password containing URL-reserved characters (@ / : ? #) cannot
      # corrupt the connection string.
      DATA_SOURCE_URI: "postgres:5432/${DB_NAME:-platform_core}?sslmode=disable"
      DATA_SOURCE_USER: "${DB_USER:-insight}"
      # Compose aborts with this message when the variable is unset or
      # empty, instead of starting the exporter with an empty password.
      DATA_SOURCE_PASS: "${DB_PASSWORD:?DB_PASSWORD muss gesetzt sein}"
    networks:
      - insight-web
      - insight-db
    ports:
      - "127.0.0.1:9187:9187"
    # NOTE(review): "postgres" is not defined in this file; presumably it
    # comes from docker-compose.yml - confirm.
    depends_on:
      - postgres