---
title: "Prometheus — Déploiement et configuration"
domain: monitoring
subdomain: prometheus
type: snippet
tags: [prometheus, alertmanager, promql, exporters, docker, metrics, alerting]
difficulty: intermediate
status: stable
updated: "2026-05-26"
---
import { Tip, Warning } from '@/components/mdx';

## Architecture

```
Targets (exporters)
    │  /metrics (HTTP)
    ▼
Prometheus :9090  ──→  Alertmanager :9093  ──→  Email / Slack / PagerDuty
    │
    ▼
Grafana :3000 (datasource)
```

## Déploiement Docker

```yaml
# docker-compose.yml — Prometheus server and Alertmanager
services:
  prometheus:
    image: prom/prometheus:v2.51.0
    ports:
      - "9090:9090"
    volumes:
      # Main configuration and alerting rules are bind-mounted from the host
      - "./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml"
      - "./prometheus/rules:/etc/prometheus/rules"
      # Named volume mounted at /prometheus (declared at the bottom of the file)
      - "prometheus_data:/prometheus"
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      # Keep 30 days of samples
      - "--storage.tsdb.retention.time=30d"
      # Allows reloading the configuration through POST /-/reload
      - "--web.enable-lifecycle"

  alertmanager:
    image: prom/alertmanager:v0.27.0
    ports:
      - "9093:9093"
    volumes:
      - "./prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml"

volumes:
  prometheus_data: {}
```

```bash
# Start both services in the background
docker compose up --detach prometheus alertmanager
# Prometheus web UI: http://localhost:9090
```

## prometheus.yml

```yaml
# Intervals shared by all scrape jobs and rule evaluations
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Where fired alerts are sent
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "alertmanager:9093"

# Alerting rule files to load
rule_files:
  - "rules/*.yml"

scrape_configs:
  # Prometheus scraping its own metrics
  - job_name: prometheus
    static_configs:
      - targets:
          - "localhost:9090"

  # Host metrics from Node Exporter
  - job_name: node
    static_configs:
      - targets:
          - "node-exporter:9100"
        labels:
          env: prod

  # Container metrics from cAdvisor
  - job_name: cadvisor
    static_configs:
      - targets:
          - "cadvisor:8080"

  # HTTP probes: the listed URLs are what gets probed, while the scrape
  # itself is sent to the Blackbox Exporter (see relabel_configs below)
  - job_name: blackbox
    metrics_path: /probe
    params:
      module:
        - http_2xx
    static_configs:
      - targets:
          - "https://mon-app.example.com"
          - "https://api.example.com"
    relabel_configs:
      # 1. Pass the listed URL as the `target` query parameter
      - source_labels:
          - __address__
        target_label: __param_target
      # 2. Expose that URL as the `instance` label
      - source_labels:
          - __param_target
        target_label: instance
      # 3. Send the actual scrape request to the exporter
      - target_label: __address__
        replacement: "blackbox-exporter:9115"
```

## Exporters courants

```yaml
# Exporter services — indented to sit under `services:` in docker-compose.yml

  # Node Exporter — Linux host metrics
  node-exporter:
    image: prom/node-exporter:v1.7.0
    ports: ["9100:9100"]
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Point the filesystem collector at the host root mounted above;
      # without this flag the /rootfs mount is never used
      - '--path.rootfs=/rootfs'
      # `$$` is Compose escaping for a literal `$` in the regex
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

  # cAdvisor — Docker container metrics
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.49.1
    ports: ["8080:8080"]
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    privileged: true

  # Blackbox Exporter — HTTP/TCP/ICMP probes
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.24.0
    ports: ["9115:9115"]
    volumes:
      - ./prometheus/blackbox.yml:/etc/blackbox_exporter/config.yml
```

## PromQL — Requêtes essentielles

```promql
# CPU used (%) per instance
100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)

# Available RAM (GiB)
node_memory_MemAvailable_bytes / 1024 / 1024 / 1024

# RAM used (%)
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100

# Disk used (%) per mount point
(node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes * 100

# Inbound network traffic (Mibit/s), loopback excluded
rate(node_network_receive_bytes_total{device!="lo"}[5m]) * 8 / 1024 / 1024

# Outbound network traffic (Mibit/s), loopback excluded
rate(node_network_transmit_bytes_total{device!="lo"}[5m]) * 8 / 1024 / 1024

# Number of containers reported by cAdvisor
count(container_last_seen{name!=""})

# HTTP probe up/down (1 = success, 0 = failure)
probe_success{job="blackbox"}

# HTTP latency (p95)
# NOTE(review): assumes the application exports an
# http_request_duration_seconds histogram — confirm the metric name
histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m]))

# HTTP error ratio (5xx requests / all requests)
# Both sides are aggregated with sum(): dividing the raw vectors matches
# series on identical label sets, so each 5xx series would only be divided
# by itself and the result would always be 1.
sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
```

## Règles d'alerte

```yaml
# prometheus/rules/alerts.yml
# Every rule carries both a `summary` and a `description` annotation so that
# notification templates relying on either one never render empty.
groups:
  - name: infra
    rules:
      # Any scrape target that has been failing for 2 minutes
      - alert: InstanceDown
        expr: up == 0
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} ({{ $labels.job }}) inaccessible depuis 2 min"

      # Average CPU usage above 85 % for 5 minutes
      - alert: HighCPU
        expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 85
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "CPU élevé sur {{ $labels.instance }}"
          description: "CPU à {{ $value | printf \"%.0f\" }}%"

      # Less than 15 % of space left on the root filesystem for 5 minutes
      - alert: LowDiskSpace
        expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 15
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Disque faible sur {{ $labels.instance }}"
          description: "Seulement {{ $value | printf \"%.0f\" }}% disponible sur /"

      # Memory usage above 90 % for 5 minutes
      - alert: HighMemory
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "RAM critique sur {{ $labels.instance }}"
          description: "RAM utilisée à {{ $value | printf \"%.0f\" }}%"

      # Blackbox HTTP probe failing for 1 minute
      - alert: SiteDown
        expr: probe_success{job="blackbox"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Site inaccessible : {{ $labels.instance }}"
          description: "{{ $labels.instance }} ne répond plus à la sonde HTTP depuis 1 min"
```

## Alertmanager

```yaml
# prometheus/alertmanager.yml
global:
  resolve_timeout: 5m
  # SMTP settings shared by every email receiver below
  smtp_from: alertmanager@example.com
  smtp_smarthost: 'smtp.example.com:587'
  smtp_auth_username: alertmanager@example.com
  # Placeholder — do not commit a real password to version control
  smtp_auth_password: "<PASSWORD>"

route:
  group_by: [alertname, instance]
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  # Fallback receiver for alerts that match none of the routes below
  receiver: default
  routes:
    # `matchers` replaces the deprecated `match` / `match_re` keys
    - matchers:
        - 'severity = "critical"'
      receiver: critical-slack
    - matchers:
        - 'severity = "warning"'
      receiver: email-ops

receivers:
  - name: default
    email_configs:
      - to: ops@example.com

  - name: critical-slack
    slack_configs:
      - api_url: "<SLACK_WEBHOOK_URL>"
        channel: "#alerts-critical"
        title: "CRITIQUE — {{ .GroupLabels.alertname }}"
        # One line per alert: always the summary, plus the description when
        # the rule defines one, so the message is never empty
        text: "{{ range .Alerts }}{{ .Annotations.summary }}{{ if .Annotations.description }} — {{ .Annotations.description }}{{ end }}\n{{ end }}"

  - name: email-ops
    email_configs:
      - to: ops@example.com
        subject: "[WARNING] {{ .GroupLabels.alertname }}"
```

## Commandes utiles

```bash
# Reload the configuration without a restart
curl -X POST http://localhost:9090/-/reload

# Validate the configuration
# (`docker compose exec` targets the service name, so it works whatever
# container name Compose generated)
docker compose exec prometheus promtool check config /etc/prometheus/prometheus.yml

# Validate the alerting rules
docker compose exec prometheus promtool check rules /etc/prometheus/rules/alerts.yml

# Send a test alert manually
# (API v2: the v1 endpoints were removed in Alertmanager v0.27.0)
curl -X POST http://localhost:9093/api/v2/alerts \
  -H "Content-Type: application/json" \
  -d '[{"labels":{"alertname":"TestAlert","severity":"warning"},"annotations":{"summary":"Test"}}]'

# Target status
curl -s http://localhost:9090/api/v1/targets | jq '.data.activeTargets[] | {job: .labels.job, health, lastError}'
```

<Tip>
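Activer `--web.enable-lifecycle` pour recharger la config via `POST /-/reload` sans redémarrer le container : très utile en production pour éviter les interruptions lors de l'ajout de targets. Attention : ce flag active aussi `POST /-/quit` (arrêt de Prometheus) ; l'accès à ces endpoints doit donc être restreint.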
</Tip>

<Warning>
Par défaut Prometheus n'a pas d'authentification. En production, protéger l'interface via un reverse proxy (Nginx basic auth ou OAuth2 Proxy) et restreindre l'accès réseau.
</Warning>
