Commit 026a5d99 authored by Pedro Eduardo Trujillo's avatar Pedro Eduardo Trujillo
Browse files

Pisa config de despliegue original, aumentándola

Incorpora uso de Traefik como proxy, omite Caddy original.
Limita más los recursos asignados.
Actualiza definición de CI.
parent 60d355e8
Loading
Loading
Loading
Loading

.env

0 → 100644
+2 −0
Original line number Diff line number Diff line
PUBLIC_HOSTNAME=localhost
UI_AUTH=user:hashpass
+24 −9
Original line number Diff line number Diff line
@@ -8,16 +8,21 @@ deploy-dev:
    GIT_SUBMODULE_STRATEGY: normal
    DOCKER_DRIVER: overlay2
    SSH_REMOTE: ${DEV_SSH_REMOTE}
    DEPLOY_DIR_NAME: swarmprom
    DEPLOY_DIR_NAME: '.'
    STATUS_CHECK_DELAY: 60
    STACK: metric
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus metric_caddy
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus
    ADMIN_USER: ${DEV_ADMIN_USER}
    ADMIN_PASSWORD: ${DEV_ADMIN_PASSWORD}
    UI_AUTH: ${DEV_UI_AUTH}
    PUBLIC_HOSTNAME: ${DEV_PUBLIC_HOSTNAME}
  services:
    - docker:dind
  script:
    - deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD}
    - create-nets.sh metric-net
    - >
      deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD} UI_AUTH=${UI_AUTH}
      PUBLIC_HOSTNAME=${PUBLIC_HOSTNAME}
  environment:
    name: dev
  only:
@@ -31,16 +36,21 @@ deploy-supporting-branch:
    GIT_SUBMODULE_STRATEGY: normal
    DOCKER_DRIVER: overlay2
    SSH_REMOTE: ${DEV_SSH_REMOTE}
    DEPLOY_DIR_NAME: swarmprom
    DEPLOY_DIR_NAME: '.'
    STATUS_CHECK_DELAY: 60
    STACK: metric
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus metric_caddy
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus
    ADMIN_USER: ${DEV_ADMIN_USER}
    ADMIN_PASSWORD: ${DEV_ADMIN_PASSWORD}
    UI_AUTH: ${DEV_UI_AUTH}
    PUBLIC_HOSTNAME: ${DEV_PUBLIC_HOSTNAME}
  services:
    - docker:dind
  script:
    - deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD}
    - create-nets.sh metric-net
    - >
      deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD} UI_AUTH=${UI_AUTH}
      PUBLIC_HOSTNAME=${PUBLIC_HOSTNAME}
  environment:
    name: dev
  only:
@@ -57,16 +67,21 @@ deploy-pro:
    GIT_SUBMODULE_STRATEGY: normal
    DOCKER_DRIVER: overlay2
    SSH_REMOTE: ${PRO_SSH_REMOTE}
    DEPLOY_DIR_NAME: swarmprom
    DEPLOY_DIR_NAME: '.'
    STATUS_CHECK_DELAY: 60
    STACK: metric
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus metric_caddy
    SERVICES_TO_CHECK: metric_dockerd-exporter metric_cadvisor metric_grafana metric_alertmanager metric_unsee metric_node-exporter metric_prometheus
    ADMIN_USER: ${PRO_ADMIN_USER}
    ADMIN_PASSWORD: ${PRO_ADMIN_PASSWORD}
    UI_AUTH: ${PRO_UI_AUTH}
    PUBLIC_HOSTNAME: ${PRO_PUBLIC_HOSTNAME}
  services:
    - docker:dind
  script:
    - deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD}
    - create-nets.sh metric-net
    - >
      deploy.sh ADMIN_USER=${ADMIN_USER} ADMIN_PASSWORD=${ADMIN_PASSWORD} UI_AUTH=${UI_AUTH}
      PUBLIC_HOSTNAME=${PUBLIC_HOSTNAME}
  environment:
    name: pro
  only:

docker-compose.yml

0 → 100644
+240 −0
Original line number Diff line number Diff line
# Stack definition for the metrics services: Prometheus, Grafana, Alertmanager,
# unsee and the per-node exporters.
version: '3.5'

services:
  # Caddy instance driven by the dockerd_config Caddyfile, one task per node.
  # Presumably it proxies the Docker engine metrics endpoint reachable on the
  # docker_gwbridge address -- confirm against the Caddyfile.
  dockerd-exporter:
    # NOTE(review): no tag is given, so the registry default tag is pulled;
    # consider pinning a version for reproducible deployments.
    image: stefanprodan/caddy
    environment:
      # Gateway address of the docker_gwbridge network. It can differ between
      # hosts, so it is overridable at deploy time; the default is the value
      # that used to be hard-coded here.
      - DOCKER_GWBRIDGE_IP=${DOCKER_GWBRIDGE_IP:-172.18.0.1}
    networks:
      - metric-net
    configs:
      - source: dockerd_config
        target: /etc/caddy/Caddyfile
    deploy:
      # One task on every swarm node.
      mode: global
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # cAdvisor container metrics collector, one task per node.
  cadvisor:
    # NOTE(review): no tag is given, so the registry default tag is pulled;
    # consider pinning a version for reproducible deployments.
    image: google/cadvisor
    command: -logtostderr -docker_only
    networks:
      - metric-net
    volumes:
      # Every host path is mounted read-only: cAdvisor only inspects the host.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - /:/rootfs:ro
      # Was mounted read-write; read-only is enough and matches the other
      # mounts (connecting to the Docker socket does not need a writable mount).
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    deploy:
      # One task on every swarm node.
      mode: global
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # Grafana dashboard UI, routed by traefik under the grafana.<PUBLIC_HOSTNAME> host.
  grafana:
    image: stefanprodan/swarmprom-grafana:5.0.1
    environment:
      # NOTE(review): both credentials fall back to "admin" when the variable
      # is not provided at deploy time.
      - GF_SECURITY_ADMIN_USER=${ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      # Optional root URL and SMTP settings, currently disabled.
      #- GF_SERVER_ROOT_URL=${GF_SERVER_ROOT_URL:-localhost}
      #- GF_SMTP_ENABLED=${GF_SMTP_ENABLED:-false}
      #- GF_SMTP_FROM_ADDRESS=${GF_SMTP_FROM_ADDRESS:-grafana@test.com}
      #- GF_SMTP_FROM_NAME=${GF_SMTP_FROM_NAME:-Grafana}
      #- GF_SMTP_HOST=${GF_SMTP_HOST:-smtp:25}
      #- GF_SMTP_USER=${GF_SMTP_USER}
      #- GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD}
    networks:
      # metric-net: traffic between the stack's services.
      # traefik-net: network named in the traefik.docker.network label below.
      - metric-net
      - traefik-net
    volumes:
      # Named volume mounted at /var/lib/grafana.
      - grafana:/var/lib/grafana
    deploy:
      mode: replicated
      replicas: 1
      labels:
        # Routing labels read by traefik. No traefik.frontend.auth.basic label
        # is set on this service -- presumably Grafana's own login is relied
        # on; confirm.
        traefik.port: "3000"
        traefik.docker.network: traefik-net
        traefik.frontend.rule: Host:grafana.${PUBLIC_HOSTNAME}
        traefik.backend: grafana
      placement:
        constraints:
          # Restricts the single replica to manager nodes.
          - node.role == manager
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # Alertmanager, routed by traefik under the alertmanager.<PUBLIC_HOSTNAME> host.
  alertmanager:
    image: stefanprodan/swarmprom-alertmanager:v0.14.0
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
    environment:
      # Slack settings with defaults. NOTE(review): the default SLACK_URL ends
      # in the literal "TOKEN", which looks like a placeholder -- confirm that a
      # real webhook is supplied wherever notifications are expected.
      - SLACK_URL=${SLACK_URL:-https://hooks.slack.com/services/TOKEN}
      - SLACK_CHANNEL=${SLACK_CHANNEL:-general}
      - SLACK_USER=${SLACK_USER:-alertmanager}
    networks:
      - metric-net
      - traefik-net
    volumes:
      # Named volume backing the --storage.path directory given above.
      - alertmanager:/alertmanager
    deploy:
      mode: replicated
      replicas: 1
      labels:
        # Routing labels read by traefik; basic-auth credentials come from UI_AUTH.
        # NOTE(review): confirm what traefik does when UI_AUTH is empty or
        # unset, so the UI is never published without authentication.
        traefik.port: "9093"
        traefik.docker.network: traefik-net
        traefik.frontend.auth.basic: ${UI_AUTH}
        traefik.frontend.rule: Host:alertmanager.${PUBLIC_HOSTNAME}
        traefik.backend: alertmanager
      placement:
        constraints:
          # Restricts the single replica to manager nodes.
          - node.role == manager
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # unsee alert dashboard, routed by traefik under the unsee.<PUBLIC_HOSTNAME> host.
  unsee:
    image: cloudflare/unsee:v0.8.0
    networks:
      - metric-net
      - traefik-net
    environment:
      # Points unsee at the alertmanager service of this stack on port 9093.
      - "ALERTMANAGER_URIS=default:http://alertmanager:9093"
    deploy:
      mode: replicated
      replicas: 1
      labels:
        # Routing labels read by traefik; basic-auth credentials come from UI_AUTH.
        # NOTE(review): confirm what traefik does when UI_AUTH is empty or
        # unset, so the UI is never published without authentication.
        traefik.port: "8080"
        traefik.docker.network: traefik-net
        traefik.frontend.auth.basic: ${UI_AUTH}
        traefik.frontend.rule: Host:unsee.${PUBLIC_HOSTNAME}
        traefik.backend: unsee
      # No placement constraint: the replica may be scheduled on any node.
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # Prometheus node exporter for host-level metrics, one task per node.
  node-exporter:
    image: stefanprodan/swarmprom-node-exporter:v0.15.2
    command:
      # sysfs and procfs are read from the host mounts declared under volumes.
      - '--path.sysfs=/host/sys'
      - '--path.procfs=/host/proc'
      - '--collector.textfile.directory=/etc/node-exporter/'
      # "$$" is the Compose escape for a literal "$", so the regex ends in "($|/)".
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
      # No collectors are explicitly enabled here because the defaults are fine;
      # see https://github.com/prometheus/node_exporter
      # The ipvs collector is disabled because it floods the node-exporter logs
      # with errors on CentOS 7 VMs.
      - '--no-collector.ipvs'
    environment:
      # Swarm service template; presumably expands to the ID of the node that
      # runs the task -- confirm.
      - NODE_ID={{.Node.ID}}
    networks:
      - metric-net
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      # Exposes the host's /etc/hostname inside the container as /etc/nodename.
      - /etc/hostname:/etc/nodename
    deploy:
      # One task on every swarm node.
      mode: global
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        limits:
          cpus: '0.5'
          memory: 128M
        reservations:
          memory: 64M

  # Prometheus server, routed by traefik under the prometheus.<PUBLIC_HOSTNAME> host.
  prometheus:
    image: stefanprodan/swarmprom-prometheus:v2.2.0-rc.0
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # TSDB data is written to the named volume mounted below.
      - '--storage.tsdb.path=/prometheus'
      # Only the last 24 hours of samples are retained.
      - '--storage.tsdb.retention=24h'
    networks:
      - metric-net
      - traefik-net
    volumes:
      - prometheus:/prometheus
    configs:
      # Rule files taken from the top-level configs section of this file.
      - source: node_rules
        target: /etc/prometheus/swarm_node.rules.yml
      - source: task_rules
        target: /etc/prometheus/swarm_task.rules.yml
    deploy:
      mode: replicated
      replicas: 1
      labels:
        # Routing labels read by traefik; basic-auth credentials come from UI_AUTH.
        # NOTE(review): confirm what traefik does when UI_AUTH is empty or
        # unset, so the UI is never published without authentication.
        traefik.port: "9090"
        traefik.docker.network: traefik-net
        traefik.frontend.auth.basic: ${UI_AUTH}
        traefik.frontend.rule: Host:prometheus.${PUBLIC_HOSTNAME}
        traefik.backend: prometheus
      placement:
        constraints:
          # Restricts the single replica to manager nodes.
          - node.role == manager
      restart_policy:
        condition: on-failure
        delay: 1m
        window: 3m
      resources:
        # Larger memory allowance than the other services in this file.
        limits:
          cpus: '0.5'
          memory: 512M
        reservations:
          memory: 128M

# Both networks are external: they must already exist when the stack is
# deployed and are not created or removed with it.
networks:
  # Network shared by every service in this file.
  metric-net:
    external: true
  # Network named in the traefik.docker.network labels of the exposed services.
  # NOTE(review): presumably created by the traefik deployment -- confirm.
  traefik-net:
    external: true

# Configs built from files under ./swarmprom, relative to this compose file.
configs:
  # Caddyfile mounted into dockerd-exporter.
  dockerd_config:
    file: ./swarmprom/dockerd-exporter/Caddyfile
  # Prometheus rule files mounted into the prometheus service.
  node_rules:
    file: ./swarmprom/prometheus/rules/swarm_node.rules.yml
  task_rules:
    file: ./swarmprom/prometheus/rules/swarm_task.rules.yml