Commit db8de98d authored by Pedro Eduardo Trujillo
Browse files

Merge branch 'dev' into 'master'

Corrige métricas de disco y diferencia por entorno

See merge request redmic-project/metric/prometheus!9
parents fae15841 32076a5c
Loading
Loading
Loading
Loading
+8 −10
Original line number Diff line number Diff line
@@ -4,8 +4,6 @@ stages:
  - test-package
  - deploy

image: docker:stable

services:
  - docker:dind

@@ -19,11 +17,13 @@ variables:

check-rules:
  stage: check-rules
  image: docker:stable
  script:
    - ./check-rules.sh

docker-build-commit-non-master-branches:
  stage: package
  image: docker:stable
  only:
    - branches
  except:
@@ -35,6 +35,7 @@ docker-build-commit-non-master-branches:

docker-build-commit-master-branch:
  stage: package
  image: docker:stable
  only:
    - master
  script:
@@ -44,6 +45,7 @@ docker-build-commit-master-branch:

container-scanning:
  stage: test-package
  image: docker:stable
  allow_failure: true
  only:
    - branches
@@ -65,11 +67,10 @@ container-scanning:

deploy-supporting-branch-develop:
  stage: deploy
  image: registry.gitlab.com/redmic-project/docker/docker-deploy
  image: ${DOCKER_DEPLOY_IMAGE_NAME}:${DOCKER_DEPLOY_IMAGE_TAG}
  variables:
    SSH_REMOTE: ${DEV_SSH_REMOTE}
    COMPOSE_FILE: docker-compose.tmpl.yml:docker-compose.dev.yml
    UI_AUTH: ${DEV_UI_AUTH}
    PUBLIC_HOSTNAME: ${DEV_PUBLIC_HOSTNAME}
  script:
    - >
@@ -86,11 +87,10 @@ deploy-supporting-branch-develop:

deploy-stable-branch-develop:
  stage: deploy
  image: registry.gitlab.com/redmic-project/docker/docker-deploy
  image: ${DOCKER_DEPLOY_IMAGE_NAME}:${DOCKER_DEPLOY_IMAGE_TAG}
  variables:
    SSH_REMOTE: ${DEV_SSH_REMOTE}
    COMPOSE_FILE: docker-compose.tmpl.yml:docker-compose.dev.yml
    UI_AUTH: ${DEV_UI_AUTH}
    PUBLIC_HOSTNAME: ${DEV_PUBLIC_HOSTNAME}
  script:
    - >
@@ -105,11 +105,10 @@ deploy-stable-branch-develop:

deploy-supporting-branch-production:
  stage: deploy
  image: registry.gitlab.com/redmic-project/docker/docker-deploy
  image: ${DOCKER_DEPLOY_IMAGE_NAME}:${DOCKER_DEPLOY_IMAGE_TAG}
  variables:
    SSH_REMOTE: ${PRO_SSH_REMOTE}
    COMPOSE_FILE: docker-compose.tmpl.yml:docker-compose.prod.yml
    UI_AUTH: ${PRO_UI_AUTH}
    PUBLIC_HOSTNAME: ${PRO_PUBLIC_HOSTNAME}
  script:
    - >
@@ -126,11 +125,10 @@ deploy-supporting-branch-production:

deploy-stable-branch-production:
  stage: deploy
  image: registry.gitlab.com/redmic-project/docker/docker-deploy
  image: ${DOCKER_DEPLOY_IMAGE_NAME}:${DOCKER_DEPLOY_IMAGE_TAG}
  variables:
    SSH_REMOTE: ${PRO_SSH_REMOTE}
    COMPOSE_FILE: docker-compose.tmpl.yml:docker-compose.prod.yml
    UI_AUTH: ${PRO_UI_AUTH}
    PUBLIC_HOSTNAME: ${PRO_PUBLIC_HOSTNAME}
  script:
    - >
+5 −0
Original line number Diff line number Diff line
@@ -3,3 +3,8 @@ version: '3.5'
volumes:
  prometheus:
    name: ${PROMETHEUS_VOL_NAME:-prometheus-vol}

configs:
  node_rules:
    name: ${NODE_RULES_NAME:-node_rules}
    file: ./rules/swarm_node.rules.dev.yml
+5 −0
Original line number Diff line number Diff line
@@ -15,3 +15,8 @@ volumes:
      backing: relocatable
      size: ${PROMETHEUS_VOL_SIZE:-10}
      ebstype: ${PROMETHEUS_VOL_EBS_TYPE:-gp2}

configs:
  node_rules:
    name: ${NODE_RULES_NAME:-node_rules}
    file: ./rules/swarm_node.rules.prod.yml
+2 −6
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ services:
      - source: service_rules
        target: /etc/prometheus/swarm_service.rules.yml
    healthcheck:
      test: wget --spider http://localhost:9090
      test: wget --spider -q http://localhost:9090
      interval: 30s
      timeout: 10s
      retries: 3
@@ -41,7 +41,7 @@ services:
        traefik.backend: prometheus
        traefik.port: '9090'
      restart_policy:
        delay: 30s
        delay: 10s
        window: 2m
      resources:
        limits:
@@ -61,10 +61,6 @@ networks:
    external: true

configs:
  node_rules:
    name: ${NODE_RULES_NAME:-node_rules}
    file: ./rules/swarm_node.rules.yml

  task_rules:
    name: ${TASK_RULES_NAME:-task_rules}
    file: ./rules/swarm_task.rules.yml
+65 −0
Original line number Diff line number Diff line
groups:
- name: swarm_node
  rules:
  # Warning alert: per-node CPU usage (100 minus the averaged idle-mode rate,
  # scaled to a percentage) has been above 80 for one minute.
  - alert: node_cpu_usage
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      100 - (avg(irate(node_cpu_seconds_total{mode="idle"}[1m])
      * ON(instance) GROUP_LEFT(node_name) node_meta * 100)
      BY (node_name)) > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} CPU usage is at
        {{ humanize $value}}%.
      summary: "CPU alert for Swarm node '{{ $labels.node_name }}'"
  # Warning alert: per-node memory usage ((total - available) / total, as a
  # percentage) has been above 80 for one minute.
  - alert: node_memory_usage
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      sum(((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)
      / node_memory_MemTotal_bytes)
      * ON(instance) GROUP_LEFT(node_name) node_meta * 100)
      BY (node_name) > 80
    for: 1m
    labels:
      severity: warning
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} memory usage is at
        {{ humanize $value}}%.
      summary: "Memory alert for Swarm node '{{ $labels.node_name }}'"
  # Warning alert: used space on the filesystem mounted at /rootfs
  # ((size - free) * 100 / size) has been above 85 for one minute.
  - alert: node_disk_usage
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      ((node_filesystem_size_bytes{mountpoint="/rootfs"}
      - node_filesystem_free_bytes{mountpoint="/rootfs"})
      * 100 / node_filesystem_size_bytes{mountpoint="/rootfs"})
      * ON(instance) GROUP_LEFT(node_name) node_meta > 85
    for: 1m
    labels:
      severity: warning
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} disk usage is at
        {{ humanize $value}}%.
      summary: "Disk alert for Swarm node '{{ $labels.node_name }}'"
  # Critical alert: a linear extrapolation of the last hour of free bytes on
  # /rootfs, projected 6 * 3600 seconds ahead, has been below zero for one hour.
  - alert: node_disk_fill_rate_6h
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      predict_linear(node_filesystem_free_bytes{mountpoint="/rootfs"}[1h],
      6 * 3600)
      * ON(instance) GROUP_LEFT(node_name) node_meta < 0
    for: 1h
    labels:
      severity: critical
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} disk is going to fill up in 6h.
      summary: "Disk fill alert for Swarm node '{{ $labels.node_name }}'"
  # Warning alert: used space on the filesystem mounted at /mnt/data
  # ((size - free) * 100 / size) has been above 85 for one minute.
  # The annotations name the mount point so that this alert's notification text
  # is distinguishable from the /rootfs alert (node_disk_usage), whose wording
  # was previously identical.
  - alert: node_docker_disk_usage
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      ((node_filesystem_size_bytes{mountpoint="/mnt/data"}
      - node_filesystem_free_bytes{mountpoint="/mnt/data"})
      * 100 / node_filesystem_size_bytes{mountpoint="/mnt/data"})
      * ON(instance) GROUP_LEFT(node_name) node_meta > 85
    for: 1m
    labels:
      severity: warning
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} disk usage on /mnt/data is at
        {{ humanize $value}}%.
      summary: "Docker disk alert for Swarm node '{{ $labels.node_name }}'"
  # Critical alert: a linear extrapolation of the last hour of free bytes on
  # /mnt/data, projected 6 * 3600 seconds ahead, has been below zero for one
  # hour.
  # The annotations name the mount point so that this alert's notification text
  # is distinguishable from the /rootfs alert (node_disk_fill_rate_6h), whose
  # wording was previously identical.
  - alert: node_docker_disk_fill_rate_6h
    # node_meta is joined on `instance` to bring in the node_name label.
    expr: >-
      predict_linear(node_filesystem_free_bytes{mountpoint="/mnt/data"}[1h],
      6 * 3600)
      * ON(instance) GROUP_LEFT(node_name) node_meta < 0
    for: 1h
    labels:
      severity: critical
    annotations:
      description: >-
        Swarm node {{ $labels.node_name }} disk mounted at /mnt/data is going
        to fill up in 6h.
      summary: "Docker disk fill alert for Swarm node '{{ $labels.node_name }}'"
 No newline at end of file
Loading