Commit 7685bf07 authored by Pedro Eduardo Trujillo
Browse files

Describe mejor las alertas, relaja ratio de error

parent 8deb8227
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -7,12 +7,13 @@ groups:
          sum(rate({level=~"critical|error"} [5m])) by (service_name)
            /
          sum(rate({container_id=~".+"} [5m])) by (service_name)
            > 0.1
            > 0.2
        for: 10m
        labels:
          severity: error
        annotations:
          summary: High error logs rate at '{{$labels.service_name}}' service
          description: High error logs rate ({{$value | printf "%.1f"}}) at '{{$labels.service_name}}' service
          summary: High rate of errors by service, more than 20%

      - alert: LogsTooManyEntriesByService
        expr: |
@@ -21,4 +22,5 @@ groups:
        labels:
          severity: warning
        annotations:
          summary: Too many logs generated by '{{$labels.service_name}}' service
          description: Too many logs ({{$value | printf "%.1f"}}) generated by '{{$labels.service_name}}' service
          summary: Too many logs by service, more than 60 per minute