# Alert definitions for the Brest virtualization product.
#
# Each list entry describes one alert: a query, the label used as the alerted
# object (`object_tag` / `group_by`), evaluation timing (`step`, `rate`,
# `metric_ttl`), and a list of threshold `rules`.
#
# Two rule types appear in this file:
#   - threshold_1level_sym:  a single `level` threshold.
#   - threshold_1level_asym: separate `alert` and `recovery` thresholds.
# All `parameters` values are quoted strings; `labels_required` and
# `labels_absent` are left empty (parsed as null) on every rule.
#
# NOTE(review): the nesting below was reconstructed from a flattened source.
# `priority` is placed at rule level because it is the only unquoted integer
# next to the all-string `parameters` map — confirm against the consumer's
# schema.

# --- Cluster resources -------------------------------------------------------

- id: "41x0brest0000101"
  name: '[Brest] Высокое потребление CPU на кластере {{index .Labels "cluster"}}'
  description: '[Brest] Высокое потребление CPU на кластере {{index .Labels "cluster"}}'
  # CPU usage as a percentage of the cluster total.
  # NOTE(review): the name is templated on the "cluster" label while the alert
  # is grouped by "hostname" — confirm this is intended.
  query: '(one_cluster_cpuusage / one_cluster_totalcpu) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000101"
      labels_required:
      labels_absent:
      type: "threshold_1level_sym"
      parameters:
        level: "80"
        critical: "false"
        reverse: "false"
      priority: 1

# --- Availability checks -----------------------------------------------------
# The share_eq_over_time(...) * 100 queries yield the percentage of samples in
# the 5m window that equal the expected value. With reverse: "true" the rule
# presumably fires when that percentage drops below `alert` and clears at
# `recovery` — TODO confirm against the rule engine.

- id: "41x0brest0000201"
  name: '[Brest] Непредвиденная остановка сервиса {{index .Labels "name"}} на сервере виртуализации {{.Host}}'
  description: '[Brest] Непредвиденная остановка сервиса {{index .Labels "name"}} на сервере виртуализации {{.Host}}'
  query: 'share_eq_over_time(systemd_unit_state_id{name=~"libvirtd.*|postgresql@.*|chrony.*|sssd.*|opennebula.*", product="brest"}[5m], 1) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000201"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "85"
        recovery: "100"
        reverse: "true"
        critical: "true"
      priority: 1

- id: "41x0brest0000301"
  name: '[Brest] Непредвиденная ошибка RAFT на сервере виртуализации {{.Host}}'
  description: '[Brest] Непредвиденная ошибка RAFT на сервере виртуализации {{.Host}}'
  # NOTE(review): this rule alerts when the share of samples equal to 10 is
  # low, while alert 41x0brest0001601 alerts when the share of series equal to
  # 10 is high. The two treat the value 10 in opposite ways — confirm which
  # reading of one_zone_raft is correct.
  query: 'share_eq_over_time(one_zone_raft{}[5m], 10) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000301"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "85"
        recovery: "100"
        reverse: "true"
        critical: "true"
      priority: 1

- id: "41x0brest0000401"
  name: '[Brest] Непредвиденная ошибка подключения One-exporter к API на сервере виртуализации {{.Host}}'
  description: '[Brest] Непредвиденная ошибка подключения One-exporter к API на сервере виртуализации {{.Host}}'
  query: 'share_eq_over_time(one_api_connect{}[5m], 1) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000401"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "85"
        recovery: "100"
        reverse: "true"
        critical: "true"
      priority: 1

- id: "41x0brest0000501"
  name: '[Brest] Непредвиденная ошибка подключения к web порталу Brest на сервере виртуализации {{.Host}}'
  description: '[Brest] Непредвиденная ошибка подключения к web порталу Brest на сервере виртуализации {{.Host}}'
  query: 'share_eq_over_time(one_web_connect{}[5m], 200) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000501"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "85"
        recovery: "100"
        reverse: "true"
        critical: "true"
      priority: 1

- id: "41x0brest0000601"
  name: '[Brest] Большое время подключения к web порталу Brest на сервере виртуализации {{.Host}}'
  description: '[Brest] Большое время подключения к web порталу Brest на сервере виртуализации {{.Host}}'
  query: 'one_web_connect_duration{}'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000601"
      labels_required:
      labels_absent:
      type: "threshold_1level_sym"
      parameters:
        level: "2000"  # 2 seconds
        critical: "false"
        reverse: "false"
      priority: 1

- id: "41x0brest0000701"
  name: '[Brest] Некорректный статус фронта {{.Host}}'
  description: '[Brest] Некорректный статус фронта {{.Host}}'
  query: 'share_eq_over_time(node_exporter_build_info{product="brest", component="front"}[5m], 1) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000701"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "85"
        recovery: "100"
        reverse: "true"
        critical: "true"
      priority: 1

# --- Virtualization host states ----------------------------------------------
# Each alert counts the series whose one_host_state equals one numeric code.
#
# NOTE(review): count(...) without a `by` clause aggregates away every label,
# including the "hostname" label these alerts group by and that {{.Host}}
# presumably renders — confirm the host is still resolvable.
# NOTE(review): count() over an empty comparison result returns no series
# rather than 0, so recovery: "0" may only be reached through the no-data
# path. Alert 41x0brest0001601 guards this with `or vector(0)`; confirm
# whether the same guard is wanted here.

- id: "41x0brest0000801"
  name: '[Brest] Некорректный статус ERROR хоста виртуализации {{.Host}}'
  description: '[Brest] Некорректный статус ERROR хоста виртуализации {{.Host}}'
  query: 'count(one_host_state == 3)'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000801"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

- id: "41x0brest0000901"
  name: '[Brest] Некорректный статус INIT хоста виртуализации {{.Host}}'
  description: '[Brest] Некорректный статус INIT хоста виртуализации {{.Host}}'
  query: 'count(one_host_state == 1)'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0000901"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

- id: "41x0brest0001001"
  name: '[Brest] Некорректный статус DISABLED хоста виртуализации {{.Host}}'
  description: '[Brest] Некорректный статус DISABLED хоста виртуализации {{.Host}}'
  query: 'count(one_host_state == 4)'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001001"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

- id: "41x0brest0001101"
  name: '[Brest] Некорректный статус OFFLINE хоста виртуализации {{.Host}}'
  description: '[Brest] Некорректный статус OFFLINE хоста виртуализации {{.Host}}'
  query: 'count(one_host_state == 8)'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001101"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

- id: "41x0brest0001201"
  name: '[Brest] Некорректный статус MONITORED хоста виртуализации {{.Host}}'
  description: '[Brest] Некорректный статус MONITORED хоста виртуализации {{.Host}}'
  query: 'count(one_host_state == 2)'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001201"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

# --- RAFT state changes ------------------------------------------------------

- id: "41x0brest0001301"
  name: '[Brest] Неожиданная смена статуса RAFT для фронта {{.Host}}'
  description: '[Brest] Неожиданная смена статуса RAFT для фронта {{.Host}}'
  # Total number of one_zone_raft value changes in the last 5m.
  # NOTE(review): `sum by()` drops every label, so the result carries no
  # "hostname" even though the name uses {{.Host}} — confirm this is intended.
  query: 'sum by() (changes(one_zone_raft{}[5m]))'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001301"
      labels_required:
      labels_absent:
      type: "threshold_1level_asym"
      parameters:
        alert: "1"
        recovery: "0"
        reverse: "false"
        critical: "true"
      priority: 1

# --- VM creation bursts ------------------------------------------------------
# delta() needs a range of samples as its argument, so the 10m window is
# applied to the summed series as a subquery: sum(...)[10m:]. The previous
# form put [10m] after delta(...), i.e. on its result rather than its input.

- id: "41x0brest0001401"
  name: '[Brest] Более 50 новых виртуальных машины было создано за последние 10 минут'
  description: '[Brest] Более 50 новых виртуальных машины было создано за последние 10 минут'
  query: 'delta(sum(one_vms_states_count{})[10m:])'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001401"
      labels_required:
      labels_absent:
      type: "threshold_1level_sym"
      parameters:
        level: "50"
        critical: "false"
        reverse: "false"
      priority: 1

- id: "41x0brest0001501"
  name: '[Brest] Более 500 новых виртуальных машины было создано за последние 10 минут'
  description: '[Brest] Более 500 новых виртуальных машины было создано за последние 10 минут'
  query: 'delta(sum(one_vms_states_count{})[10m:])'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001501"
      labels_required:
      labels_absent:
      type: "threshold_1level_sym"
      parameters:
        level: "500"
        critical: "true"
        reverse: "false"
      priority: 1

# --- Front-end quorum --------------------------------------------------------

- id: "41x0brest0001601"
  name: '[Brest] Более 50% фронтов имеют ошибки статуса'
  description: '[Brest] Более 50% фронтов имеют ошибки статуса'
  # Percentage of one_zone_raft series whose value equals 10; `or vector(0)`
  # keeps the numerator defined when no series matches.
  # NOTE(review): alert 41x0brest0000301 treats the value 10 the opposite way
  # (it alerts when the share of 10 is low) — confirm which is correct.
  query: '(count(one_zone_raft{} == 10) or vector(0)) / count(one_zone_raft{} ) * 100'
  object_tag: "hostname"
  group_by:
    - "hostname"
  step: 5m
  rate: 60s
  metric_ttl: 168h
  no_data_mode: "No data"
  rules:
    - id: "42x0brest0001601"
      labels_required:
      labels_absent:
      type: "threshold_1level_sym"
      parameters:
        level: "50"
        critical: "true"
        reverse: "false"
      priority: 1