!7 [patch] Updates the Model and Improves Cause Inference Result
From: @li-zhenxing2022 Reviewed-by: @dowzyx Signed-off-by: @dowzyx
This commit is contained in:
commit
c03bdc5753
284
add_metrics_anomaly_trends_indicator.patch
Normal file
284
add_metrics_anomaly_trends_indicator.patch
Normal file
@ -0,0 +1,284 @@
|
||||
From ece4a0551bd81f64158ab465a865e31e97b63562 Mon Sep 17 00:00:00 2001
|
||||
From: lizhenxing11 <lizhenxing11@huawei.com>
|
||||
Date: Mon, 21 Nov 2022 14:54:20 +0800
|
||||
Subject: [PATCH 2/2] Add Metrics Anomaly Trends Indicator
|
||||
|
||||
update config file
|
||||
---
|
||||
anteater/core/feature.py | 8 +++++
|
||||
anteater/model/slope.py | 15 +++++++++
|
||||
anteater/module/app_sli_detector.py | 20 ++++++++---
|
||||
anteater/utils/data_load.py | 18 ++++++++--
|
||||
config/module/app_sli_rtt.json | 52 ++++++++++++++++++++---------
|
||||
5 files changed, 90 insertions(+), 23 deletions(-)
|
||||
|
||||
diff --git a/anteater/core/feature.py b/anteater/core/feature.py
|
||||
index 306d835..6db764d 100644
|
||||
--- a/anteater/core/feature.py
|
||||
+++ b/anteater/core/feature.py
|
||||
@@ -12,6 +12,13 @@
|
||||
# ******************************************************************************/
|
||||
|
||||
from dataclasses import dataclass
|
||||
+from enum import Enum
|
||||
+
|
||||
+
|
||||
+class AnomalyTrend(Enum):
|
||||
+ DEFAULT = 0
|
||||
+ RISE = 1
|
||||
+ FALL = 2
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -19,3 +26,4 @@ class Feature:
|
||||
metric: str
|
||||
description: str
|
||||
priority: int = 0
|
||||
+ atrend: AnomalyTrend = AnomalyTrend.DEFAULT
|
||||
diff --git a/anteater/model/slope.py b/anteater/model/slope.py
|
||||
index 422d6bc..08c4211 100644
|
||||
--- a/anteater/model/slope.py
|
||||
+++ b/anteater/model/slope.py
|
||||
@@ -29,3 +29,18 @@ def smooth_slope(time_series, windows_length):
|
||||
val = conv_smooth(time_series.to_df(), box_pts=13)
|
||||
val = slope(val, win_len=13)
|
||||
return val[-windows_length:]
|
||||
+
|
||||
+
|
||||
+def trend(y, win_len=None):
|
||||
+ """Gets the trend for the y"""
|
||||
+ if not win_len:
|
||||
+ win_len = len(y) // 2
|
||||
+
|
||||
+ if np.mean(y[:win_len]) < np.mean(y[-win_len:]):
|
||||
+ return 1
|
||||
+
|
||||
+ elif np.mean(y[:win_len]) > np.mean(y[-win_len:]):
|
||||
+ return -1
|
||||
+
|
||||
+ else:
|
||||
+ return 0
|
||||
diff --git a/anteater/module/app_sli_detector.py b/anteater/module/app_sli_detector.py
|
||||
index b69f73c..b63f5e2 100644
|
||||
--- a/anteater/module/app_sli_detector.py
|
||||
+++ b/anteater/module/app_sli_detector.py
|
||||
@@ -20,7 +20,9 @@ import math
|
||||
from typing import List
|
||||
|
||||
from anteater.core.anomaly import Anomaly
|
||||
+from anteater.core.feature import AnomalyTrend
|
||||
from anteater.model.algorithms.spectral_residual import SpectralResidual
|
||||
+from anteater.model.slope import trend
|
||||
from anteater.model.smoother import conv_smooth
|
||||
from anteater.model.three_sigma import three_sigma
|
||||
from anteater.module.detector import Detector
|
||||
@@ -134,10 +136,11 @@ class APPSliDetector(Detector):
|
||||
|
||||
return anomalies
|
||||
|
||||
- def detect_features(self, metrics, machine_id: str, top_n):
|
||||
+ def detect_features(self, machine_id: str, top_n):
|
||||
+ metric_atrend = {f.metric: f.atrend for f in self.features}
|
||||
start, end = dt.last(minutes=6)
|
||||
time_series_list = []
|
||||
- for metric in metrics:
|
||||
+ for metric in metric_atrend.keys():
|
||||
time_series = self.data_loader.get_metric(
|
||||
start, end, metric, label_name='machine_id', label_value=machine_id)
|
||||
time_series_list.extend(time_series)
|
||||
@@ -156,8 +159,16 @@ class APPSliDetector(Detector):
|
||||
if all(x == values[0] for x in values):
|
||||
continue
|
||||
|
||||
+ if trend(time_series.values) < 0 and \
|
||||
+ metric_atrend[time_series.metric] == AnomalyTrend.RISE:
|
||||
+ continue
|
||||
+
|
||||
+ if trend(time_series.values) > 0 and \
|
||||
+ metric_atrend[time_series.metric] == AnomalyTrend.FALL:
|
||||
+ continue
|
||||
+
|
||||
scores = sr_model.compute_score(values)
|
||||
- score = max(scores[-13:])
|
||||
+ score = max(scores[-25:])
|
||||
|
||||
if math.isnan(score) or math.isinf(score):
|
||||
continue
|
||||
@@ -170,9 +181,8 @@ class APPSliDetector(Detector):
|
||||
|
||||
def report(self, anomaly: Anomaly, machine_id: str):
|
||||
"""Reports a single anomaly at each time"""
|
||||
- feature_metrics = [f.metric for f in self.features]
|
||||
description = {f.metric: f.description for f in self.features}
|
||||
- cause_metrics = self.detect_features(feature_metrics, machine_id, top_n=60)
|
||||
+ cause_metrics = self.detect_features(machine_id, top_n=60)
|
||||
cause_metrics = [
|
||||
{'metric': cause[0].metric,
|
||||
'label': cause[0].labels,
|
||||
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
|
||||
index 108d5ed..f8ce277 100644
|
||||
--- a/anteater/utils/data_load.py
|
||||
+++ b/anteater/utils/data_load.py
|
||||
@@ -17,7 +17,7 @@ from os import path, sep
|
||||
from json import JSONDecodeError
|
||||
from typing import List, Tuple
|
||||
|
||||
-from anteater.core.feature import Feature
|
||||
+from anteater.core.feature import AnomalyTrend, Feature
|
||||
from anteater.core.kpi import KPI
|
||||
from anteater.utils.log import logger
|
||||
|
||||
@@ -76,7 +76,21 @@ def load_kpi_feature(file_name) -> Tuple[List[KPI], List[Feature]]:
|
||||
raise e
|
||||
|
||||
kpis = [KPI(**param) for param in params.get('KPI')]
|
||||
- features = [Feature(**param) for param in params.get('Features')]
|
||||
+
|
||||
+ features = []
|
||||
+ for param in params.get('Features'):
|
||||
+ parsed_param = {}
|
||||
+ for key, value in param.items():
|
||||
+ if key == 'atrend':
|
||||
+ if value.lower() == 'rise':
|
||||
+ value = AnomalyTrend.RISE
|
||||
+ elif value.lower() == 'fall':
|
||||
+ value = AnomalyTrend.FALL
|
||||
+ else:
|
||||
+ value = AnomalyTrend.DEFAULT
|
||||
+ parsed_param[key] = value
|
||||
+
|
||||
+ features.append(Feature(**parsed_param))
|
||||
|
||||
if duplicated_metric([kpi.metric for kpi in kpis]) or \
|
||||
duplicated_metric([f.metric for f in features]):
|
||||
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
|
||||
index 0744416..b7f78b7 100644
|
||||
--- a/config/module/app_sli_rtt.json
|
||||
+++ b/config/module/app_sli_rtt.json
|
||||
@@ -34,19 +34,23 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_block_latency_req_jitter",
|
||||
- "description": "block层request时延抖动异常"
|
||||
+ "description": "block层request时延抖动异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_block_latency_req_last",
|
||||
- "description": "block层request时延最近值异常"
|
||||
+ "description": "block层request时延最近值异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_block_latency_req_max",
|
||||
- "description": "block层request时延最大值异常"
|
||||
+ "description": "block层request时延最大值异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_block_latency_req_sum",
|
||||
- "description": "block层request时延总计值异常"
|
||||
+ "description": "block层request时延总计值异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_cpu_iowait_total_second",
|
||||
@@ -54,11 +58,13 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_cpu_user_total_second",
|
||||
- "description": "用户态cpu占用时间(不包括nice)异常"
|
||||
+ "description": "用户态cpu占用时间(不包括nice)异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_cpu_total_used_per",
|
||||
- "description": "CPU总利用率异常"
|
||||
+ "description": "CPU总利用率异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_cpu_backlog_drops",
|
||||
@@ -86,7 +92,8 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_r_await",
|
||||
- "description": "读响应时间异常"
|
||||
+ "description": "读响应时间异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_rareq",
|
||||
@@ -94,19 +101,23 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_rspeed",
|
||||
- "description": "读速率(IOPS)异常"
|
||||
+ "description": "读速率(IOPS)异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_rspeed_kB",
|
||||
- "description": "读吞吐量异常"
|
||||
+ "description": "读吞吐量异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_util",
|
||||
- "description": "磁盘使用率异常"
|
||||
+ "description": "磁盘使用率异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_w_await",
|
||||
- "description": "写响应时间异常"
|
||||
+ "description": "写响应时间异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_wareq",
|
||||
@@ -114,19 +125,23 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_wspeed",
|
||||
- "description": "写速率(IOPS)异常"
|
||||
+ "description": "写速率(IOPS)异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_disk_wspeed_kB",
|
||||
- "description": "写吞吐量异常"
|
||||
+ "description": "写吞吐量异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_proc_read_bytes",
|
||||
- "description": "进程实际从磁盘读取的字节数异常"
|
||||
+ "description": "进程实际从磁盘读取的字节数异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_proc_write_bytes",
|
||||
- "description": "进程实际从磁盘写入的字节数异常"
|
||||
+ "description": "进程实际从磁盘写入的字节数异常",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_net_tcp_retrans_segs",
|
||||
@@ -134,7 +149,12 @@
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_tcp_link_lost_out",
|
||||
- "description": "TPC丢包数异常"
|
||||
+ "description": "TCP丢包数异常"
|
||||
+ },
|
||||
+ {
|
||||
+ "metric": "gala_gopher_tcp_link_srtt",
|
||||
+ "description": "TCP超时",
|
||||
+ "atrend": "rise"
|
||||
},
|
||||
{
|
||||
"metric": "gala_gopher_tcp_link_notack_bytes",
|
||||
--
|
||||
2.37.0.windows.1
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
Name: gala-anteater
|
||||
Version: 1.0.0
|
||||
Release: 1
|
||||
Release: 2
|
||||
Summary: A time-series anomaly detection platform for operating system.
|
||||
License: MulanPSL2
|
||||
URL: https://gitee.com/openeuler/gala-anteater
|
||||
@ -11,6 +11,9 @@ BuildRoot: %{_builddir}/%{name}-%{version}
|
||||
BuildRequires: procps-ng python3-setuptools
|
||||
Requires: python3-gala-anteater = %{version}-%{release}
|
||||
|
||||
patch0: update_sys_io_latency_detector_model.patch
|
||||
patch1: add_metrics_anomaly_trends_indicator.patch
|
||||
|
||||
%description
|
||||
Abnormal detection module for A-Ops project
|
||||
|
||||
@ -23,7 +26,7 @@ Requires: python3-pandas python3-requests python3-scikit-learn python3-py
|
||||
Python3 package of gala-anteater
|
||||
|
||||
%prep
|
||||
%setup -q
|
||||
%autosetup -n %{name}-%{version} -p1
|
||||
|
||||
%build
|
||||
%py3_build
|
||||
@ -56,5 +59,8 @@ Python3 package of gala-anteater
|
||||
|
||||
|
||||
%changelog
|
||||
* Tue Nov 22 2022 Li Zhenxing <lizhenxing11@huawei.com> - 1.0.0-2
|
||||
- Updates anomaly detection model and improves cause inference result
|
||||
|
||||
* Sat Nov 12 2022 Zhen Chen <chenzhen126@huawei.com> - 1.0.0-1
|
||||
- Package init
|
||||
|
||||
1215
update_sys_io_latency_detector_model.patch
Normal file
1215
update_sys_io_latency_detector_model.patch
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user