!7 [patch] Updates the Model and Improves Cause Inference Result

From: @li-zhenxing2022 
Reviewed-by: @dowzyx 
Signed-off-by: @dowzyx
This commit is contained in:
openeuler-ci-bot 2022-11-22 09:32:50 +00:00 committed by Gitee
commit c03bdc5753
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
3 changed files with 1507 additions and 2 deletions

View File

@ -0,0 +1,284 @@
From ece4a0551bd81f64158ab465a865e31e97b63562 Mon Sep 17 00:00:00 2001
From: lizhenxing11 <lizhenxing11@huawei.com>
Date: Mon, 21 Nov 2022 14:54:20 +0800
Subject: [PATCH 2/2] Add Metrics Anomaly Trends Indicator
update config file
---
anteater/core/feature.py | 8 +++++
anteater/model/slope.py | 15 +++++++++
anteater/module/app_sli_detector.py | 20 ++++++++---
anteater/utils/data_load.py | 18 ++++++++--
config/module/app_sli_rtt.json | 52 ++++++++++++++++++++---------
5 files changed, 90 insertions(+), 23 deletions(-)
diff --git a/anteater/core/feature.py b/anteater/core/feature.py
index 306d835..6db764d 100644
--- a/anteater/core/feature.py
+++ b/anteater/core/feature.py
@@ -12,6 +12,13 @@
# ******************************************************************************/
from dataclasses import dataclass
+from enum import Enum
+
+
+class AnomalyTrend(Enum):
+ DEFAULT = 0
+ RISE = 1
+ FALL = 2
@dataclass
@@ -19,3 +26,4 @@ class Feature:
metric: str
description: str
priority: int = 0
+ atrend: AnomalyTrend = AnomalyTrend.DEFAULT
diff --git a/anteater/model/slope.py b/anteater/model/slope.py
index 422d6bc..08c4211 100644
--- a/anteater/model/slope.py
+++ b/anteater/model/slope.py
@@ -29,3 +29,18 @@ def smooth_slope(time_series, windows_length):
val = conv_smooth(time_series.to_df(), box_pts=13)
val = slope(val, win_len=13)
return val[-windows_length:]
+
+
+def trend(y, win_len=None):
+ """Gets the trend for the y"""
+ if not win_len:
+ win_len = len(y) // 2
+
+ if np.mean(y[:win_len]) < np.mean(y[-win_len:]):
+ return 1
+
+ elif np.mean(y[:win_len]) > np.mean(y[-win_len:]):
+ return -1
+
+ else:
+ return 0
diff --git a/anteater/module/app_sli_detector.py b/anteater/module/app_sli_detector.py
index b69f73c..b63f5e2 100644
--- a/anteater/module/app_sli_detector.py
+++ b/anteater/module/app_sli_detector.py
@@ -20,7 +20,9 @@ import math
from typing import List
from anteater.core.anomaly import Anomaly
+from anteater.core.feature import AnomalyTrend
from anteater.model.algorithms.spectral_residual import SpectralResidual
+from anteater.model.slope import trend
from anteater.model.smoother import conv_smooth
from anteater.model.three_sigma import three_sigma
from anteater.module.detector import Detector
@@ -134,10 +136,11 @@ class APPSliDetector(Detector):
return anomalies
- def detect_features(self, metrics, machine_id: str, top_n):
+ def detect_features(self, machine_id: str, top_n):
+ metric_atrend = {f.metric: f.atrend for f in self.features}
start, end = dt.last(minutes=6)
time_series_list = []
- for metric in metrics:
+ for metric in metric_atrend.keys():
time_series = self.data_loader.get_metric(
start, end, metric, label_name='machine_id', label_value=machine_id)
time_series_list.extend(time_series)
@@ -156,8 +159,16 @@ class APPSliDetector(Detector):
if all(x == values[0] for x in values):
continue
+ if trend(time_series.values) < 0 and \
+ metric_atrend[time_series.metric] == AnomalyTrend.RISE:
+ continue
+
+ if trend(time_series.values) > 0 and \
+ metric_atrend[time_series.metric] == AnomalyTrend.FALL:
+ continue
+
scores = sr_model.compute_score(values)
- score = max(scores[-13:])
+ score = max(scores[-25:])
if math.isnan(score) or math.isinf(score):
continue
@@ -170,9 +181,8 @@ class APPSliDetector(Detector):
def report(self, anomaly: Anomaly, machine_id: str):
"""Reports a single anomaly at each time"""
- feature_metrics = [f.metric for f in self.features]
description = {f.metric: f.description for f in self.features}
- cause_metrics = self.detect_features(feature_metrics, machine_id, top_n=60)
+ cause_metrics = self.detect_features(machine_id, top_n=60)
cause_metrics = [
{'metric': cause[0].metric,
'label': cause[0].labels,
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
index 108d5ed..f8ce277 100644
--- a/anteater/utils/data_load.py
+++ b/anteater/utils/data_load.py
@@ -17,7 +17,7 @@ from os import path, sep
from json import JSONDecodeError
from typing import List, Tuple
-from anteater.core.feature import Feature
+from anteater.core.feature import AnomalyTrend, Feature
from anteater.core.kpi import KPI
from anteater.utils.log import logger
@@ -76,7 +76,21 @@ def load_kpi_feature(file_name) -> Tuple[List[KPI], List[Feature]]:
raise e
kpis = [KPI(**param) for param in params.get('KPI')]
- features = [Feature(**param) for param in params.get('Features')]
+
+ features = []
+ for param in params.get('Features'):
+ parsed_param = {}
+ for key, value in param.items():
+ if key == 'atrend':
+ if value.lower() == 'rise':
+ value = AnomalyTrend.RISE
+ elif value.lower() == 'fall':
+ value = AnomalyTrend.FALL
+ else:
+ value = AnomalyTrend.DEFAULT
+ parsed_param[key] = value
+
+ features.append(Feature(**parsed_param))
if duplicated_metric([kpi.metric for kpi in kpis]) or \
duplicated_metric([f.metric for f in features]):
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
index 0744416..b7f78b7 100644
--- a/config/module/app_sli_rtt.json
+++ b/config/module/app_sli_rtt.json
@@ -34,19 +34,23 @@
},
{
"metric": "gala_gopher_block_latency_req_jitter",
- "description": "block层request时延抖动异常"
+ "description": "block层request时延抖动异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_block_latency_req_last",
- "description": "block层request时延最近值异常"
+ "description": "block层request时延最近值异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_block_latency_req_max",
- "description": "block层request时延最大值异常"
+ "description": "block层request时延最大值异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_block_latency_req_sum",
- "description": "block层request时延总计值异常"
+ "description": "block层request时延总计值异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_cpu_iowait_total_second",
@@ -54,11 +58,13 @@
},
{
"metric": "gala_gopher_cpu_user_total_second",
- "description": "用户态cpu占用时间不包括nice异常"
+ "description": "用户态cpu占用时间不包括nice异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_cpu_total_used_per",
- "description": "CPU总利用率异常"
+ "description": "CPU总利用率异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_cpu_backlog_drops",
@@ -86,7 +92,8 @@
},
{
"metric": "gala_gopher_disk_r_await",
- "description": "读响应时间异常"
+ "description": "读响应时间异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_rareq",
@@ -94,19 +101,23 @@
},
{
"metric": "gala_gopher_disk_rspeed",
- "description": "读速率IOPS异常"
+ "description": "读速率IOPS异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_rspeed_kB",
- "description": "读吞吐量异常"
+ "description": "读吞吐量异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_util",
- "description": "磁盘使用率异常"
+ "description": "磁盘使用率异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_w_await",
- "description": "写响应时间异常"
+ "description": "写响应时间异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_wareq",
@@ -114,19 +125,23 @@
},
{
"metric": "gala_gopher_disk_wspeed",
- "description": "写速率IOPS异常"
+ "description": "写速率IOPS异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_disk_wspeed_kB",
- "description": "写吞吐量异常"
+ "description": "写吞吐量异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_proc_read_bytes",
- "description": "进程实际从磁盘读取的字节数异常"
+ "description": "进程实际从磁盘读取的字节数异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_proc_write_bytes",
- "description": "进程实际从磁盘写入的字节数异常"
+ "description": "进程实际从磁盘写入的字节数异常",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_net_tcp_retrans_segs",
@@ -134,7 +149,12 @@
},
{
"metric": "gala_gopher_tcp_link_lost_out",
- "description": "TPC丢包数异常"
+ "description": "TCP丢包数异常"
+ },
+ {
+ "metric": "gala_gopher_tcp_link_srtt",
+ "description": "TCP超时",
+ "atrend": "rise"
},
{
"metric": "gala_gopher_tcp_link_notack_bytes",
--
2.37.0.windows.1

View File

@ -2,7 +2,7 @@
Name: gala-anteater
Version: 1.0.0
Release: 1
Release: 2
Summary: A time-series anomaly detection platform for operating system.
License: MulanPSL2
URL: https://gitee.com/openeuler/gala-anteater
@ -11,6 +11,9 @@ BuildRoot: %{_builddir}/%{name}-%{version}
BuildRequires: procps-ng python3-setuptools
Requires: python3-gala-anteater = %{version}-%{release}
patch0: update_sys_io_latency_detector_model.patch
patch1: add_metrics_anomaly_trends_indicator.patch
%description
Abnormal detection module for A-Ops project
@ -23,7 +26,7 @@ Requires: python3-pandas python3-requests python3-scikit-learn python3-py
Python3 package of gala-anteater
%prep
%setup -q
%autosetup -n %{name}-%{version} -p1
%build
%py3_build
@ -56,5 +59,8 @@ Python3 package of gala-anteater
%changelog
* Tue Nov 22 2022 Li Zhenxing <lizhenxing11@huawei.com> - 1.0.0-2
- Updates anomaly detection model and improves cause inference result
* Sat Nov 12 2022 Zhen Chen <chenzhen126@huawei.com> - 1.0.0-1
- Package init

File diff suppressed because it is too large Load Diff