From 27bb7cdd80f76bfc7ebb0f3041544740aa2fa91b Mon Sep 17 00:00:00 2001
From: lizhenxing11
Date: Tue, 10 Jan 2023 15:31:44 +0800
Subject: [PATCH] fix str2enum bug & data query refactor

---
 anteater/core/anomaly.py                      | 10 ++++
 anteater/core/kpi.py                          | 14 ++++++
 anteater/model/algorithms/slope.py            | 11 +++--
 anteater/model/detector/base.py               | 20 ++++----
 anteater/model/detector/n_sigma_detector.py   | 15 +++---
 .../model/detector/online_vae_detector.py     |  3 +-
 .../tcp_establish_n_sigma_detector.py         |  3 +-
 .../tcp_trans_latency_n_sigma_detector.py     | 48 +++++++++++++++++--
 anteater/model/detector/th_base_detector.py   |  3 +-
 anteater/module/app/app_sli_detector.py       |  4 +-
 anteater/module/sys/disk_throughput.py        |  4 +-
 anteater/module/sys/proc_io_latency.py        |  4 +-
 anteater/module/sys/sys_io_latency.py         |  4 +-
 .../module/sys/tcp_transmission_latency.py    |  4 +-
 .../module/sys/tcp_transmission_throughput.py |  4 +-
 anteater/source/metric_loader.py              | 40 ++++++++++++++-
 anteater/utils/data_load.py                   |  4 +-
 config/module/app_sli_rtt.json                |  6 ++-
 config/module/disk_throughput.json            |  6 ++-
 config/module/proc_io_latency.json            | 15 ++++--
 config/module/sys_io_latency.json             |  2 +-
 config/module/sys_tcp_establish.json          |  2 +-
 .../module/sys_tcp_transmission_latency.json  |  4 +-
 23 files changed, 171 insertions(+), 59 deletions(-)

diff --git a/anteater/core/anomaly.py b/anteater/core/anomaly.py
index 45c4fc3..fdee3d1 100644
--- a/anteater/core/anomaly.py
+++ b/anteater/core/anomaly.py
@@ -52,3 +52,13 @@ class AnomalyTrend(Enum):
     DEFAULT = 0
     RISE = 1
     FALL = 2
+
+    @staticmethod
+    def from_str(label: str):
+        """Translates a trend label string to the enum type"""
+        if label.upper() == 'RISE':
+            return AnomalyTrend.RISE
+        elif label.upper() == 'FALL':
+            return AnomalyTrend.FALL
+        else:
+            return AnomalyTrend.DEFAULT
diff --git a/anteater/core/kpi.py b/anteater/core/kpi.py
index f83b666..70cc9ee 100644
--- a/anteater/core/kpi.py
+++ b/anteater/core/kpi.py
@@ -27,6 +27,13 @@ class KPI:
     params: dict = field(default=dict)
     atrend: AnomalyTrend = AnomalyTrend.DEFAULT
+    @classmethod
+    def from_dict(cls, **data):
+        if 'atrend' in data:
+            data['atrend'] = AnomalyTrend.from_str(data.get('atrend'))
+
+        return cls(**data)
+
 
 
 @dataclass
 class Feature:
@@ -35,6 +42,13 @@ class Feature:
     priority: int = 0
     atrend: AnomalyTrend = AnomalyTrend.DEFAULT
+    @classmethod
+    def from_dict(cls, **data):
+        if 'atrend' in data:
+            data['atrend'] = AnomalyTrend.from_str(data.get('atrend'))
+
+        return cls(**data)
+
 
 
 @dataclass
 class ModelConfig:
diff --git a/anteater/model/algorithms/slope.py b/anteater/model/algorithms/slope.py
index d324d58..e546183 100644
--- a/anteater/model/algorithms/slope.py
+++ b/anteater/model/algorithms/slope.py
@@ -17,6 +17,7 @@ import numpy as np
 
 from anteater.core.anomaly import AnomalyTrend
 from anteater.model.algorithms.smooth import conv_smooth
+from anteater.utils.common import divide
 
 
 def slope(y, win_len):
@@ -36,13 +37,15 @@ def smooth_slope(time_series, windows_length):
 
 def trend(y, win_len=None):
     """Gets the trend for the y"""
+    y = conv_smooth(y, box_pts=7)
+
     if not win_len:
         win_len = len(y) // 2
 
-    if np.mean(y[:win_len]) < np.mean(y[-win_len:]):
+    if divide(np.mean(y[:win_len]), np.mean(y[-win_len:])) < 0.9:
         return 1
 
-    elif np.mean(y[:win_len]) > np.mean(y[-win_len:]):
+    elif divide(np.mean(y[:win_len]), np.mean(y[-win_len:])) > 1.1:
         return -1
 
     else:
@@ -51,10 +54,10 @@
 def check_trend(values: List[float], atrend: AnomalyTrend):
     """Checks the values with an 'atrend' trend"""
 
-    if atrend == AnomalyTrend.RISE and trend(values) < 0:
+    if atrend == AnomalyTrend.RISE and trend(values) != 1:
         return False
 
-    if atrend == AnomalyTrend.FALL and trend(values) > 0:
+    if atrend == AnomalyTrend.FALL and trend(values) != -1:
         return False
 
     return True
diff --git a/anteater/model/detector/base.py b/anteater/model/detector/base.py
index 2b2dafe..a23b6d9 100644
--- a/anteater/model/detector/base.py
+++ b/anteater/model/detector/base.py
@@ -11,6 +11,7 @@
 # See the Mulan PSL v2 for more details.
 # ******************************************************************************/
 
+import logging
 import math
 from abc import abstractmethod
 from typing import List
@@ -39,12 +40,6 @@ class Detector:
         """Executes anomaly detection on kpis"""
         pass
 
-    def get_unique_machine_id(self, start, end, kpis: List[KPI]) -> List[str]:
-        """Gets unique machine ids during past minutes"""
-        metrics = [_kpi.metric for _kpi in kpis]
-        machine_ids = self.data_loader.get_unique_machines(start, end, metrics)
-        return machine_ids
-
     def execute(self, job_config: JobConfig) -> List[Anomaly]:
         """The main function of the detector"""
         kpis = job_config.kpis
@@ -56,6 +51,12 @@
 
         return self._execute(kpis, features, top_n=n)
 
+    def get_unique_machine_id(self, start, end, kpis: List[KPI]) -> List[str]:
+        """Gets unique machine ids during past minutes"""
+        metrics = [_kpi.metric for _kpi in kpis]
+        machine_ids = self.data_loader.get_unique_machines(start, end, metrics)
+        return machine_ids
+
     def find_root_causes(self, anomalies: List[Anomaly], features: List[Feature], top_n=3)\
             -> List[Anomaly]:
         """Finds root causes for each anomaly events"""
@@ -82,6 +83,7 @@
             tmp_ts_scores = self.cal_anomaly_score(f.metric, f.description, machine_id=machine_id)
             for _ts_score in tmp_ts_scores:
                 if not check_trend(_ts_score.ts.values, f.atrend):
+                    logging.info(f"Trends Filtered: {f.metric}")
                     _ts_score.score = 0
                 if same_intersection_key_value(_ts_score.ts.labels, filters):
                     ts_scores.append(_ts_score)
@@ -101,6 +103,7 @@
         for _ts_s in ts_scores:
             if same_intersection_key_value(_ts_s.ts.labels, anomaly.labels):
                 if not check_trend(_ts_s.ts.values, kpi_atrends[anomaly.metric]):
+                    logging.info(f"Trends Filtered: {anomaly.metric}")
                     anomaly.score = 0
                 else:
                     anomaly.score = _ts_s.score
@@ -115,12 +118,11 @@
                           machine_id: str)\
             -> List[TimeSeriesScore]:
         """Calculates metric anomaly scores based on sr model"""
-        start, end = dt.last(minutes=6)
+        start, end = dt.last(minutes=10)
         point_count = self.data_loader.expected_point_length(start, end)
         model = SpectralResidual(12, 24, 50)
         ts_scores = []
-        ts_list = self.data_loader.\
-            get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
+        ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
         for _ts in ts_list:
             if sum(_ts.values) == 0 or \
                len(_ts.values) < point_count * 0.9 or\
diff --git a/anteater/model/detector/n_sigma_detector.py b/anteater/model/detector/n_sigma_detector.py
index 3a2ab01..dbf83c6 100644
--- a/anteater/model/detector/n_sigma_detector.py
+++ b/anteater/model/detector/n_sigma_detector.py
@@ -29,10 +29,9 @@ from anteater.utils.log import logger
 class NSigmaDetector(Detector):
     """The three sigma anomaly detector"""
 
-    def __init__(self, data_loader: MetricLoader, method: str):
+    def __init__(self, data_loader: MetricLoader):
         """The detector base class initializer"""
         super().__init__(data_loader)
-        self.method = method
 
     def detect_kpis(self, kpis: List[KPI]):
         """Executes anomaly detection on kpis"""
@@ -48,7 +47,7 @@ class NSigmaDetector(Detector):
     def detect_signal_kpi(self, kpi, machine_id: str) -> List[Anomaly]:
         """Detects kpi based on signal time series anomaly detection model"""
         outlier_ratio_th = kpi.params['outlier_ratio_th']
-        ts_scores = self.calculate_metric_three_sigma_score(
+        ts_scores = self.calculate_n_sigma_score(
             kpi.metric, kpi.description, machine_id, **kpi.params)
         if not ts_scores:
             logger.warning(f'Key metric {kpi.metric} is null on the target machine {machine_id}!')
@@ -68,17 +67,17 @@
 
         return anomalies
 
-    def calculate_metric_three_sigma_score(self, metric, description, machine_id: str, **kwargs)\
+    def calculate_n_sigma_score(self, metric, description, machine_id: str, **kwargs)\
             -> List[TimeSeriesScore]:
         """Calculate kpi anomaly scores based on three sigma scores"""
+        method = kwargs.get('method', 'abs')
         look_back = kwargs.get('look_back')
         smooth_params = kwargs.get('smooth_params')
         obs_size = kwargs.get('obs_size')
         n = kwargs.get('n', 3)
         start, end = dt.last(minutes=look_back)
         point_count = self.data_loader.expected_point_length(start, end)
-        ts_list = self.data_loader.\
-            get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
+        ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
         ts_scores = []
         for _ts in ts_list:
             dedup_values = [k for k, g in groupby(_ts.values)]
@@ -87,12 +86,12 @@
                     len(_ts.values) > point_count * 1.5 or \
                     all(x == _ts.values[0] for x in _ts.values):
                 ratio = 0
-            elif len(dedup_values) < point_count * 0.3:
+            elif len(dedup_values) < point_count * 0.6:
                 ratio = 0
             else:
                 smoothed_val = smoothing(_ts.values, **smooth_params)
                 outlier, mean, std = n_sigma(
-                    smoothed_val, obs_size=obs_size, n=n, method=self.method)
+                    smoothed_val, obs_size=obs_size, n=n, method=method)
                 ratio = divide(len(outlier), obs_size)
 
             ts_scores.append(TimeSeriesScore(ts=_ts, score=ratio, description=description))
diff --git a/anteater/model/detector/online_vae_detector.py b/anteater/model/detector/online_vae_detector.py
index 63a7b09..0f91576 100644
--- a/anteater/model/detector/online_vae_detector.py
+++ b/anteater/model/detector/online_vae_detector.py
@@ -110,8 +110,7 @@
 
         metric_dfs = []
         for metric in metrics:
             _ts_list = self.data_loader.\
-                get_metric(start, end, metric, label_name="machine_id",
-                           label_value=machine_id, operator_name='avg')
+                get_metric(start, end, metric, operator='avg', keys="machine_id", machine_id=machine_id)
             if len(_ts_list) > 1:
                 raise ValueError(f'Got multiple time_series based on machine id: {len(_ts_list)}')
diff --git a/anteater/model/detector/tcp_establish_n_sigma_detector.py b/anteater/model/detector/tcp_establish_n_sigma_detector.py
index 82d7837..3720069 100644
--- a/anteater/model/detector/tcp_establish_n_sigma_detector.py
+++ b/anteater/model/detector/tcp_establish_n_sigma_detector.py
@@ -73,8 +73,7 @@ class TcpEstablishNSigmaDetector(Detector):
         min_rtt = kpi.params.get('min_rtt')
         start, end = dt.last(minutes=look_back)
 
-        ts_list = self.data_loader.\
-            get_metric(start, end, kpi.metric, label_name='machine_id', label_value=machine_id)
+        ts_list = self.data_loader.get_metric(start, end, kpi.metric, machine_id=machine_id)
 
         anomalies = []
         for _ts in ts_list:
diff --git a/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py b/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
index 1eeb95f..6d41775 100644
--- a/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
+++ b/anteater/model/detector/tcp_trans_latency_n_sigma_detector.py
@@ -11,20 +11,61 @@
 # See the Mulan PSL v2 for more details.
 # ******************************************************************************/
 
+from itertools import groupby
 from typing import List
 
+import numpy as np
+
 from anteater.core.time_series import TimeSeriesScore
+from anteater.model.algorithms.smooth import smoothing
+from anteater.model.algorithms.three_sigma import n_sigma
 from anteater.model.detector.n_sigma_detector import NSigmaDetector
 from anteater.source.metric_loader import MetricLoader
+from anteater.utils.common import divide
 from anteater.utils.datetime import DateTimeManager as dt
 
 
 class TcpTransLatencyNSigmaDetector(NSigmaDetector):
     """The three sigma anomaly detector"""
 
-    def __init__(self, data_loader: MetricLoader, method: str):
+    def __init__(self, data_loader: MetricLoader):
         """The detector base class initializer"""
-        super().__init__(data_loader, method)
+        super().__init__(data_loader)
+
+    def calculate_n_sigma_score(self, metric, description, machine_id: str, **kwargs)\
+            -> List[TimeSeriesScore]:
+        """Calculates anomaly scores based on n sigma scores"""
+        method = kwargs.get('method', 'abs')
+        look_back = kwargs.get('look_back')
+        smooth_params = kwargs.get('smooth_params')
+        obs_size = kwargs.get('obs_size')
+        min_srtt = kwargs.get("min_srtt")
+        n = kwargs.get('n', 3)
+        start, end = dt.last(minutes=look_back)
+        point_count = self.data_loader.expected_point_length(start, end)
+        ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
+        ts_scores = []
+        for _ts in ts_list:
+            dedup_values = [k for k, g in groupby(_ts.values)]
+            if sum(_ts.values) == 0 or \
+                    len(_ts.values) < point_count * 0.6 or \
+                    len(_ts.values) > point_count * 1.5 or \
+                    all(x == _ts.values[0] for x in _ts.values):
+                ratio = 0
+            elif len(dedup_values) < point_count * 0.6:
+                ratio = 0
+            else:
+                smoothed_val = smoothing(_ts.values, **smooth_params)
+                outlier, mean, std = n_sigma(
+                    smoothed_val, obs_size=obs_size, n=n, method=method)
+                if outlier and np.average(outlier) <= min_srtt:
+                    ratio = 0
+                else:
+                    ratio = divide(len(outlier), obs_size)
+
+            ts_scores.append(TimeSeriesScore(ts=_ts, score=ratio, description=description))
+
+        return ts_scores
 
     def cal_anomaly_score(self, metric, description, machine_id: str) \
             -> List[TimeSeriesScore]:
@@ -32,8 +73,7 @@
         start, end = dt.last(minutes=2)
         point_count = self.data_loader.expected_point_length(start, end)
         ts_scores = []
-        ts_list = self.data_loader.\
-            get_metric(start, end, metric, label_name='machine_id', label_value=machine_id)
+        ts_list = self.data_loader.get_metric(start, end, metric, machine_id=machine_id)
         for _ts in ts_list:
             if sum(_ts.values) == 0 or \
                 len(_ts.values) < point_count * 0.5 or \
diff --git a/anteater/model/detector/th_base_detector.py b/anteater/model/detector/th_base_detector.py
index bec9705..0af4f22 100644
--- a/anteater/model/detector/th_base_detector.py
+++ b/anteater/model/detector/th_base_detector.py
@@ -44,8 +44,7 @@ class ThBaseDetector(Detector):
         look_back = kpi.params.get('look_back')
         th = kpi.params.get('th')
         start, end = dt.last(minutes=look_back)
-        ts_list = self.data_loader.\
-            get_metric(start, end, kpi.metric, label_name='machine_id', label_value=machine_id)
+        ts_list = self.data_loader.get_metric(start, end, kpi.metric, machine_id=machine_id)
 
         if not ts_list:
             logger.warning(f'Key metric {kpi.metric} is null on the target machine {machine_id}!')
diff --git a/anteater/module/app/app_sli_detector.py b/anteater/module/app/app_sli_detector.py
index 102ed11..e506332 100644
--- a/anteater/module/app/app_sli_detector.py
+++ b/anteater/module/app/app_sli_detector.py
@@ -44,12 +44,12 @@ class APPSliDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                NSigmaDetector(data_loader, method='min'),
+                NSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                NSigmaDetector(data_loader, method='min')
+                NSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/module/sys/disk_throughput.py b/anteater/module/sys/disk_throughput.py
index 9a192fb..7971505 100644
--- a/anteater/module/sys/disk_throughput.py
+++ b/anteater/module/sys/disk_throughput.py
@@ -38,12 +38,12 @@ class DiskThroughputDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                NSigmaDetector(data_loader, method='max'),
+                NSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                NSigmaDetector(data_loader, method='max')
+                NSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/module/sys/proc_io_latency.py b/anteater/module/sys/proc_io_latency.py
index a34c48d..b76acea 100644
--- a/anteater/module/sys/proc_io_latency.py
+++ b/anteater/module/sys/proc_io_latency.py
@@ -38,12 +38,12 @@ class ProcIOLatencyDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                NSigmaDetector(data_loader, method='max'),
+                NSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                NSigmaDetector(data_loader, method='max')
+                NSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/module/sys/sys_io_latency.py b/anteater/module/sys/sys_io_latency.py
index a6f01c2..17a34c9 100644
--- a/anteater/module/sys/sys_io_latency.py
+++ b/anteater/module/sys/sys_io_latency.py
@@ -38,12 +38,12 @@ class SysIOLatencyDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                NSigmaDetector(data_loader, method='abs'),
+                NSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                NSigmaDetector(data_loader, method='abs')
+                NSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/module/sys/tcp_transmission_latency.py b/anteater/module/sys/tcp_transmission_latency.py
index cf0f406..e085ec3 100644
--- a/anteater/module/sys/tcp_transmission_latency.py
+++ b/anteater/module/sys/tcp_transmission_latency.py
@@ -39,12 +39,12 @@ class SysTcpTransmissionLatencyDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                TcpTransLatencyNSigmaDetector(data_loader, method='max'),
+                TcpTransLatencyNSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                TcpTransLatencyNSigmaDetector(data_loader, method='max')
+                TcpTransLatencyNSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/module/sys/tcp_transmission_throughput.py b/anteater/module/sys/tcp_transmission_throughput.py
index 86ecc9e..2921602 100644
--- a/anteater/module/sys/tcp_transmission_throughput.py
+++ b/anteater/module/sys/tcp_transmission_throughput.py
@@ -38,12 +38,12 @@ class SysTcpTransmissionThroughputDetector(E2EDetector):
     def init_detectors(self, data_loader):
         if self.job_config.model_config.enable:
             detectors = [
-                NSigmaDetector(data_loader, method='abs'),
+                NSigmaDetector(data_loader),
                 OnlineVAEDetector(data_loader, self.job_config.model_config)
             ]
         else:
             detectors = [
-                NSigmaDetector(data_loader, method='abs')
+                NSigmaDetector(data_loader)
             ]
 
         return detectors
diff --git a/anteater/source/metric_loader.py b/anteater/source/metric_loader.py
index ef2d012..4745d87 100644
--- a/anteater/source/metric_loader.py
+++ b/anteater/source/metric_loader.py
@@ -65,6 +65,42 @@ def get_query(metric: str,
     return query
 
 
+def get_query2(
+        metric: str, operator: str = None, value: float = None, keys: Union[str, List] = None, **labels):
+    """Gets aggregated query patterns
+
+    Format: [operator]([value,] metric{[**labels]}) by (keys)
+
+    Such as:
+      - 1. gala_gopher_bind_sends{machine_id="1234"}
+      - 2. sum(gala_gopher_bind_sends) by (machine_id)
+      - 3. sum(gala_gopher_bind_sends{machine_id="1234"}) by (machine_id)
+      - 4. quantile(0.7, gala_gopher_bind_sends{machine_id="1234"}) by (machine_id)
+    """
+    if operator and not keys:
+        raise ValueError("Please provide param 'keys' when specified 'operator'!")
+
+    rule = ""
+    if labels:
+        pairs = ",".join([f"{n}='{v}'" for n, v in labels.items()])
+        rule = f"{{{pairs}}}"
+
+    group = ""
+    if isinstance(keys, list):
+        group = ",".join([k for k in keys])
+    elif isinstance(keys, str):
+        group = keys
+
+    if operator and value:
+        query = f"{operator}({value}, {metric}{rule}) by ({group})"
+    elif operator:
+        query = f"{operator}({metric}{rule}) by ({group})"
+    else:
+        query = f"{metric}{rule}"
+
+    return query
+
+
 class MetricLoader:
     """
     The metric loader that consumes raw data from PrometheusAdapter,
@@ -87,7 +123,7 @@
 
         :return List of TimeSeries
         """
-        query = get_query(metric, **kwargs)
+        query = get_query2(metric, **kwargs)
         time_series = self.provider.range_query(start, end, metric, query)
 
         return time_series
@@ -109,7 +145,7 @@
         """Gets unique labels of all metrics"""
         unique_labels = set()
         for metric in metrics:
-            time_series = self.get_metric(start, end, metric, label_name=label_name)
+            time_series = self.get_metric(start, end, metric)
             unique_labels.update([item.labels.get(label_name, "") for item in time_series])
 
         return list([lbl for lbl in unique_labels if lbl])
diff --git a/anteater/utils/data_load.py b/anteater/utils/data_load.py
index 730c9c6..60c28e5 100644
--- a/anteater/utils/data_load.py
+++ b/anteater/utils/data_load.py
@@ -48,8 +48,8 @@ def load_job_config(file_name) -> JobConfig:
         keywords = config['keywords']
         root_cause_number = config['root_cause_number']
 
-        kpis = [KPI(**update_description(_conf)) for _conf in config['KPI']]
-        features = [Feature(**update_description(_conf)) for _conf in config['Features']]
+        kpis = [KPI.from_dict(**update_description(_conf)) for _conf in config['KPI']]
+        features = [Feature.from_dict(**update_description(_conf)) for _conf in config['Features']]
 
         model_config = None
         if 'OnlineModel' in config:
diff --git a/config/module/app_sli_rtt.json b/config/module/app_sli_rtt.json
index 0146883..5027b8d 100644
--- a/config/module/app_sli_rtt.json
+++ b/config/module/app_sli_rtt.json
@@ -10,13 +10,14 @@
             "metric": "gala_gopher_sli_rtt_nsec",
             "kpi_type": "rtt",
             "entity_name": "sli",
-            "enable": false,
+            "enable": true,
             "description": "sli rtt 异常",
             "description-zh": "应用级请求往返时延(RTT)异常",
             "params": {
+                "method": "max",
                 "look_back": 10,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.5,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
@@ -31,6 +32,7 @@
             "description": "sli tps 异常",
             "description-zh": "应用级请求吞吐量(TPS)异常",
             "params": {
+                "method": "min",
                 "look_back": 10,
                 "obs_size": 25,
                 "outlier_ratio_th": 0.3,
diff --git a/config/module/disk_throughput.json b/config/module/disk_throughput.json
index f6244f6..e3bcf68 100644
--- a/config/module/disk_throughput.json
+++ b/config/module/disk_throughput.json
@@ -14,9 +14,10 @@
             "description": "Disk read await time is increasing!",
             "description-zh": "磁盘读响应时间升高,性能发生劣化",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.5,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
@@ -31,9 +32,10 @@
             "description": "Disk write await time is increasing!",
             "description-zh": "磁盘写响应时间升高,性能发生劣化",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.5,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
diff --git a/config/module/proc_io_latency.json b/config/module/proc_io_latency.json
index f086b87..171c5f4 100644
--- a/config/module/proc_io_latency.json
+++ b/config/module/proc_io_latency.json
@@ -14,9 +14,10 @@
             "description": "I/O operation delay at the BIO layer (unit: us)",
             "description-zh": "BIO层I/O操作延时高(单位:us)",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 37,
-                "outlier_ratio_th": 0.4,
+                "outlier_ratio_th": 0.5,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
@@ -31,9 +32,10 @@
             "description": "Number of small I/O (less than 4 KB) read operations at the BIO layer.",
             "description-zh": "BIO层小数据I/O读操作数量异常(小于4KB)",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.4,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
@@ -48,9 +50,10 @@
             "description": "Number of small I/O (less than 4 KB) write operations at the BIO layer.",
             "description-zh": "BIO层小数据I/O写操作数量异常(小于4KB)",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.4,
                 "smooth_params": {
                     "method": "savgol_smooth",
                     "window_length": 13,
@@ -66,9 +69,10 @@
             "description": "Number of big I/O (greater than 4 KB) read operations at the BIO layer.",
             "description-zh": "BIO层大数据I/O读操作数量异常(大于4KB)",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.4,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
@@ -83,9 +87,10 @@
             "description": "Number of big I/O (greater than 4 KB) write operations at the BIO layer.",
             "description-zh": "BIO层大数据写操作数量异常(大于4KB)",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.4,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
diff --git a/config/module/sys_io_latency.json b/config/module/sys_io_latency.json
index bdf17d3..3fa1266 100644
--- a/config/module/sys_io_latency.json
+++ b/config/module/sys_io_latency.json
@@ -16,7 +16,7 @@
             "params": {
                 "look_back": 20,
                 "obs_size": 25,
-                "outlier_ratio_th": 0.3,
+                "outlier_ratio_th": 0.4,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
diff --git a/config/module/sys_tcp_establish.json b/config/module/sys_tcp_establish.json
index 7cd2369..9bd2a46 100644
--- a/config/module/sys_tcp_establish.json
+++ b/config/module/sys_tcp_establish.json
@@ -17,7 +17,7 @@
                 "look_back": 30,
                 "outlier_ratio_th": 0.5,
                 "obs_size": 3,
-                "min_rtt": 500000
+                "min_rtt": 100000
             }
         }
     ],
diff --git a/config/module/sys_tcp_transmission_latency.json b/config/module/sys_tcp_transmission_latency.json
index 0527487..3ba8113 100644
--- a/config/module/sys_tcp_transmission_latency.json
+++ b/config/module/sys_tcp_transmission_latency.json
@@ -14,10 +14,12 @@
             "description": "Smoothed Round Trip Time(us)",
             "description-zh": "TCP链接往返时延异常,性能劣化",
             "params": {
+                "method": "max",
                 "look_back": 20,
                 "obs_size": 25,
                 "n": 3,
-                "outlier_ratio_th": 0.4,
+                "min_srtt": 20000,
+                "outlier_ratio_th": 0.6,
                 "smooth_params": {
                     "method": "conv_smooth",
                     "box_pts": 3
-- 
2.33.0
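Notes for reviewers (plain Python sketches; these sit after the patch terminator and are not part of the commit).

1. Query construction. All refactored loaders now go through the new get_query2() helper. Below is a self-contained copy of that helper (same body as in anteater/source/metric_loader.py above) together with the query strings its f-strings produce; the metric name gala_gopher_bind_sends is the one used in its docstring examples.

from typing import List, Union


def get_query2(
        metric: str, operator: str = None, value: float = None, keys: Union[str, List] = None, **labels):
    """Builds: [operator]([value,] metric{**labels}) by (keys)"""
    if operator and not keys:
        raise ValueError("Please provide param 'keys' when specified 'operator'!")

    # Keyword labels become a PromQL label selector, e.g. {machine_id='1234'}
    rule = ""
    if labels:
        pairs = ",".join([f"{n}='{v}'" for n, v in labels.items()])
        rule = f"{{{pairs}}}"

    # 'keys' (str or list) becomes the "by (...)" grouping clause
    group = ""
    if isinstance(keys, list):
        group = ",".join([k for k in keys])
    elif isinstance(keys, str):
        group = keys

    if operator and value:
        query = f"{operator}({value}, {metric}{rule}) by ({group})"
    elif operator:
        query = f"{operator}({metric}{rule}) by ({group})"
    else:
        query = f"{metric}{rule}"

    return query


# Plain selector, as used by the refactored n-sigma detectors:
print(get_query2('gala_gopher_bind_sends', machine_id='1234'))
# -> gala_gopher_bind_sends{machine_id='1234'}

# Aggregation with grouping, as used by OnlineVAEDetector:
print(get_query2('gala_gopher_bind_sends', operator='avg', keys='machine_id', machine_id='1234'))
# -> avg(gala_gopher_bind_sends{machine_id='1234'}) by (machine_id)

# Operator with a scalar parameter:
print(get_query2('gala_gopher_bind_sends', operator='quantile', value=0.7, keys='machine_id'))
# -> quantile(0.7, gala_gopher_bind_sends) by (machine_id)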
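2. The str2enum fix. Config files spell the trend as a plain string ('rise'/'fall'), which previously reached the KPI/Feature dataclasses unconverted. AnomalyTrend.from_str below is copied from anteater/core/anomaly.py above; ExampleKPI is a cut-down stand-in for the real KPI dataclass, which has more fields.

from dataclasses import dataclass, field
from enum import Enum


class AnomalyTrend(Enum):
    DEFAULT = 0
    RISE = 1
    FALL = 2

    @staticmethod
    def from_str(label: str):
        """Translates a trend label string to the enum type"""
        if label.upper() == 'RISE':
            return AnomalyTrend.RISE
        elif label.upper() == 'FALL':
            return AnomalyTrend.FALL
        else:
            return AnomalyTrend.DEFAULT


@dataclass
class ExampleKPI:
    # Stand-in for anteater.core.kpi.KPI; only the fields needed here.
    metric: str
    params: dict = field(default_factory=dict)
    atrend: AnomalyTrend = AnomalyTrend.DEFAULT

    @classmethod
    def from_dict(cls, **data):
        # Convert the config string to the enum before constructing,
        # instead of passing the raw string through to the dataclass.
        if 'atrend' in data:
            data['atrend'] = AnomalyTrend.from_str(data.get('atrend'))
        return cls(**data)


kpi = ExampleKPI.from_dict(metric='gala_gopher_sli_rtt_nsec', atrend='rise')
assert kpi.atrend is AnomalyTrend.RISE                       # case-insensitive match
assert ExampleKPI.from_dict(metric='m').atrend is AnomalyTrend.DEFAULT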
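3. The trend deadband. The reworked trend() smooths the series, then compares the early-window mean against the late-window mean as a ratio with a +/-10% deadband, and returns only 1, -1 or 0 — which is why check_trend() now tests trend(values) != 1 rather than < 0. conv_smooth and divide are not shown in this patch, so the sketch assumes a box-filter smoother and a zero-safe division.

import numpy as np


def conv_smooth(y, box_pts):
    # Assumed implementation: box-filter smoothing via convolution.
    box = np.ones(box_pts) / box_pts
    return np.convolve(y, box, mode='same')


def divide(a, b):
    # Assumed implementation: returns 0 when the denominator is 0.
    return a / b if b else 0


def trend(y, win_len=None):
    """Returns 1 (rise), -1 (fall) or 0 (flat) for the series y"""
    y = conv_smooth(y, box_pts=7)
    if not win_len:
        win_len = len(y) // 2

    # Ratio of the early mean to the late mean, with a +/-10% deadband:
    # small drifts no longer count as a rise or a fall.
    ratio = divide(np.mean(y[:win_len]), np.mean(y[-win_len:]))
    if ratio < 0.9:
        return 1
    if ratio > 1.1:
        return -1
    return 0


flat = [100.0] * 30
rising = [100.0 + 5 * i for i in range(30)]
assert trend(flat) == 0       # inside the deadband -> no trend reported
assert trend(rising) == 1     # early mean / late mean < 0.9 -> rise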