gala-spider/0001-cause-inference-optimization.patch
2022-11-22 16:30:42 +08:00

680 lines
26 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From ac48cfdcac8c93ba59754899237a37dac40659ac Mon Sep 17 00:00:00 2001
From: algorithmofdish <hexiujun1@huawei.com>
Date: Fri, 18 Nov 2022 11:07:47 +0800
Subject: [PATCH] feat(infer): cause inference optimization
1. categorize metrics and select one as cause with max abnormal score;
2. add virtual metric to complete the fault propagation path;
3. use abnormal score without nomarlization;
4. fix a bug where subgraph are incompletely obtained.
---
cause_inference/__main__.py | 4 +-
cause_inference/arangodb.py | 24 +++--
cause_inference/cause_infer.py | 11 +--
cause_inference/infer_policy.py | 84 ++++++++++++++----
cause_inference/model.py | 64 +++++++++++---
cause_inference/rule_parser.py | 149 ++++++++++++++++++++++++--------
config/infer-rule.yaml | 98 +++++++++++++++++----
7 files changed, 337 insertions(+), 97 deletions(-)
diff --git a/cause_inference/__main__.py b/cause_inference/__main__.py
index baddbe4..093f7ac 100644
--- a/cause_inference/__main__.py
+++ b/cause_inference/__main__.py
@@ -15,7 +15,7 @@ from cause_inference.config import init_infer_config
from cause_inference.model import AbnormalEvent
from cause_inference.cause_infer import cause_locating
from cause_inference.cause_infer import parse_abn_evt
-from cause_inference.cause_infer import normalize_abn_score
+from cause_inference.cause_infer import preprocess_abn_score
from cause_inference.rule_parser import rule_engine
from cause_inference.exceptions import InferenceException
from cause_inference.exceptions import DataParseException
@@ -172,7 +172,7 @@ def get_recommend_metric_evts(abn_kpi_data: dict) -> List[AbnormalEvent]:
metric_evt = AbnormalEvent(
timestamp=abn_kpi_data.get('Timestamp'),
abnormal_metric_id=metric_data.get('metric', ''),
- abnormal_score=normalize_abn_score(metric_data.get('score')),
+ abnormal_score=preprocess_abn_score(metric_data.get('score')),
metric_labels=metric_data.get('label', {}),
desc=metric_data.get('description', '')
)
diff --git a/cause_inference/arangodb.py b/cause_inference/arangodb.py
index 424e500..d7982b7 100644
--- a/cause_inference/arangodb.py
+++ b/cause_inference/arangodb.py
@@ -22,6 +22,14 @@ def connect_to_arangodb(arango_url, db_name):
return conn.databases[db_name]
+def query_all(db, aql_query, bind_vars=None, raw_results=True):
+ res = []
+ query_hdl = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=raw_results)
+ for item in query_hdl:
+ res.append(item)
+ return res
+
+
def query_recent_topo_ts(db: Database, ts):
bind_vars = {'@collection': _TIMESTAMP_COLL_NAME, 'ts': ts}
aql_query = '''
@@ -32,12 +40,12 @@ def query_recent_topo_ts(db: Database, ts):
RETURN t._key
'''
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
- if query_res.get('error') or not query_res.get('result'):
+ if len(query_res) == 0:
raise DBException('Can not find topological graph at the abnormal timestamp {}'.format(ts))
- last_ts = query_res.get('result')[0]
+ last_ts = query_res[0]
return int(last_ts)
@@ -56,12 +64,12 @@ def query_topo_entities(db: Database, ts, query_options=None):
return v
'''.format(filter_str)
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
- if query_res.get('error') or not query_res.get('result'):
+ if len(query_res) == 0:
raise DBException('Can not find observe entities satisfied.')
- return query_res.get('result')
+ return query_res
def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
@@ -83,12 +91,12 @@ def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
return {{"vertex": v, "edge": e}}
'''.format(edge_coll_str)
try:
- query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
+ query_res = query_all(db, aql_query, bind_vars)
except AQLQueryError as ex:
raise DBException(ex) from ex
vertices = {}
edges = {}
- for item in query_res.get('result'):
+ for item in query_res:
vertex = item.get('vertex')
edge = item.get('edge')
vertices.setdefault(vertex.get('_id'), vertex)
diff --git a/cause_inference/cause_infer.py b/cause_inference/cause_infer.py
index 1954abf..dff26d0 100644
--- a/cause_inference/cause_infer.py
+++ b/cause_inference/cause_infer.py
@@ -23,8 +23,8 @@ from cause_inference.infer_policy import InferPolicy
from cause_inference.infer_policy import get_infer_policy
-def normalize_abn_score(score):
- return expit(score)
+def preprocess_abn_score(score):
+ return max(0, score)
# 因果推理
@@ -172,9 +172,10 @@ def cause_locating(abnormal_kpi: AbnormalEvent, abnormal_metrics: List[AbnormalE
# 5. 生成异常指标之间的因果图
causal_graph.init_metric_cause_graph()
- logger.logger.debug("Causal graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
- logger.logger.debug("Causal graph predecessors: {}".format(causal_graph.entity_cause_graph.pred))
- logger.logger.debug("Causal graph successors: {}".format(causal_graph.entity_cause_graph.succ))
+ logger.logger.debug("Entity cause graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
+ logger.logger.debug("Entity cause graph edges are: {}".format(causal_graph.entity_cause_graph.edges))
+ logger.logger.debug("Metric cause graph nodes are: {}".format(causal_graph.metric_cause_graph.nodes))
+ logger.logger.debug("Metric cause graph edges are: {}".format(causal_graph.metric_cause_graph.edges))
# 6. 以故障传播图 + 异常KPI为输入执行根因推导算法输出 topK 根因指标
infer_policy = get_infer_policy(infer_config.infer_conf.get('infer_policy'))
diff --git a/cause_inference/infer_policy.py b/cause_inference/infer_policy.py
index c0952e7..0663e74 100644
--- a/cause_inference/infer_policy.py
+++ b/cause_inference/infer_policy.py
@@ -5,9 +5,10 @@ from typing import List
import networkx as nx
-from cause_inference.cause_infer import CausalGraph
-from cause_inference.cause_infer import Cause
+from cause_inference.model import CausalGraph
+from cause_inference.model import Cause
from cause_inference.exceptions import InferenceException
+from cause_inference import rule_parser
class InferPolicy(ABC):
@@ -114,22 +115,28 @@ class DfsPolicy(InferPolicy):
if length < 1:
return 0.0
total_score = 0.0
+ num_of_valid_node = 0
for node in path[:length]:
+ if is_virtual_node(node):
+ continue
total_score += cause_graph.nodes[node].get('abnormal_score', 0)
- if length != 0:
- total_score /= length
+ num_of_valid_node += 1
+ if num_of_valid_node != 0:
+ total_score /= num_of_valid_node
return total_score
- def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
- cause_graph = causal_graph.metric_cause_graph
- abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
-
- reverse_graph = nx.DiGraph()
- reverse_graph.add_nodes_from(cause_graph.nodes)
+ @staticmethod
+ def reverse_graph(cause_graph):
+ reversed_graph = nx.DiGraph()
+ reversed_graph.add_nodes_from(cause_graph.nodes)
for from_, to in cause_graph.edges:
- reverse_graph.add_edge(to, from_)
+ reversed_graph.add_edge(to, from_)
+ return reversed_graph
- successors = nx.dfs_successors(reverse_graph, abn_node_id)
+ @staticmethod
+ def get_all_paths_to_abn_node(abn_node_id, cause_graph):
+ reversed_graph = DfsPolicy.reverse_graph(cause_graph)
+ successors = nx.dfs_successors(reversed_graph, abn_node_id)
paths = []
path = []
@@ -145,24 +152,63 @@ class DfsPolicy(InferPolicy):
path.append(abn_node_id)
dfs_path(abn_node_id)
+ return paths
+ @staticmethod
+ def get_scored_paths(cause_graph, paths) -> list:
scored_paths = []
- for p in paths:
+ for path in paths:
scored_paths.append({
- 'score': self.calc_path_score(p, cause_graph),
- 'path': p
+ 'score': DfsPolicy.calc_path_score(path, cause_graph),
+ 'path': path
})
- scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
- scored_paths = scored_paths[:top_k]
+ return scored_paths
+
+ @staticmethod
+ def get_top_paths(scored_paths, top_k) -> list:
+ top_paths = []
+ node_selected = set()
+ metric_selected = set()
+ for path in scored_paths:
+ if len(top_paths) == top_k:
+ break
+ cause_node_id = path.get('path')[0]
+ if cause_node_id in node_selected:
+ continue
+ if cause_node_id[1] in metric_selected:
+ continue
+ if is_virtual_node(cause_node_id):
+ continue
+ node_selected.add(cause_node_id)
+ metric_selected.add(cause_node_id[1])
+ top_paths.append(path)
+
+ return top_paths
+ @staticmethod
+ def get_top_causes(top_paths) -> List[Cause]:
res = []
- for item in scored_paths:
+ for item in top_paths:
cause_node_id = item.get('path')[0]
cause = Cause(cause_node_id[1], cause_node_id[0], item.get('score'), item.get('path'))
res.append(cause)
-
return res
+ def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
+ cause_graph = causal_graph.metric_cause_graph
+ abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
+
+ paths = self.get_all_paths_to_abn_node(abn_node_id, cause_graph)
+ scored_paths = self.get_scored_paths(cause_graph, paths)
+ scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
+ top_paths = self.get_top_paths(scored_paths, top_k)
+
+ return self.get_top_causes(top_paths)
+
+
+def is_virtual_node(node_id) -> bool:
+ return rule_parser.is_virtual_metric(node_id[1])
+
def get_infer_policy(policy: str, **options) -> InferPolicy:
if policy == 'dfs':
diff --git a/cause_inference/model.py b/cause_inference/model.py
index eb473da..136e4c3 100644
--- a/cause_inference/model.py
+++ b/cause_inference/model.py
@@ -7,6 +7,7 @@ from spider.conf.observe_meta import ObserveMetaMgt
from spider.util import logger
from spider.util.entity import concate_entity_id
from spider.util.entity import escape_entity_id
+from cause_inference import rule_parser
class AbnormalEvent:
@@ -77,6 +78,10 @@ class CausalGraph:
self.metric_cause_graph = nx.DiGraph()
self.init_casual_graph()
+ @staticmethod
+ def is_virtual_metric_group(metric_group) -> bool:
+ return len(metric_group) == 1 and rule_parser.is_virtual_metric(metric_group[0])
+
def init_casual_graph(self):
for node_id, node_attrs in self.topo_nodes.items():
self.entity_cause_graph.add_node(node_id, **node_attrs)
@@ -134,19 +139,54 @@ class CausalGraph:
self.init_metric_edge(edge)
def init_metric_edge(self, entity_edge):
- from_entity_id = entity_edge[0]
- to_entity_id = entity_edge[1]
+ f_entity_id = entity_edge[0]
+ t_entity_id = entity_edge[1]
+ avail_relations = self.get_avail_metric_causal_relations(entity_edge)
+
+ unique = set()
+ for f_metric_group, t_metric_group in avail_relations:
+ if self.is_virtual_metric_group(f_metric_group):
+ self.add_virtual_metric_node(f_entity_id, f_metric_group[0])
+ if self.is_virtual_metric_group(t_metric_group):
+ self.add_virtual_metric_node(t_entity_id, t_metric_group[0])
+
+ f_metric_id = self.metric_with_largest_abn_score(f_metric_group, f_entity_id)
+ t_metric_id = self.metric_with_largest_abn_score(t_metric_group, t_entity_id)
+ if (f_metric_id, t_metric_id) not in unique:
+ self.metric_cause_graph.add_edge((f_entity_id, f_metric_id), (t_entity_id, t_metric_id))
+ unique.add((f_metric_id, t_metric_id))
+
+ def add_virtual_metric_node(self, entity_id, metric_id):
+ self.metric_cause_graph.add_node((entity_id, metric_id))
+
+ def get_avail_metric_causal_relations(self, entity_edge):
+ f_metric_ids = self.get_abn_metric_ids(entity_edge[0])
+ t_metric_ids = self.get_abn_metric_ids(entity_edge[1])
rule_meta = self.entity_cause_graph.edges[entity_edge].get('rule_meta')
- for from_metric_evt in self.get_abnormal_metrics_of_node(from_entity_id):
- for to_metric_evt in self.get_abnormal_metrics_of_node(to_entity_id):
- from_metric_id = from_metric_evt.abnormal_metric_id
- to_metric_id = to_metric_evt.abnormal_metric_id
- if rule_meta is not None and not rule_meta.check_metric_pair(from_metric_id, to_metric_id):
- continue
- self.metric_cause_graph.add_edge(
- (from_entity_id, from_metric_id),
- (to_entity_id, to_metric_id)
- )
+ return rule_meta.get_avail_causal_relations(f_metric_ids, t_metric_ids)
+
+ def get_abn_metric_ids(self, entity_id):
+ metric_evts = self.get_abnormal_metrics_of_node(entity_id)
+ return [evt.abnormal_metric_id for evt in metric_evts]
+
+ def metric_with_largest_abn_score(self, metric_group: list, entity_id) -> str:
+ if len(metric_group) == 1:
+ return metric_group[0]
+
+ metric_evt_map = {}
+ metric_evts = self.get_abnormal_metrics_of_node(entity_id)
+ for metric_evt in metric_evts:
+ metric_evt_map.setdefault(metric_evt.abnormal_metric_id, metric_evt)
+
+ metric_id_of_largest = metric_group[0]
+ largest_abn_score = metric_evt_map.get(metric_id_of_largest).abnormal_score
+ for metric_id in metric_group:
+ abn_score = metric_evt_map.get(metric_id).abnormal_score
+ if abn_score > largest_abn_score:
+ metric_id_of_largest = metric_id
+ largest_abn_score = abn_score
+
+ return metric_id_of_largest
class Cause:
diff --git a/cause_inference/rule_parser.py b/cause_inference/rule_parser.py
index f08ed3c..eb6227c 100644
--- a/cause_inference/rule_parser.py
+++ b/cause_inference/rule_parser.py
@@ -1,7 +1,7 @@
import os
from abc import ABCMeta
from abc import abstractmethod
-from typing import List
+from typing import List, Dict, Tuple
import yaml
@@ -9,33 +9,86 @@ from spider.conf.observe_meta import RelationType
from spider.conf.observe_meta import EntityType
from spider.util import logger
+METRIC_CATEGORY_ALL = 'ALL'
+METRIC_CATEGORY_OTHER = 'OTHER'
+METRIC_CATEGORY_VIRTUAL = 'VIRTUAL'
+METRIC_ID_OF_CATEGORY_VIRTUAL = 'virtual_metric'
-class MetricPairSet:
- def __init__(self, from_: set, to_: set):
+
+def is_virtual_metric(metric_id: str) -> bool:
+ return metric_id == METRIC_ID_OF_CATEGORY_VIRTUAL
+
+
+class MetricCategoryPair:
+ def __init__(self, from_: str, to_: str):
self.from_ = from_
self.to_ = to_
- def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
- if self.from_ and from_metric_id not in self.from_:
- return False
- if self.to_ and to_metric_id not in self.to_:
- return False
- return True
-
class RuleMeta:
- def __init__(self, from_type, to_type, metric_range=None):
+ def __init__(self, from_type, to_type, from_categories=None, to_categories=None, metric_range=None):
self.from_type = from_type
self.to_type = to_type
- self.metric_range: List[MetricPairSet] = metric_range or []
-
- def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
- if not self.metric_range:
- return True
- for item in self.metric_range:
- if item.check_metric_pair(from_metric_id, to_metric_id):
- return True
- return False
+ self.from_categories = from_categories or {}
+ self.to_categories = to_categories or {}
+ self.category_pairs: List[MetricCategoryPair] = metric_range or []
+
+ @staticmethod
+ def aggregate_metric_from_groups(category_type, metric_groups) -> List[list]:
+ res = []
+ if category_type == METRIC_CATEGORY_ALL:
+ for cate_type, metric_group in metric_groups.items():
+ if cate_type == METRIC_CATEGORY_VIRTUAL:
+ continue
+ elif cate_type == METRIC_CATEGORY_OTHER:
+ res.extend([metric] for metric in metric_group)
+ else:
+ res.append(metric_group)
+ else:
+ metric_group = metric_groups.get(category_type)
+ if metric_group:
+ res.append(metric_group)
+
+ return res
+
+ @staticmethod
+ def _group_metric_by_category(metrics, categories) -> Dict[str, list]:
+ parts = {}
+ parted_metrics = set()
+ for cate_type, cate_metrics in categories.items():
+ part = []
+ for metric in metrics:
+ if metric in cate_metrics:
+ part.append(metric)
+ parted_metrics.add(metric)
+ if len(part) > 0:
+ parts.setdefault(cate_type, part)
+
+ other_part = []
+ for metric in metrics:
+ if metric not in parted_metrics:
+ other_part.append(metric)
+ if len(other_part) > 0:
+ parts.setdefault(METRIC_CATEGORY_OTHER, other_part)
+
+ virtual_part = [METRIC_ID_OF_CATEGORY_VIRTUAL]
+ parts.setdefault(METRIC_CATEGORY_VIRTUAL, virtual_part)
+
+ return parts
+
+ def get_avail_causal_relations(self, real_from_metrics, real_to_metrics) -> List[Tuple[list, list]]:
+ causal_relations = []
+
+ from_groups = self._group_metric_by_category(real_from_metrics, self.from_categories)
+ to_groups = self._group_metric_by_category(real_to_metrics, self.to_categories)
+ for cate_pair in self.category_pairs:
+ all_from_metrics = self.aggregate_metric_from_groups(cate_pair.from_, from_groups)
+ all_to_metrics = self.aggregate_metric_from_groups(cate_pair.to_, to_groups)
+ for from_metrics in all_from_metrics:
+ for to_metrics in all_to_metrics:
+ causal_relations.append((from_metrics, to_metrics))
+
+ return causal_relations
class Rule(metaclass=ABCMeta):
@@ -180,7 +233,8 @@ class NicRule1(Rule):
class RuleEngine:
def __init__(self):
self.rules: List[Rule] = []
- self.rule_metas = {}
+ self.metric_categories = {}
+ self.rule_metas: Dict[tuple, RuleMeta] = {}
def add_rule(self, rule: Rule):
self.rules.append(rule)
@@ -190,33 +244,58 @@ class RuleEngine:
rule.rule_parsing(causal_graph)
def load_rule_meta_from_yaml(self, rule_path: str) -> bool:
- abs_rule_path = os.path.abspath(rule_path)
- if not os.path.exists(abs_rule_path):
- logger.logger.warning("Rule meta path '{}' not exist", abs_rule_path)
- return True
try:
- with open(abs_rule_path, 'r') as file:
+ with open(os.path.abspath(rule_path), 'r') as file:
data = yaml.safe_load(file)
except IOError as ex:
logger.logger.warning(ex)
return False
- infer_rules = data.get("infer_rules", [])
- for rule_meta in infer_rules:
- saved_metric_range = []
- for item in rule_meta.get("metric_range", []):
- saved_metric_range.append(MetricPairSet(set(item.get('from', [])), set(item.get('to', []))))
- saved_rule_meta = RuleMeta(rule_meta.get('from_type'), rule_meta.get('to_type'), saved_metric_range)
- self.rule_metas.setdefault((rule_meta.get("from_type"), rule_meta.get("to_type")), saved_rule_meta)
-
+ self.load_rule_meta_from_dict(data)
return True
+ def create_default_rule_meta(self, from_type, to_type):
+ return RuleMeta(
+ from_type,
+ to_type,
+ self.metric_categories.get(from_type),
+ self.metric_categories.get(to_type),
+ [MetricCategoryPair(METRIC_CATEGORY_ALL, METRIC_CATEGORY_ALL)]
+ )
+
def add_rule_meta(self, causal_graph):
entity_cause_graph = causal_graph.entity_cause_graph
for edge in entity_cause_graph.edges:
from_type = entity_cause_graph.nodes[edge[0]].get('type')
to_type = entity_cause_graph.nodes[edge[1]].get('type')
- entity_cause_graph.edges[edge]["rule_meta"] = self.rule_metas.get((from_type, to_type))
+ rule_meta = self.rule_metas.get((from_type, to_type))
+ if not rule_meta:
+ rule_meta = self.create_default_rule_meta(from_type, to_type)
+ entity_cause_graph.edges[edge]["rule_meta"] = rule_meta
+
+ def load_rule_meta_from_dict(self, data: dict):
+ self.load_metric_categories(data.get('metric_categories', {}))
+ self.load_infer_rules(data.get("infer_rules", []))
+
+ def load_metric_categories(self, metric_categories: dict):
+ for entity_type, categories in metric_categories.items():
+ category_dict = {}
+ for category in categories:
+ category_dict.setdefault(category.get('category'), category.get('metrics'))
+ self.metric_categories.setdefault(entity_type, category_dict)
+
+ def load_infer_rules(self, infer_rules: list):
+ for rule_meta in infer_rules:
+ from_entity_type = rule_meta.get('from_type')
+ to_entity_type = rule_meta.get('to_type')
+ saved_metric_range = []
+ for item in rule_meta.get("metric_range", []):
+ from_category = item.get('from')
+ to_category = item.get('to')
+ saved_metric_range.append(MetricCategoryPair(from_category, to_category))
+ saved_rule_meta = RuleMeta(from_entity_type, to_entity_type, self.metric_categories.get(from_entity_type),
+ self.metric_categories.get(to_entity_type), saved_metric_range)
+ self.rule_metas.setdefault((from_entity_type, to_entity_type), saved_rule_meta)
rule_engine = RuleEngine()
diff --git a/config/infer-rule.yaml b/config/infer-rule.yaml
index c3bc3f3..d287972 100644
--- a/config/infer-rule.yaml
+++ b/config/infer-rule.yaml
@@ -1,32 +1,98 @@
+metric_categories:
+ proc:
+ -
+ category: PROC_CPU
+ metrics:
+ - gala_gopher_proc_utime_jiffies
+ - gala_gopher_proc_stime_jiffies
+ -
+ category: PROC_IO_LOAD
+ metrics:
+ - gala_gopher_proc_read_bytes
+ - gala_gopher_proc_write_bytes
+ - gala_gopher_proc_less_4k_io_read
+ - gala_gopher_proc_less_4k_io_write
+ - gala_gopher_proc_greater_4k_io_read
+ - gala_gopher_proc_greater_4k_io_write
+ disk:
+ -
+ category: DISK_LOAD
+ metrics:
+ - gala_gopher_disk_rspeed_kB
+ - gala_gopher_disk_wspeed_kB
+ - gala_gopher_disk_rspeed
+ - gala_gopher_disk_wspeed
+ -
+ category: DISK_DELAY
+ metrics:
+ - gala_gopher_disk_r_await
+ - gala_gopher_disk_w_await
+ - gala_gopher_disk_rareq
+ - gala_gopher_disk_wareq
+ block:
+ -
+ category: BLOCK_DELAY
+ metrics:
+ - gala_gopher_block_latency_req_max
+ - gala_gopher_block_latency_req_last
+ - gala_gopher_block_latency_req_sum
+ - gala_gopher_block_latency_req_jitter
+ nic:
+ -
+ category: NIC_DROP
+ metrics:
+ - gala_gopher_nic_tc_sent_drop
+ - gala_gopher_nic_tx_dropped
+ - gala_gopher_nic_rx_dropped
+ cpu:
+ -
+ category: CPU_TOTAL
+ metrics:
+ - gala_gopher_cpu_total_used_per
+
infer_rules:
-
from_type: cpu
to_type: proc
metric_range:
-
- from: []
- to:
- - gala_gopher_proc_utime_jiffies
- - gala_gopher_proc_stime_jiffies
+ from: CPU_TOTAL
+ to: PROC_CPU
-
from_type: block
to_type: proc
metric_range:
-
- from: []
- to:
- - gala_gopher_proc_ns_ext4_read
- - gala_gopher_proc_ns_ext4_write
- - gala_gopher_proc_ns_ext4_flush
- - gala_gopher_proc_ns_overlay_read
- - gala_gopher_proc_ns_overlay_write
- - gala_gopher_proc_ns_overlay_flush
+ from: BLOCK_DELAY
+ to: VIRTUAL
-
from_type: proc
to_type: disk
metric_range:
-
- from:
- - gala_gopher_proc_read_bytes
- - gala_gopher_proc_write_bytes
- to: []
\ No newline at end of file
+ from: PROC_IO_LOAD
+ to: ALL
+ -
+ from_type: disk
+ to_type: block
+ metric_range:
+ -
+ from: DISK_DELAY
+ to: BLOCK_DELAY
+ -
+ from_type: proc
+ to_type: sli
+ metric_range:
+ -
+ from: VIRTUAL
+ to: ALL
+ -
+ from: PROC_CPU
+ to: ALL
+ -
+ from_type: tcp_link
+ to_type: proc
+ metric_range:
+ -
+ from: ALL
+ to: VIRTUAL
\ No newline at end of file
--
2.21.0.windows.1