!20 Update to 1.0.1: support cross host cause location

From: @algorithmofdish Reviewed-by: @dowzyx Signed-off-by: @dowzyx
2022-12-14 02:06:35 +00:00 · 2022-12-14 02:06:35 +00:00 · 98b4ebef87
commit 98b4ebef87
parent 87b34daac7 9b19749468
5 changed files with 6 additions and 792 deletions
--- a/0001-cause-inference-optimization.patch
+++ b/0001-cause-inference-optimization.patch
@ -1,679 +0,0 @@
-From ac48cfdcac8c93ba59754899237a37dac40659ac Mon Sep 17 00:00:00 2001
-From: algorithmofdish <hexiujun1@huawei.com>
-Date: Fri, 18 Nov 2022 11:07:47 +0800
-Subject: [PATCH] feat(infer): cause inference optimization
-
-1. categorize metrics and select one as cause with max abnormal score;
-2. add virtual metric to complete the fault propagation path;
-3. use abnormal score without nomarlization;
-4. fix a bug where subgraph are incompletely obtained.
---
- cause_inference/__main__.py     |   4 +-
- cause_inference/arangodb.py     |  24 +++--
- cause_inference/cause_infer.py  |  11 +--
- cause_inference/infer_policy.py |  84 ++++++++++++++----
- cause_inference/model.py        |  64 +++++++++++---
- cause_inference/rule_parser.py  | 149 ++++++++++++++++++++++++--------
- config/infer-rule.yaml          |  98 +++++++++++++++++----
- 7 files changed, 337 insertions(+), 97 deletions(-)
-
-diff --git a/cause_inference/__main__.py b/cause_inference/__main__.py
-index baddbe4..093f7ac 100644
--- a/cause_inference/__main__.py
-+++ b/cause_inference/__main__.py
-@@ -15,7 +15,7 @@ from cause_inference.config import init_infer_config
- from cause_inference.model import AbnormalEvent
- from cause_inference.cause_infer import cause_locating
- from cause_inference.cause_infer import parse_abn_evt
-from cause_inference.cause_infer import normalize_abn_score
-+from cause_inference.cause_infer import preprocess_abn_score
- from cause_inference.rule_parser import rule_engine
- from cause_inference.exceptions import InferenceException
- from cause_inference.exceptions import DataParseException
-@@ -172,7 +172,7 @@ def get_recommend_metric_evts(abn_kpi_data: dict) -> List[AbnormalEvent]:
-         metric_evt = AbnormalEvent(
-             timestamp=abn_kpi_data.get('Timestamp'),
-             abnormal_metric_id=metric_data.get('metric', ''),
-            abnormal_score=normalize_abn_score(metric_data.get('score')),
-+            abnormal_score=preprocess_abn_score(metric_data.get('score')),
-             metric_labels=metric_data.get('label', {}),
-             desc=metric_data.get('description', '')
-         )
-diff --git a/cause_inference/arangodb.py b/cause_inference/arangodb.py
-index 424e500..d7982b7 100644
--- a/cause_inference/arangodb.py
-+++ b/cause_inference/arangodb.py
-@@ -22,6 +22,14 @@ def connect_to_arangodb(arango_url, db_name):
-     return conn.databases[db_name]
- 
- 
-+def query_all(db, aql_query, bind_vars=None, raw_results=True):
-+    res = []
-+    query_hdl = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=raw_results)
-+    for item in query_hdl:
-+        res.append(item)
-+    return res
-+
-+
- def query_recent_topo_ts(db: Database, ts):
-     bind_vars = {'@collection': _TIMESTAMP_COLL_NAME, 'ts': ts}
-     aql_query = '''
-@@ -32,12 +40,12 @@ def query_recent_topo_ts(db: Database, ts):
-       RETURN t._key
-     '''
-     try:
-        query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
-+        query_res = query_all(db, aql_query, bind_vars)
-     except AQLQueryError as ex:
-         raise DBException(ex) from ex
-    if query_res.get('error') or not query_res.get('result'):
-+    if len(query_res) == 0:
-         raise DBException('Can not find topological graph at the abnormal timestamp {}'.format(ts))
-    last_ts = query_res.get('result')[0]
-+    last_ts = query_res[0]
-     return int(last_ts)
- 
- 
-@@ -56,12 +64,12 @@ def query_topo_entities(db: Database, ts, query_options=None):
-       return v
-     '''.format(filter_str)
-     try:
-        query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
-+        query_res = query_all(db, aql_query, bind_vars)
-     except AQLQueryError as ex:
-         raise DBException(ex) from ex
-    if query_res.get('error') or not query_res.get('result'):
-+    if len(query_res) == 0:
-         raise DBException('Can not find observe entities satisfied.')
-    return query_res.get('result')
-+    return query_res
- 
- 
- def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
-@@ -83,12 +91,12 @@ def query_subgraph(db, ts, start_node_id, edge_collection, depth=1):
-       return {{"vertex": v, "edge": e}}
-     '''.format(edge_coll_str)
-     try:
-        query_res = db.AQLQuery(aql_query, bindVars=bind_vars, rawResults=True).response
-+        query_res = query_all(db, aql_query, bind_vars)
-     except AQLQueryError as ex:
-         raise DBException(ex) from ex
-     vertices = {}
-     edges = {}
-    for item in query_res.get('result'):
-+    for item in query_res:
-         vertex = item.get('vertex')
-         edge = item.get('edge')
-         vertices.setdefault(vertex.get('_id'), vertex)
-diff --git a/cause_inference/cause_infer.py b/cause_inference/cause_infer.py
-index 1954abf..dff26d0 100644
--- a/cause_inference/cause_infer.py
-+++ b/cause_inference/cause_infer.py
-@@ -23,8 +23,8 @@ from cause_inference.infer_policy import InferPolicy
- from cause_inference.infer_policy import get_infer_policy
- 
- 
-def normalize_abn_score(score):
-    return expit(score)
-+def preprocess_abn_score(score):
-+    return max(0, score)
- 
- 
- # 因果推理
-@@ -172,9 +172,10 @@ def cause_locating(abnormal_kpi: AbnormalEvent, abnormal_metrics: List[AbnormalE
-     # 5. 生成异常指标之间的因果图
-     causal_graph.init_metric_cause_graph()
- 
-    logger.logger.debug("Causal graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
-    logger.logger.debug("Causal graph predecessors: {}".format(causal_graph.entity_cause_graph.pred))
-    logger.logger.debug("Causal graph successors: {}".format(causal_graph.entity_cause_graph.succ))
-+    logger.logger.debug("Entity cause graph nodes are: {}".format(causal_graph.entity_cause_graph.nodes))
-+    logger.logger.debug("Entity cause graph edges are: {}".format(causal_graph.entity_cause_graph.edges))
-+    logger.logger.debug("Metric cause graph nodes are: {}".format(causal_graph.metric_cause_graph.nodes))
-+    logger.logger.debug("Metric cause graph edges are: {}".format(causal_graph.metric_cause_graph.edges))
- 
-     # 6. 以故障传播图 + 异常KPI为输入，执行根因推导算法，输出 topK 根因指标
-     infer_policy = get_infer_policy(infer_config.infer_conf.get('infer_policy'))
-diff --git a/cause_inference/infer_policy.py b/cause_inference/infer_policy.py
-index c0952e7..0663e74 100644
--- a/cause_inference/infer_policy.py
-+++ b/cause_inference/infer_policy.py
-@@ -5,9 +5,10 @@ from typing import List
- 
- import networkx as nx
- 
-from cause_inference.cause_infer import CausalGraph
-from cause_inference.cause_infer import Cause
-+from cause_inference.model import CausalGraph
-+from cause_inference.model import Cause
- from cause_inference.exceptions import InferenceException
-+from cause_inference import rule_parser
- 
- 
- class InferPolicy(ABC):
-@@ -114,22 +115,28 @@ class DfsPolicy(InferPolicy):
-         if length < 1:
-             return 0.0
-         total_score = 0.0
-+        num_of_valid_node = 0
-         for node in path[:length]:
-+            if is_virtual_node(node):
-+                continue
-             total_score += cause_graph.nodes[node].get('abnormal_score', 0)
-        if length != 0:
-            total_score /= length
-+            num_of_valid_node += 1
-+        if num_of_valid_node != 0:
-+            total_score /= num_of_valid_node
-         return total_score
- 
-    def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
-        cause_graph = causal_graph.metric_cause_graph
-        abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
-
-        reverse_graph = nx.DiGraph()
-        reverse_graph.add_nodes_from(cause_graph.nodes)
-+    @staticmethod
-+    def reverse_graph(cause_graph):
-+        reversed_graph = nx.DiGraph()
-+        reversed_graph.add_nodes_from(cause_graph.nodes)
-         for from_, to in cause_graph.edges:
-            reverse_graph.add_edge(to, from_)
-+            reversed_graph.add_edge(to, from_)
-+        return reversed_graph
- 
-        successors = nx.dfs_successors(reverse_graph, abn_node_id)
-+    @staticmethod
-+    def get_all_paths_to_abn_node(abn_node_id, cause_graph):
-+        reversed_graph = DfsPolicy.reverse_graph(cause_graph)
-+        successors = nx.dfs_successors(reversed_graph, abn_node_id)
-         paths = []
-         path = []
- 
-@@ -145,24 +152,63 @@ class DfsPolicy(InferPolicy):
- 
-         path.append(abn_node_id)
-         dfs_path(abn_node_id)
-+        return paths
- 
-+    @staticmethod
-+    def get_scored_paths(cause_graph, paths) -> list:
-         scored_paths = []
-        for p in paths:
-+        for path in paths:
-             scored_paths.append({
-                'score': self.calc_path_score(p, cause_graph),
-                'path': p
-+                'score': DfsPolicy.calc_path_score(path, cause_graph),
-+                'path': path
-             })
-        scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
-        scored_paths = scored_paths[:top_k]
-+        return scored_paths
-+
-+    @staticmethod
-+    def get_top_paths(scored_paths, top_k) -> list:
-+        top_paths = []
-+        node_selected = set()
-+        metric_selected = set()
-+        for path in scored_paths:
-+            if len(top_paths) == top_k:
-+                break
-+            cause_node_id = path.get('path')[0]
-+            if cause_node_id in node_selected:
-+                continue
-+            if cause_node_id[1] in metric_selected:
-+                continue
-+            if is_virtual_node(cause_node_id):
-+                continue
-+            node_selected.add(cause_node_id)
-+            metric_selected.add(cause_node_id[1])
-+            top_paths.append(path)
-+
-+        return top_paths
- 
-+    @staticmethod
-+    def get_top_causes(top_paths) -> List[Cause]:
-         res = []
-        for item in scored_paths:
-+        for item in top_paths:
-             cause_node_id = item.get('path')[0]
-             cause = Cause(cause_node_id[1], cause_node_id[0], item.get('score'), item.get('path'))
-             res.append(cause)
-
-         return res
- 
-+    def infer(self, causal_graph: CausalGraph, top_k: int) -> List[Cause]:
-+        cause_graph = causal_graph.metric_cause_graph
-+        abn_node_id = (causal_graph.entity_id_of_abn_kpi, causal_graph.abnormal_kpi.abnormal_metric_id)
-+
-+        paths = self.get_all_paths_to_abn_node(abn_node_id, cause_graph)
-+        scored_paths = self.get_scored_paths(cause_graph, paths)
-+        scored_paths = sorted(scored_paths, key=lambda k: k['score'], reverse=True)
-+        top_paths = self.get_top_paths(scored_paths, top_k)
-+
-+        return self.get_top_causes(top_paths)
-+
-+
-+def is_virtual_node(node_id) -> bool:
-+    return rule_parser.is_virtual_metric(node_id[1])
-+
- 
- def get_infer_policy(policy: str, **options) -> InferPolicy:
-     if policy == 'dfs':
-diff --git a/cause_inference/model.py b/cause_inference/model.py
-index eb473da..136e4c3 100644
--- a/cause_inference/model.py
-+++ b/cause_inference/model.py
-@@ -7,6 +7,7 @@ from spider.conf.observe_meta import ObserveMetaMgt
- from spider.util import logger
- from spider.util.entity import concate_entity_id
- from spider.util.entity import escape_entity_id
-+from cause_inference import rule_parser
- 
- 
- class AbnormalEvent:
-@@ -77,6 +78,10 @@ class CausalGraph:
-         self.metric_cause_graph = nx.DiGraph()
-         self.init_casual_graph()
- 
-+    @staticmethod
-+    def is_virtual_metric_group(metric_group) -> bool:
-+        return len(metric_group) == 1 and rule_parser.is_virtual_metric(metric_group[0])
-+
-     def init_casual_graph(self):
-         for node_id, node_attrs in self.topo_nodes.items():
-             self.entity_cause_graph.add_node(node_id, **node_attrs)
-@@ -134,19 +139,54 @@ class CausalGraph:
-             self.init_metric_edge(edge)
- 
-     def init_metric_edge(self, entity_edge):
-        from_entity_id = entity_edge[0]
-        to_entity_id = entity_edge[1]
-+        f_entity_id = entity_edge[0]
-+        t_entity_id = entity_edge[1]
-+        avail_relations = self.get_avail_metric_causal_relations(entity_edge)
-+
-+        unique = set()
-+        for f_metric_group, t_metric_group in avail_relations:
-+            if self.is_virtual_metric_group(f_metric_group):
-+                self.add_virtual_metric_node(f_entity_id, f_metric_group[0])
-+            if self.is_virtual_metric_group(t_metric_group):
-+                self.add_virtual_metric_node(t_entity_id, t_metric_group[0])
-+
-+            f_metric_id = self.metric_with_largest_abn_score(f_metric_group, f_entity_id)
-+            t_metric_id = self.metric_with_largest_abn_score(t_metric_group, t_entity_id)
-+            if (f_metric_id, t_metric_id) not in unique:
-+                self.metric_cause_graph.add_edge((f_entity_id, f_metric_id), (t_entity_id, t_metric_id))
-+                unique.add((f_metric_id, t_metric_id))
-+
-+    def add_virtual_metric_node(self, entity_id, metric_id):
-+        self.metric_cause_graph.add_node((entity_id, metric_id))
-+
-+    def get_avail_metric_causal_relations(self, entity_edge):
-+        f_metric_ids = self.get_abn_metric_ids(entity_edge[0])
-+        t_metric_ids = self.get_abn_metric_ids(entity_edge[1])
-         rule_meta = self.entity_cause_graph.edges[entity_edge].get('rule_meta')
-        for from_metric_evt in self.get_abnormal_metrics_of_node(from_entity_id):
-            for to_metric_evt in self.get_abnormal_metrics_of_node(to_entity_id):
-                from_metric_id = from_metric_evt.abnormal_metric_id
-                to_metric_id = to_metric_evt.abnormal_metric_id
-                if rule_meta is not None and not rule_meta.check_metric_pair(from_metric_id, to_metric_id):
-                    continue
-                self.metric_cause_graph.add_edge(
-                    (from_entity_id, from_metric_id),
-                    (to_entity_id, to_metric_id)
-                )
-+        return rule_meta.get_avail_causal_relations(f_metric_ids, t_metric_ids)
-+
-+    def get_abn_metric_ids(self, entity_id):
-+        metric_evts = self.get_abnormal_metrics_of_node(entity_id)
-+        return [evt.abnormal_metric_id for evt in metric_evts]
-+
-+    def metric_with_largest_abn_score(self, metric_group: list, entity_id) -> str:
-+        if len(metric_group) == 1:
-+            return metric_group[0]
-+
-+        metric_evt_map = {}
-+        metric_evts = self.get_abnormal_metrics_of_node(entity_id)
-+        for metric_evt in metric_evts:
-+            metric_evt_map.setdefault(metric_evt.abnormal_metric_id, metric_evt)
-+
-+        metric_id_of_largest = metric_group[0]
-+        largest_abn_score = metric_evt_map.get(metric_id_of_largest).abnormal_score
-+        for metric_id in metric_group:
-+            abn_score = metric_evt_map.get(metric_id).abnormal_score
-+            if abn_score > largest_abn_score:
-+                metric_id_of_largest = metric_id
-+                largest_abn_score = abn_score
-+
-+        return metric_id_of_largest
- 
- 
- class Cause:
-diff --git a/cause_inference/rule_parser.py b/cause_inference/rule_parser.py
-index f08ed3c..eb6227c 100644
--- a/cause_inference/rule_parser.py
-+++ b/cause_inference/rule_parser.py
-@@ -1,7 +1,7 @@
- import os
- from abc import ABCMeta
- from abc import abstractmethod
-from typing import List
-+from typing import List, Dict, Tuple
- 
- import yaml
- 
-@@ -9,33 +9,86 @@ from spider.conf.observe_meta import RelationType
- from spider.conf.observe_meta import EntityType
- from spider.util import logger
- 
-+METRIC_CATEGORY_ALL = 'ALL'
-+METRIC_CATEGORY_OTHER = 'OTHER'
-+METRIC_CATEGORY_VIRTUAL = 'VIRTUAL'
-+METRIC_ID_OF_CATEGORY_VIRTUAL = 'virtual_metric'
- 
-class MetricPairSet:
-    def __init__(self, from_: set, to_: set):
-+
-+def is_virtual_metric(metric_id: str) -> bool:
-+    return metric_id == METRIC_ID_OF_CATEGORY_VIRTUAL
-+
-+
-+class MetricCategoryPair:
-+    def __init__(self, from_: str, to_: str):
-         self.from_ = from_
-         self.to_ = to_
- 
-    def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
-        if self.from_ and from_metric_id not in self.from_:
-            return False
-        if self.to_ and to_metric_id not in self.to_:
-            return False
-        return True
-
- 
- class RuleMeta:
-    def __init__(self, from_type, to_type, metric_range=None):
-+    def __init__(self, from_type, to_type, from_categories=None, to_categories=None, metric_range=None):
-         self.from_type = from_type
-         self.to_type = to_type
-        self.metric_range: List[MetricPairSet] = metric_range or []
-
-    def check_metric_pair(self, from_metric_id: str, to_metric_id: str) -> bool:
-        if not self.metric_range:
-            return True
-        for item in self.metric_range:
-            if item.check_metric_pair(from_metric_id, to_metric_id):
-                return True
-        return False
-+        self.from_categories = from_categories or {}
-+        self.to_categories = to_categories or {}
-+        self.category_pairs: List[MetricCategoryPair] = metric_range or []
-+
-+    @staticmethod
-+    def aggregate_metric_from_groups(category_type, metric_groups) -> List[list]:
-+        res = []
-+        if category_type == METRIC_CATEGORY_ALL:
-+            for cate_type, metric_group in metric_groups.items():
-+                if cate_type == METRIC_CATEGORY_VIRTUAL:
-+                    continue
-+                elif cate_type == METRIC_CATEGORY_OTHER:
-+                    res.extend([metric] for metric in metric_group)
-+                else:
-+                    res.append(metric_group)
-+        else:
-+            metric_group = metric_groups.get(category_type)
-+            if metric_group:
-+                res.append(metric_group)
-+
-+        return res
-+
-+    @staticmethod
-+    def _group_metric_by_category(metrics, categories) -> Dict[str, list]:
-+        parts = {}
-+        parted_metrics = set()
-+        for cate_type, cate_metrics in categories.items():
-+            part = []
-+            for metric in metrics:
-+                if metric in cate_metrics:
-+                    part.append(metric)
-+                    parted_metrics.add(metric)
-+            if len(part) > 0:
-+                parts.setdefault(cate_type, part)
-+
-+        other_part = []
-+        for metric in metrics:
-+            if metric not in parted_metrics:
-+                other_part.append(metric)
-+        if len(other_part) > 0:
-+            parts.setdefault(METRIC_CATEGORY_OTHER, other_part)
-+
-+        virtual_part = [METRIC_ID_OF_CATEGORY_VIRTUAL]
-+        parts.setdefault(METRIC_CATEGORY_VIRTUAL, virtual_part)
-+
-+        return parts
-+
-+    def get_avail_causal_relations(self, real_from_metrics, real_to_metrics) -> List[Tuple[list, list]]:
-+        causal_relations = []
-+
-+        from_groups = self._group_metric_by_category(real_from_metrics, self.from_categories)
-+        to_groups = self._group_metric_by_category(real_to_metrics, self.to_categories)
-+        for cate_pair in self.category_pairs:
-+            all_from_metrics = self.aggregate_metric_from_groups(cate_pair.from_, from_groups)
-+            all_to_metrics = self.aggregate_metric_from_groups(cate_pair.to_, to_groups)
-+            for from_metrics in all_from_metrics:
-+                for to_metrics in all_to_metrics:
-+                    causal_relations.append((from_metrics, to_metrics))
-+
-+        return causal_relations
- 
- 
- class Rule(metaclass=ABCMeta):
-@@ -180,7 +233,8 @@ class NicRule1(Rule):
- class RuleEngine:
-     def __init__(self):
-         self.rules: List[Rule] = []
-        self.rule_metas = {}
-+        self.metric_categories = {}
-+        self.rule_metas: Dict[tuple, RuleMeta] = {}
- 
-     def add_rule(self, rule: Rule):
-         self.rules.append(rule)
-@@ -190,33 +244,58 @@ class RuleEngine:
-             rule.rule_parsing(causal_graph)
- 
-     def load_rule_meta_from_yaml(self, rule_path: str) -> bool:
-        abs_rule_path = os.path.abspath(rule_path)
-        if not os.path.exists(abs_rule_path):
-            logger.logger.warning("Rule meta path '{}' not exist", abs_rule_path)
-            return True
-         try:
-            with open(abs_rule_path, 'r') as file:
-+            with open(os.path.abspath(rule_path), 'r') as file:
-                 data = yaml.safe_load(file)
-         except IOError as ex:
-             logger.logger.warning(ex)
-             return False
- 
-        infer_rules = data.get("infer_rules", [])
-        for rule_meta in infer_rules:
-            saved_metric_range = []
-            for item in rule_meta.get("metric_range", []):
-                saved_metric_range.append(MetricPairSet(set(item.get('from', [])), set(item.get('to', []))))
-            saved_rule_meta = RuleMeta(rule_meta.get('from_type'), rule_meta.get('to_type'), saved_metric_range)
-            self.rule_metas.setdefault((rule_meta.get("from_type"), rule_meta.get("to_type")), saved_rule_meta)
-
-+        self.load_rule_meta_from_dict(data)
-         return True
- 
-+    def create_default_rule_meta(self, from_type, to_type):
-+        return RuleMeta(
-+            from_type,
-+            to_type,
-+            self.metric_categories.get(from_type),
-+            self.metric_categories.get(to_type),
-+            [MetricCategoryPair(METRIC_CATEGORY_ALL, METRIC_CATEGORY_ALL)]
-+        )
-+
-     def add_rule_meta(self, causal_graph):
-         entity_cause_graph = causal_graph.entity_cause_graph
-         for edge in entity_cause_graph.edges:
-             from_type = entity_cause_graph.nodes[edge[0]].get('type')
-             to_type = entity_cause_graph.nodes[edge[1]].get('type')
-            entity_cause_graph.edges[edge]["rule_meta"] = self.rule_metas.get((from_type, to_type))
-+            rule_meta = self.rule_metas.get((from_type, to_type))
-+            if not rule_meta:
-+                rule_meta = self.create_default_rule_meta(from_type, to_type)
-+            entity_cause_graph.edges[edge]["rule_meta"] = rule_meta
-+
-+    def load_rule_meta_from_dict(self, data: dict):
-+        self.load_metric_categories(data.get('metric_categories', {}))
-+        self.load_infer_rules(data.get("infer_rules", []))
-+
-+    def load_metric_categories(self, metric_categories: dict):
-+        for entity_type, categories in metric_categories.items():
-+            category_dict = {}
-+            for category in categories:
-+                category_dict.setdefault(category.get('category'), category.get('metrics'))
-+            self.metric_categories.setdefault(entity_type, category_dict)
-+
-+    def load_infer_rules(self, infer_rules: list):
-+        for rule_meta in infer_rules:
-+            from_entity_type = rule_meta.get('from_type')
-+            to_entity_type = rule_meta.get('to_type')
-+            saved_metric_range = []
-+            for item in rule_meta.get("metric_range", []):
-+                from_category = item.get('from')
-+                to_category = item.get('to')
-+                saved_metric_range.append(MetricCategoryPair(from_category, to_category))
-+            saved_rule_meta = RuleMeta(from_entity_type, to_entity_type, self.metric_categories.get(from_entity_type),
-+                                       self.metric_categories.get(to_entity_type), saved_metric_range)
-+            self.rule_metas.setdefault((from_entity_type, to_entity_type), saved_rule_meta)
- 
- 
- rule_engine = RuleEngine()
-diff --git a/config/infer-rule.yaml b/config/infer-rule.yaml
-index c3bc3f3..d287972 100644
--- a/config/infer-rule.yaml
-+++ b/config/infer-rule.yaml
-@@ -1,32 +1,98 @@
-+metric_categories:
-+  proc:
-+    -
-+      category: PROC_CPU
-+      metrics:
-+        - gala_gopher_proc_utime_jiffies
-+        - gala_gopher_proc_stime_jiffies
-+    -
-+      category: PROC_IO_LOAD
-+      metrics:
-+        - gala_gopher_proc_read_bytes
-+        - gala_gopher_proc_write_bytes
-+        - gala_gopher_proc_less_4k_io_read
-+        - gala_gopher_proc_less_4k_io_write
-+        - gala_gopher_proc_greater_4k_io_read
-+        - gala_gopher_proc_greater_4k_io_write
-+  disk:
-+    -
-+      category: DISK_LOAD
-+      metrics:
-+        - gala_gopher_disk_rspeed_kB
-+        - gala_gopher_disk_wspeed_kB
-+        - gala_gopher_disk_rspeed
-+        - gala_gopher_disk_wspeed
-+    -
-+      category: DISK_DELAY
-+      metrics:
-+        - gala_gopher_disk_r_await
-+        - gala_gopher_disk_w_await
-+        - gala_gopher_disk_rareq
-+        - gala_gopher_disk_wareq
-+  block:
-+    -
-+      category: BLOCK_DELAY
-+      metrics:
-+        - gala_gopher_block_latency_req_max
-+        - gala_gopher_block_latency_req_last
-+        - gala_gopher_block_latency_req_sum
-+        - gala_gopher_block_latency_req_jitter
-+  nic:
-+    -
-+      category: NIC_DROP
-+      metrics:
-+        - gala_gopher_nic_tc_sent_drop
-+        - gala_gopher_nic_tx_dropped
-+        - gala_gopher_nic_rx_dropped
-+  cpu:
-+    -
-+      category: CPU_TOTAL
-+      metrics:
-+        - gala_gopher_cpu_total_used_per
-+
- infer_rules:
-   -
-     from_type: cpu
-     to_type: proc
-     metric_range:
-       -
-        from: []
-        to:
-          - gala_gopher_proc_utime_jiffies
-          - gala_gopher_proc_stime_jiffies
-+        from: CPU_TOTAL
-+        to: PROC_CPU
-   -
-     from_type: block
-     to_type: proc
-     metric_range:
-       -
-        from: []
-        to:
-          - gala_gopher_proc_ns_ext4_read
-          - gala_gopher_proc_ns_ext4_write
-          - gala_gopher_proc_ns_ext4_flush
-          - gala_gopher_proc_ns_overlay_read
-          - gala_gopher_proc_ns_overlay_write
-          - gala_gopher_proc_ns_overlay_flush
-+        from: BLOCK_DELAY
-+        to: VIRTUAL
-   -
-     from_type: proc
-     to_type: disk
-     metric_range:
-       -
-        from:
-          - gala_gopher_proc_read_bytes
-          - gala_gopher_proc_write_bytes
-        to: []
-\ No newline at end of file
-+        from: PROC_IO_LOAD
-+        to: ALL
-+  -
-+    from_type: disk
-+    to_type: block
-+    metric_range:
-+      -
-+        from: DISK_DELAY
-+        to: BLOCK_DELAY
-+  -
-+    from_type: proc
-+    to_type: sli
-+    metric_range:
-+      -
-+        from: VIRTUAL
-+        to: ALL
-+      -
-+        from: PROC_CPU
-+        to: ALL
-+  -
-+    from_type: tcp_link
-+    to_type: proc
-+    metric_range:
-+      -
-+        from: ALL
-+        to: VIRTUAL
-\ No newline at end of file
-- 
-2.21.0.windows.1
-
--- a/0002-adaptation-for-abnormal-event-output-change.patch
+++ b/0002-adaptation-for-abnormal-event-output-change.patch
@ -1,108 +0,0 @@
-From 8a89c59b0c6194afdd6ed1c9bbd949b2cc62aeb0 Mon Sep 17 00:00:00 2001
-From: algorithmofdish <hexiujun1@huawei.com>
-Date: Tue, 29 Nov 2022 17:07:12 +0800
-Subject: [PATCH] refactor(infer): adaptation for abnormal event output change
-
---
- cause_inference/__main__.py    | 25 +++++++++++++++++++++----
- cause_inference/cause_infer.py | 21 ---------------------
- 2 files changed, 21 insertions(+), 25 deletions(-)
-
-diff --git a/cause_inference/__main__.py b/cause_inference/__main__.py
-index 093f7ac..d586b1f 100644
--- a/cause_inference/__main__.py
-+++ b/cause_inference/__main__.py
-@@ -14,7 +14,6 @@ from cause_inference.config import infer_config
- from cause_inference.config import init_infer_config
- from cause_inference.model import AbnormalEvent
- from cause_inference.cause_infer import cause_locating
-from cause_inference.cause_infer import parse_abn_evt
- from cause_inference.cause_infer import preprocess_abn_score
- from cause_inference.rule_parser import rule_engine
- from cause_inference.exceptions import InferenceException
-@@ -164,16 +163,34 @@ def init_obsv_meta_coll_thd():
-     return obsv_meta_coll_thread
- 
- 
-+def parse_abn_evt(data) -> AbnormalEvent:
-+    resource = data.get('Resource', {})
-+    attrs = data.get('Attributes', {})
-+    if not resource.get('metric'):
-+        raise DataParseException('Attribute "Resource.metric" required in abnormal event')
-+    if not attrs.get('entity_id') and not resource.get('labels'):
-+        raise DataParseException('metric_label or entity_id info need in abnormal event')
-+    abn_evt = AbnormalEvent(
-+        timestamp=data.get('Timestamp'),
-+        abnormal_metric_id=resource.get('metric'),
-+        abnormal_score=preprocess_abn_score(resource.get('score', 0.0)),
-+        metric_labels=resource.get('labels'),
-+        abnormal_entity_id=attrs.get('entity_id'),
-+        desc=resource.get('description', '') or data.get('Body', '')
-+    )
-+    return abn_evt
-+
-+
- def get_recommend_metric_evts(abn_kpi_data: dict) -> List[AbnormalEvent]:
-     metric_evts = []
-     obsv_meta_mgt = ObserveMetaMgt()
-    recommend_metrics = abn_kpi_data.get('Resource', {}).get('recommend_metrics', {})
-+    recommend_metrics = abn_kpi_data.get('Resource', {}).get('cause_metrics', {})
-     for metric_data in recommend_metrics:
-         metric_evt = AbnormalEvent(
-             timestamp=abn_kpi_data.get('Timestamp'),
-             abnormal_metric_id=metric_data.get('metric', ''),
-            abnormal_score=preprocess_abn_score(metric_data.get('score')),
-            metric_labels=metric_data.get('label', {}),
-+            abnormal_score=preprocess_abn_score(metric_data.get('score', 0.0)),
-+            metric_labels=metric_data.get('labels', {}),
-             desc=metric_data.get('description', '')
-         )
-         if not metric_evt.update_entity_id(obsv_meta_mgt):
-diff --git a/cause_inference/cause_infer.py b/cause_inference/cause_infer.py
-index dff26d0..6873c1e 100644
--- a/cause_inference/cause_infer.py
-+++ b/cause_inference/cause_infer.py
-@@ -1,7 +1,5 @@
- from typing import List
- 
-from scipy.special import expit
-
- from cause_inference.model import Cause
- from cause_inference.model import CausalGraph
- from cause_inference.model import AbnormalEvent
-@@ -12,7 +10,6 @@ from spider.collector.data_collector import DataCollector
- from spider.collector.prometheus_collector import PrometheusCollector
- from cause_inference.exceptions import InferenceException
- from cause_inference.exceptions import DBException
-from cause_inference.exceptions import DataParseException
- from cause_inference.config import infer_config
- from cause_inference.rule_parser import rule_engine
- from cause_inference.arangodb import connect_to_arangodb
-@@ -68,24 +65,6 @@ def query_abnormal_topo_subgraph(abnormal_event: AbnormalEvent):
-     return subgraph
- 
- 
-def parse_abn_evt(data) -> AbnormalEvent:
-    resource = data.get('Resource', {})
-    attrs = data.get('Attributes', {})
-    if not resource.get('metrics'):
-        raise DataParseException('Atribute "Resource.metrics" required in abnormal event')
-    if not attrs.get('entity_id') and not resource.get('metric_label'):
-        raise DataParseException('metric_label or entity_id info need in abnormal event')
-    abn_evt = AbnormalEvent(
-        timestamp=data.get('Timestamp'),
-        abnormal_metric_id=resource.get('metrics'),
-        abnormal_score=1.0,
-        metric_labels=resource.get('metric_label'),
-        abnormal_entity_id=attrs.get('entity_id'),
-        desc=resource.get('description', '') or data.get('Body', '')
-    )
-    return abn_evt
-
-
- def parse_entity_id(orig_entity_id: str) -> str:
-     fs_idx = orig_entity_id.index('/')
-     return orig_entity_id[fs_idx+1:]
-- 
-2.21.0.windows.1
-
--- a/gala-spider-1.0.0.tar.gz
+++ b/gala-spider-1.0.0.tar.gz
--- a/gala-spider-1.0.1.tar.gz
+++ b/gala-spider-1.0.1.tar.gz
--- a/gala-spider.spec
+++ b/gala-spider.spec
@ -1,8 +1,8 @@
 %define debug_package %{nil}

 Name:           gala-spider
-Version:        1.0.0
-Release:        6
+Version:        1.0.1
+Release:        1
 Summary:        OS topological graph storage service and cause inference service for gala-ops project
 License:        MulanPSL2
 URL:            https://gitee.com/openeuler/gala-spider
@ -11,9 +11,6 @@ Source0:        %{name}-%{version}.tar.gz
 BuildRequires:  python3-setuptools systemd
 Requires:       python3-%{name} = %{version}-%{release}

-patch0:         0001-cause-inference-optimization.patch
-patch1:         0002-adaptation-for-abnormal-event-output-change.patch
-

 %description
 OS topological graph storage service for gala-ops project
@ -114,6 +111,7 @@ fi
 %config(noreplace) %{_sysconfdir}/gala-inference/gala-inference.yaml
 %config(noreplace) %{_sysconfdir}/gala-inference/ext-observe-meta.yaml
 %config(noreplace) %{_sysconfdir}/gala-inference/infer-rule.yaml
+%config(noreplace) %{_sysconfdir}/gala-inference/cause-keyword.yaml
 %{_bindir}/gala-inference
 %{_unitdir}/gala-inference.service

@ -124,6 +122,9 @@ fi


 %changelog
+* Wed Dec 14 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.1-1
+- Update to 1.0.1: support cross host cause location
+
 * Sat Dec 10 2022 algorithmofdish <hexiujun1@huawei.com> - 1.0.0-6
 - Adaptation for abnormal event output change