EVOLUTION-MANAGER
Edit File: boosted_trees_utils.py
# Copyright 2018 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Debug and model explainability logic for boosted trees.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import collections import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 # For directional feature contributions. _DEBUG_PROTO_KEY = '_serialized_debug_outputs_proto' _BIAS_ID = 0 def _parse_debug_proto_string(example_proto_serialized): example_debug_outputs = boosted_trees_pb2.DebugOutput() example_debug_outputs.ParseFromString(example_proto_serialized) feature_ids = example_debug_outputs.feature_ids logits_path = example_debug_outputs.logits_path return feature_ids, logits_path def _compute_directional_feature_contributions(example_feature_ids, example_logits_paths, activation, feature_col_names): """Directional feature contributions and bias, per example.""" # Initialize contributions to 0. num_features = len(feature_col_names) # Traverse tree subtracting child prediction from parent prediction and # associating change with feature id used to split. predictions = np.array(activation(example_logits_paths)) delta_pred = predictions[_BIAS_ID + 1:] - predictions[:-1] # Group by feature id, then sum delta_pred. contribs = np.bincount( example_feature_ids, weights=delta_pred, minlength=num_features) dfcs = {} for f, dfc in zip(range(num_features), contribs): dfcs[f] = dfcs.setdefault(f, 0) + dfc dfcs = _sum_by_feature_col_name_and_sort(feature_col_names, contribs) return predictions[_BIAS_ID], dfcs def _identity(logits): return logits def _sigmoid(logits): # TODO(crawles): Change to softmax once multiclass support is available. return 1 / (1 + np.exp(-np.array(logits))) def _parse_explanations_from_prediction(serialized_debug_proto, feature_col_names, classification=False): """Parse serialized explanability proto, compute dfc, and return bias, dfc.""" example_feature_ids, example_logits_path = _parse_debug_proto_string( serialized_debug_proto) if classification: activation = _sigmoid else: activation = _identity bias, dfcs = _compute_directional_feature_contributions( example_feature_ids, example_logits_path, activation, feature_col_names) # TODO(crawles): Prediction path and leaf IDs. return bias, dfcs def _sum_by_feature_col_name_and_sort(names, vals): """Group by feature column names, sum values, and sort by absolute value.""" sum_by_dict = {} # Groupby and sum. for name, val in zip(names, vals): sum_by_dict[name] = sum_by_dict.setdefault(name, 0) + val # Then sort. sorted_sum_by = sorted( sum_by_dict.items(), key=lambda tup: abs(tup[1]), reverse=True) return collections.OrderedDict(sorted_sum_by)