# Source code for omlt.gbt.gbt_formulation

import collections

import numpy as np
import pyomo.environ as pe

from omlt.formulation import _PyomoFormulation, _setup_scaled_inputs_outputs
from omlt.gbt.model import GradientBoostedTreeModel


class GBTBigMFormulation(_PyomoFormulation):
    """
    This class is the entry-point to build gradient-boosted trees formulations.

    This class iterates over all trees in the ensemble and generates
    constraints to enforce splitting rules according to:

    References
    ----------
    * Misic, V. "Optimization of tree ensembles."
      Operations Research 68.5 (2020): 1605-1624.
    * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with
      gradient-boosted trees embedded."
      INFORMS Journal on Computing (2020).

    Parameters
    ----------
    gbt_model : GradientBoostedTreeModel
        the tree ensemble definition
    """

    def __init__(self, gbt_model):
        super().__init__()
        # Stored model definition; consumed later by _build_formulation.
        self.model_definition = gbt_model

    @property
    def input_indexes(self):
        """The indexes of the formulation inputs."""
        return list(range(self.model_definition.n_inputs))

    @property
    def output_indexes(self):
        """The indexes of the formulation output."""
        return list(range(self.model_definition.n_outputs))

    def _build_formulation(self):
        """This method is called by the OmltBlock to build the corresponding
        mathematical formulation on the Pyomo block.
        """
        # Attach (possibly scaled) input/output variables to the block first;
        # the tree constraints below are written over the scaled variables.
        _setup_scaled_inputs_outputs(
            self.block,
            self.model_definition.scaling_object,
            self.model_definition.scaled_input_bounds,
        )

        add_formulation_to_block(
            block=self.block,
            model_definition=self.model_definition,
            input_vars=self.block.scaled_inputs,
            output_vars=self.block.scaled_outputs,
        )
def add_formulation_to_block(block, model_definition, input_vars, output_vars):
    r"""
    Adds the gradient-boosted trees formulation to the given Pyomo block.

    .. math::
        \begin{align*}
        \hat{\mu} &= \sum\limits_{t \in T} \sum\limits_{l \in {L_t}}
            F_{t,l} z_{t,l}, && \\
        \sum\limits_{l \in L_t} z_{t,l} &= 1, && \forall t \in T, \\
        \sum\limits_{l \in \text{Left}_{t,s}} z_{t,l} &\leq y_{i(s),j(s)},
            && \forall t \in T, \forall s \in V_t, \\
        \sum\limits_{l \in \text{Right}_{t,s}} z_{t,l} &\leq 1 - y_{i(s),j(s)},
            && \forall t \in T, \forall s \in V_t, \\
        y_{i,j} &\leq y_{i,j+1},
            && \forall i \in \left [ n \right ],
               \forall j \in \left [ m_i - 1 \right ], \\
        x_{i} &\geq v_{i,0} + \sum\limits_{j=1}^{m_i}
            \left (v_{i,j} - v_{i,j-1} \right ) \left ( 1 - y_{i,j} \right ),
            && \forall i \in \left [ n \right ], \\
        x_{i} &\leq v_{i,m_i+1} + \sum\limits_{j=1}^{m_i}
            \left (v_{i,j} - v_{i,j+1} \right ) y_{i,j},
            && \forall i \in \left [ n \right ]. \\
        \end{align*}

    References
    ----------
    * Misic, V. "Optimization of tree ensembles."
      Operations Research 68.5 (2020): 1605-1624.
    * Mistry, M., et al. "Mixed-integer convex nonlinear optimization with
      gradient-boosted trees embedded."
      INFORMS Journal on Computing (2020).

    Parameters
    ----------
    block : Block
        the Pyomo block
    model_definition : GradientBoostedTreeModel or onnx model
        the tree ensemble definition
    input_vars : Var
        the input variables of the Pyomo block
    output_vars : Var
        the output variables of the Pyomo block
    """
    # Accept either the OMLT wrapper or a raw ONNX model object.
    if isinstance(model_definition, GradientBoostedTreeModel):
        gbt = model_definition.onnx_model
    else:
        gbt = model_definition
    graph = gbt.graph

    # The whole ensemble is encoded as attributes of the first graph node
    # (ONNX TreeEnsembleRegressor stores the trees as flat parallel arrays).
    root_node = graph.node[0]
    attr = _node_attributes(root_node)

    # base_values don't apply to lgbm models
    base_value = (
        np.array(attr["base_values"].floats)[0] if "base_values" in attr else 0.0
    )

    nodes_feature_ids = np.array(attr["nodes_featureids"].ints)
    nodes_values = np.array(attr["nodes_values"].floats)
    nodes_modes = np.array(attr["nodes_modes"].strings)
    nodes_tree_ids = np.array(attr["nodes_treeids"].ints)
    nodes_node_ids = np.array(attr["nodes_nodeids"].ints)
    nodes_false_node_ids = np.array(attr["nodes_falsenodeids"].ints)
    nodes_true_node_ids = np.array(attr["nodes_truenodeids"].ints)
    # NOTE(review): the four values below are not used by the formulation.
    # The reads are kept on purpose so a model missing these attributes still
    # fails fast with a KeyError, as before.
    nodes_hitrates = np.array(attr["nodes_hitrates"].floats)  # noqa: F841
    nodes_missing_value_tracks_true = np.array(  # noqa: F841
        attr["nodes_missing_value_tracks_true"].ints
    )
    n_targets = attr["n_targets"].i  # noqa: F841
    target_ids = np.array(attr["target_ids"].ints)  # noqa: F841

    target_node_ids = np.array(attr["target_nodeids"].ints)
    target_tree_ids = np.array(attr["target_treeids"].ints)
    target_weights = np.array(attr["target_weights"].floats)

    # Compute derived data
    nodes_leaf_mask = nodes_modes == b"LEAF"
    nodes_branch_mask = nodes_modes == b"BRANCH_LEQ"

    tree_ids = set(nodes_tree_ids)
    feature_ids = set(nodes_feature_ids)

    continuous_vars = {var_idx: input_vars[var_idx] for var_idx in input_vars}

    # z_{t,l}: one continuous variable in [0, inf) per (tree, leaf) pair.
    block.z_l = pe.Var(
        list(zip(nodes_tree_ids[nodes_leaf_mask], nodes_node_ids[nodes_leaf_mask])),
        bounds=(0, None),
        domain=pe.Reals,
    )

    # Sorted, de-duplicated split values per feature.  np.unique already
    # returns a sorted array, so no separate np.sort is needed.
    branch_value_by_feature_id = dict()
    for f in feature_ids:
        nodes_feature_mask = nodes_feature_ids == f
        branch_values = nodes_values[nodes_feature_mask & nodes_branch_mask]
        branch_value_by_feature_id[f] = np.unique(branch_values)

    # y_{i,j}: binary split indicators, one per (feature, split value).
    y_index = [
        (f, bi)
        for f in continuous_vars.keys()
        for bi, _ in enumerate(branch_value_by_feature_id[f])
    ]
    block.y = pe.Var(y_index, domain=pe.Binary)

    @block.Constraint(tree_ids)
    def single_leaf(b, tree_id):
        r"""
        Add constraint to ensure that only one leaf per tree is active,
        Mistry et al. Equ. (3b).

        .. math::
            \begin{align*}
            \sum\limits_{l \in L_t} z_{t,l} &= 1, && \forall t \in T
            \end{align*}
        """
        tree_mask = nodes_tree_ids == tree_id
        return (
            sum(
                b.z_l[tree_id, node_id]
                for node_id in nodes_node_ids[nodes_leaf_mask & tree_mask]
            )
            == 1
        )

    nodes_tree_branch_ids = [
        (t, b)
        for t in tree_ids
        for b in nodes_node_ids[(nodes_tree_ids == t) & nodes_branch_mask]
    ]

    def _branching_y(tree_id, branch_node_id):
        """Return the y variable associated with the split at this branch node."""
        node_mask = (nodes_tree_ids == tree_id) & (nodes_node_ids == branch_node_id)
        feature_id = nodes_feature_ids[node_mask]
        branch_value = nodes_values[node_mask]
        if len(branch_value) != 1:
            raise ValueError(
                "The given tree_id and branch_node_id do not uniquely identify a branch value."
            )
        if len(feature_id) != 1:
            raise ValueError(
                "The given tree_id and branch_node_id do not uniquely identify a feature."
            )
        feature_id = feature_id[0]
        branch_value = branch_value[0]
        (branch_y_idx,) = np.where(
            branch_value_by_feature_id[feature_id] == branch_value
        )
        if len(branch_y_idx) != 1:
            raise ValueError(
                "The given tree_id and branch_node_id do not uniquely identify a branch index."
            )
        return block.y[feature_id, branch_y_idx[0]]

    def _sum_of_z_l(tree_id, start_node_id):
        """Sum the z variables of all leaves in the subtree rooted at start_node_id."""
        # NOTE(review): indexing the tree-masked arrays by node_id assumes the
        # node ids within each tree are 0..k-1 in storage order — TODO confirm
        # this holds for every exporter that produces these models.
        tree_mask = nodes_tree_ids == tree_id
        local_false_node_ids = nodes_false_node_ids[tree_mask]
        local_true_node_ids = nodes_true_node_ids[tree_mask]
        local_mode = nodes_modes[tree_mask]
        visit_queue = [start_node_id]
        sum_of_z_l = 0.0
        while visit_queue:
            node_id = visit_queue.pop()
            if local_mode[node_id] == b"LEAF":
                sum_of_z_l += block.z_l[tree_id, node_id]
            else:
                # add left and right child to list of nodes to visit
                visit_queue.append(local_false_node_ids[node_id])
                visit_queue.append(local_true_node_ids[node_id])
        return sum_of_z_l

    @block.Constraint(nodes_tree_branch_ids)
    def left_split(b, tree_id, branch_node_id):
        r"""
        Add constraint to activate all left splits leading to an active leaf,
        Mistry et al. Equ. (3c).

        .. math::
            \begin{align*}
            \sum\limits_{l \in \text{Left}_{t,s}} z_{t,l} &\leq y_{i(s),j(s)},
            && \forall t \in T, \forall s \in V_t
            \end{align*}
        """
        node_mask = (nodes_tree_ids == tree_id) & (nodes_node_ids == branch_node_id)
        y = _branching_y(tree_id, branch_node_id)

        subtree_root = nodes_true_node_ids[node_mask][0]
        return _sum_of_z_l(tree_id, subtree_root) <= y

    @block.Constraint(nodes_tree_branch_ids)
    def right_split(b, tree_id, branch_node_id):
        r"""
        Add constraint to activate all right splits leading to an active leaf,
        Mistry et al. Equ. (3d).

        .. math::
            \begin{align*}
            \sum\limits_{l \in \text{Right}_{t,s}} z_{t,l} &\leq 1 - y_{i(s),j(s)},
            && \forall t \in T, \forall s \in V_t
            \end{align*}
        """
        node_mask = (nodes_tree_ids == tree_id) & (nodes_node_ids == branch_node_id)
        y = _branching_y(tree_id, branch_node_id)

        subtree_root = nodes_false_node_ids[node_mask][0]
        return _sum_of_z_l(tree_id, subtree_root) <= 1 - y

    @block.Constraint(y_index)
    def order_y(b, feature_id, branch_y_idx):
        r"""
        Add constraint to activate splits in the correct order.
        Mistry et al. Equ. (3e).

        .. math::
            \begin{align*}
            y_{i,j} &\leq y_{i,j+1},
            && \forall i \in \left [ n \right ],
               \forall j \in \left [ m_i - 1 \right ]
            \end{align*}
        """
        branch_values = branch_value_by_feature_id[feature_id]
        if branch_y_idx >= len(branch_values) - 1:
            return pe.Constraint.Skip
        return b.y[feature_id, branch_y_idx] <= b.y[feature_id, branch_y_idx + 1]

    @block.Constraint(y_index)
    def var_lower(b, feature_id, branch_y_idx):
        r"""
        Add constraint to link discrete tree splits to lower bound of continuous
        variables. Mistry et al. Equ. (4a).

        .. math::
            \begin{align*}
            x_{i} &\geq v_{i,0} + \sum\limits_{j=1}^{m_i}
            \left (v_{i,j} - v_{i,j-1} \right ) \left ( 1 - y_{i,j} \right ),
            && \forall i \in \left [ n \right ]
            \end{align*}
        """
        x = input_vars[feature_id]
        # Unbounded-below variables cannot take this big-M style constraint.
        if x.lb is None:
            return pe.Constraint.Skip
        branch_value = branch_value_by_feature_id[feature_id][branch_y_idx]
        return x >= x.lb + (branch_value - x.lb) * (1 - b.y[feature_id, branch_y_idx])

    @block.Constraint(y_index)
    def var_upper(b, feature_id, branch_y_idx):
        r"""
        Add constraint to link discrete tree splits to upper bound of continuous
        variables. Mistry et al. Equ. (4b).

        .. math::
            \begin{align*}
            x_{i} &\leq v_{i,m_i+1} + \sum\limits_{j=1}^{m_i}
            \left (v_{i,j} - v_{i,j+1} \right ) y_{i,j},
            && \forall i \in \left [ n \right ]
            \end{align*}
        """
        x = input_vars[feature_id]
        # Unbounded-above variables cannot take this big-M style constraint.
        if x.ub is None:
            return pe.Constraint.Skip
        branch_value = branch_value_by_feature_id[feature_id][branch_y_idx]
        return x <= x.ub + (branch_value - x.ub) * b.y[feature_id, branch_y_idx]

    @block.Constraint()
    def tree_mean_value(b):
        r"""
        Add constraint to link block output tree model mean.
        Mistry et al. Equ. (3a).

        .. math::
            \begin{align*}
            \hat{\mu} &= \sum\limits_{t \in T} \sum\limits_{l \in {L_t}}
            F_{t,l} z_{t,l}
            \end{align*}
        """
        return (
            output_vars[0]
            == sum(
                weight * b.z_l[tree_id, node_id]
                for tree_id, node_id, weight in zip(
                    target_tree_ids, target_node_ids, target_weights
                )
            )
            + base_value
        )
def _node_attributes(node): attr = dict() for at in node.attribute: attr[at.name] = at return attr