Source code for secretflow.ml.boost.homo_boost.tree_core.loss_function
# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from secretflow.utils.errors import InvalidArgumentError
LEGAL_OBJ_FUNCTION = [
    "binary:logistic",
    "reg:logistic",
    "multi:softmax",
    "multi:softprob",
    "reg:squarederror",
]
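# Note: these objective names mirror the XGBoost "objective" parameter strings.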


class LossFunction(object):
    """Inner definition of the supported loss functions.

    Attributes:
        obj_name: Name of the loss function, one of
            [
                "binary:logistic",   # logistic regression for binary classification, output probability
                "reg:logistic",      # logistic regression
                "multi:softmax",     # multiclass classification via softmax, output class index
                "multi:softprob",    # same as softmax, but output a probability per class
                "reg:squarederror",  # regression with squared loss
            ]
    """
    def __init__(self, obj_name: str):
        self.obj_name = obj_name
        if not self._check_legal(obj_name):
            raise InvalidArgumentError(
                f"Illegal loss function param: {obj_name}, "
                f"must be one of {LEGAL_OBJ_FUNCTION}"
            )
    @staticmethod
    def _check_legal(obj_name: str) -> bool:
        return obj_name in LEGAL_OBJ_FUNCTION
    def obj_function(self):
        if self.obj_name in ("binary:logistic", "reg:logistic"):
            return self._reg_logistic
        if self.obj_name in ("multi:softmax", "multi:softprob"):
            return self._softmaxobj
        if self.obj_name == "reg:squarederror":
            return self._reg_squared
    def _reg_logistic(self, preds, dtrain):
        """Logistic objective.

        Args:
            preds: (N, 1) array of raw predictions, N = #data.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, 1) array with first-order gradient values.
            hess: (N, 1) array with second-order gradient values.
        """
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight into probability
        grad = (preds - labels).astype(np.float64)
        hess = (preds * (1.0 - preds)).astype(np.float64)
        return grad, hess
    def _reg_squared(self, preds, dtrain):
        """Squared loss objective.

        Args:
            preds: (N, 1) array of predictions, N = #data.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, 1) array with first-order gradient values.
            hess: (N, 1) array with second-order gradient values.
        """
        labels = dtrain.get_label()
        grad = (-2 * (labels - preds)).astype(np.float64)
        hess = (np.ones_like(labels) * 2).astype(np.float64)
        return grad, hess
    def _softmaxobj(self, preds, dtrain):
        """Softmax objective.

        Args:
            preds: (N, K) array, N = #data, K = #classes.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, K) array with first-order gradient values.
            hess: (N, K) array with second-order gradient values.
        """
        def _softmax(x):
            '''Row-wise softmax; the row max is subtracted first to avoid overflow in exp.'''
            e = np.exp(x - x.max(axis=1, keepdims=True))
            return e / np.sum(e, axis=1).reshape(-1, 1)
        # Labels arrive as a vector of class indices, one per example.
        labels = dtrain.get_label()
        # When objective=softprob, preds has shape (N, K).
        # Note: scikit-learn >= 1.2 renames the ``sparse`` argument to ``sparse_output``.
        labels = OneHotEncoder(sparse=False).fit_transform(labels.reshape(-1, 1))
        prob = _softmax(preds)
        grad = (prob - labels).astype(np.float64)
        hess = (2.0 * prob * (1.0 - prob)).astype(np.float64)
        # Return grad and hess as (N, K) matrices.
        return grad, hess
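

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API). The objective
# callbacks above only call ``get_label()`` on their ``dtrain`` argument, so a
# hypothetical ``_DemoDMatrix`` stub can stand in for ``xgboost.DMatrix``.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    class _DemoDMatrix:
        """Minimal stand-in exposing only the get_label() accessor."""

        def __init__(self, labels):
            self._labels = np.asarray(labels, dtype=np.float64)

        def get_label(self):
            return self._labels

    # Binary logistic: raw scores are squashed through a sigmoid internally.
    obj = LossFunction("binary:logistic").obj_function()
    grad, hess = obj(np.array([0.5, -1.2, 2.0]), _DemoDMatrix([1, 0, 1]))
    print("logistic grad:", grad, "hess:", hess)

    # Multiclass softmax: preds carries one raw-score column per class, and the
    # labels must cover every class so the one-hot encoding has K columns.
    obj = LossFunction("multi:softprob").obj_function()
    preds = np.array([[0.2, 1.5, -0.3], [1.0, -0.5, 0.1], [-0.2, 0.3, 0.9]])
    grad, hess = obj(preds, _DemoDMatrix([1, 0, 2]))
    print("softmax grad shape:", grad.shape)  # (3, 3)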