Source code for secretflow.ml.boost.homo_boost.tree_core.loss_function
# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from secretflow.utils.errors import InvalidArgumentError
LEGAL_OBJ_FUNCTION = [
    "binary:logistic",
    "reg:logistic",
    "multi:softmax",
    "multi:softprob",
    "reg:squarederror",
]
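# Note: these objective names mirror the XGBoost "objective" parameter strings.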


class LossFunction(object):
    """Inner definition of the supported loss functions.

    Attributes:
        obj_name: Name of the loss function, one of
            [
                "binary:logistic",   # logistic regression for binary classification, output probability
                "reg:logistic",      # logistic regression
                "multi:softmax",     # multiclass classification via softmax, output class index
                "multi:softprob",    # same as softmax, but output a probability per class
                "reg:squarederror",  # regression with squared loss
            ]
    """
    def __init__(self, obj_name: str):
        self.obj_name = obj_name
        if not self._check_legal(obj_name):
            raise InvalidArgumentError(
                f"Illegal loss function param: {obj_name}, "
                f"must be one of {LEGAL_OBJ_FUNCTION}"
            )
    @staticmethod
    def _check_legal(obj_name: str) -> bool:
        return obj_name in LEGAL_OBJ_FUNCTION
    def obj_function(self):
        if self.obj_name in ("binary:logistic", "reg:logistic"):
            return self._reg_logistic
        if self.obj_name in ("multi:softmax", "multi:softprob"):
            return self._softmaxobj
        if self.obj_name == "reg:squarederror":
            return self._reg_squared
    def _reg_logistic(self, preds, dtrain):
        """Logistic objective.

        Args:
            preds: (N, 1) array of raw predictions, N = #data.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, 1) array with first-order gradient values.
            hess: (N, 1) array with second-order gradient values.
        """
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight into probability
        grad = (preds - labels).astype(np.float64)
        hess = (preds * (1.0 - preds)).astype(np.float64)
        return grad, hess
    def _reg_squared(self, preds, dtrain):
        """Squared loss objective.

        Args:
            preds: (N, 1) array of predictions, N = #data.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, 1) array with first-order gradient values.
            hess: (N, 1) array with second-order gradient values.
        """
        labels = dtrain.get_label()
        grad = (-2 * (labels - preds)).astype(np.float64)
        hess = (np.ones_like(labels) * 2).astype(np.float64)
        return grad, hess
    def _softmaxobj(self, preds, dtrain):
        """Softmax objective.

        Args:
            preds: (N, K) array, N = #data, K = #classes.
            dtrain: DMatrix object with training data.

        Returns:
            grad: (N, K) array with first-order gradient values.
            hess: (N, K) array with second-order gradient values.
        """
        def _softmax(x):
            '''Row-wise softmax; the row max is subtracted first to avoid overflow in exp.'''
            e = np.exp(x - x.max(axis=1, keepdims=True))
            return e / np.sum(e, axis=1).reshape(-1, 1)
        # Labels arrive as a vector of class indices, one per example.
        labels = dtrain.get_label()
        # When objective=softprob, preds has shape (N, K).
        # Note: scikit-learn >= 1.2 renames the ``sparse`` argument to ``sparse_output``.
        labels = OneHotEncoder(sparse=False).fit_transform(labels.reshape(-1, 1))
        prob = _softmax(preds)
        grad = (prob - labels).astype(np.float64)
        hess = (2.0 * prob * (1.0 - prob)).astype(np.float64)
        # Return grad and hess as (N, K) matrices.
        return grad, hess
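

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library API). The objective
# callbacks above only call ``get_label()`` on their ``dtrain`` argument, so a
# hypothetical ``_DemoDMatrix`` stub can stand in for ``xgboost.DMatrix``.
# ---------------------------------------------------------------------------
if __name__ == "__main__":

    class _DemoDMatrix:
        """Minimal stand-in exposing only the get_label() accessor."""

        def __init__(self, labels):
            self._labels = np.asarray(labels, dtype=np.float64)

        def get_label(self):
            return self._labels

    # Binary logistic: raw scores are squashed through a sigmoid internally.
    obj = LossFunction("binary:logistic").obj_function()
    grad, hess = obj(np.array([0.5, -1.2, 2.0]), _DemoDMatrix([1, 0, 1]))
    print("logistic grad:", grad, "hess:", hess)

    # Multiclass softmax: preds carries one raw-score column per class, and the
    # labels must cover every class so the one-hot encoding has K columns.
    obj = LossFunction("multi:softprob").obj_function()
    preds = np.array([[0.2, 1.5, -0.3], [1.0, -0.5, 0.1], [-0.2, 0.3, 0.9]])
    grad, hess = obj(preds, _DemoDMatrix([1, 0, 2]))
    print("softmax grad shape:", grad.shape)  # (3, 3)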