# Source code for secretflow.stats.score_card

# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

import math

import numpy as np

from secretflow.data.vertical import VDataFrame
from secretflow.data.horizontal import HDataFrame
from secretflow.data import FedNdarray


class ScoreCard:
    """Map a binary regression's predicted probability to a bounded score.

    The mapping process is as follows:

        odds = pred / (1 - pred)
        score = offset + factor * log(odds)

    The offset and factor in the formula come from the user's settings.
    Usually users do not give offset and factor directly, but give three
    constraint parameters:

        scaled_value: a score baseline
        odd_base: the odds value at the given score baseline
        pdo: how many scores are needed to double the odds

    The offset and factor are solved from these three constraints:

        factor = pdo / log(2)
        offset = scaled_value - (factor * log(odd_base))

    Attributes:
        factor: multiplier applied to log(odds), derived from ``pdo``.
        offset: additive constant, derived from ``scaled_value``,
            ``odd_base`` and ``factor``.
        max_score: upper limit for the score; larger scores are clipped.
        min_score: lower limit for the score; smaller scores are clipped.
        bad_label_value: which label represents the negative sample. When
            it is 1 the log-odds term is subtracted instead of added.
    """

    def __init__(
        self,
        odd_base: float,
        scaled_value: float,
        pdo: float,
        max_score: int = 1000,
        min_score: int = 0,
        bad_label_value: int = 0,
    ):
        """Validate the constraints and derive ``factor`` and ``offset``.

        Args:
            odd_base: the odds value at the score baseline, must be > 0.
            scaled_value: the score baseline, must be > 0 and lie strictly
                between ``min_score`` and ``max_score``.
            pdo: how many scores are needed to double the odds, must be > 0.
            max_score: upper limit for the score.
            min_score: lower limit for the score, must be >= 0.
            bad_label_value: label of the negative sample, 0 or 1.

        Raises:
            AssertionError: if any argument violates the constraints above.
        """
        # NOTE: validation uses ``assert`` (skipped under ``python -O``); it is
        # kept so callers that catch AssertionError continue to work.
        assert odd_base > 0, f"odd_base should be positive, got {odd_base}"
        assert scaled_value > 0, f"scaled_value should be positive, got {scaled_value}"
        assert pdo > 0, f"pdo should be positive, got {pdo}"
        assert (
            max_score >= 0 and max_score > scaled_value
        ), f"max_score should bigger than 0 and scaled_value, got {max_score}"
        assert (
            min_score >= 0 and min_score < scaled_value and scaled_value < max_score
        ), f"min_score should bigger than 0 but less than scaled_value and max_score, got {min_score}"
        assert bad_label_value in [
            0,
            1,
        ], f"bad_label_value should be 0 or 1, got {bad_label_value}"

        self.factor = pdo / math.log(2)
        self.offset = scaled_value - self.factor * math.log(odd_base)
        self.max_score = max_score
        self.min_score = min_score
        self.bad_label_value = bad_label_value

    def transform(self, pred: Union[FedNdarray, VDataFrame, HDataFrame]) -> FedNdarray:
        """Convert predicted probabilities into clipped scores.

        Args:
            pred: predicted probability from binary regression. A
                VDataFrame / HDataFrame is converted through its ``values``
                attribute; a FedNdarray is used as is. Every value must lie
                in [0, 1].

        Returns:
            A FedNdarray with the same partition keys and ``partition_way``
            as the input, holding the mapped scores limited to
            [min_score, max_score].

        Raises:
            AssertionError: if ``pred`` has an unsupported type or shape, or
                (inside the per-partition function) if a value is outside
                [0, 1].
        """
        assert isinstance(
            pred, (FedNdarray, VDataFrame, HDataFrame)
        ), "pred should be FedNdarray or VDataFrame or HDataFrame"
        pred = pred if isinstance(pred, FedNdarray) else pred.values
        shape = pred.shape
        assert len(shape) == 1 or shape[1] == 1, "pred should be list or 1D array"

        def score_transform(pred: np.ndarray):
            # Runs once per partition on that partition's plain ndarray.
            assert (pred >= 0).all() and (
                pred <= 1
            ).all(), f"pred should in [0, 1], but got max pred {pred.max()} and min pred {pred.min()}"

            # pred == 0 or pred == 1 is accepted above and yields -inf / +inf
            # log-odds, which the clipping below turns into min_score /
            # max_score. Silence the divide-by-zero warnings NumPy would
            # otherwise emit for those expected boundary values.
            with np.errstate(divide="ignore"):
                log_odds = np.log(pred / (1 - pred))

            if self.bad_label_value == 1:
                score = self.offset - self.factor * log_odds
            else:
                score = self.offset + self.factor * log_odds

            # Clip the score into [min_score, max_score].
            score = np.select(
                [score > self.max_score, score < self.min_score],
                [self.max_score, self.min_score],
                score,
            )
            return score

        # Each partition key ``d`` is called with the scoring function and the
        # result is applied to that key's partition (presumably ``d`` is the
        # device owning the partition -- confirm against FedNdarray).
        return FedNdarray(
            partitions={
                d: d(score_transform)(pred.partitions[d]) for d in pred.partitions
            },
            partition_way=pred.partition_way,
        )