Source code for secretflow.stats.biclassification_eval

# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License")
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is a wrapper of evaluation functions
from typing import Union

from secretflow.data import FedNdarray
from secretflow.data.vertical import VDataFrame
from secretflow.device import PYUObject

from .core import gen_biclassification_reports

# BiClassification Report is different from Regression Evaluation in that many binning related statistics
# are computed in a sequential batch processing manner, rather than independent evaluations
# on the whole of y_true and y_score

# TODO: HDataFrame, VDataFrame and SPU support in future


[docs]class BiClassificationEval: """Statistics Evaluation for a bi-classification model on a dataset. Attribute: y_true: Union[FedNdarray, VDataFrame] input of labels y_score: Union[FedNdarray, VDataFrame] input of prediction scores bucket_size: int input of number of bins in report """ # binning, n true positive and false positive sequence calculations all require sorting # which is not supported by spu yet # all implementations related to bi-classification uses single-party implementation only
[docs] def __init__( self, y_true: Union[FedNdarray, VDataFrame], y_score: Union[FedNdarray, VDataFrame], bucket_size: int, ): assert isinstance( y_true, (FedNdarray, VDataFrame) ), "y_true should be FedNdarray or VDataFrame" assert isinstance( y_score, (FedNdarray, VDataFrame) ), "y_score should be FedNdarray or VDataFrame" # for now we only consider vertical splitting case # y_true and y_score belongs to the same and single party assert ( y_true.shape == y_score.shape ), "y_true and y_score should have the same shapes" assert ( y_true.shape[1] == 1 ), "y_true must be a single column, reshape before proceed" assert len(y_true.partitions) == len( y_score.partitions ), "y_true and y_score should have the same partitions" assert len(y_score.partitions) == 1, "y_score should have one partition" device1 = [*y_score.partitions.keys()][0] device2 = [*y_true.partitions.keys()][0] assert ( device1 == device2 ), "Currently require the device for two inputs are the same" # Later may use spu self.device = device1 if isinstance(y_true, FedNdarray): self.y_true = [*y_true.partitions.values()][0] else: self.y_true = ([*y_true.partitions.values()][0]).data if isinstance(y_score, FedNdarray): self.y_score = [*y_score.partitions.values()][0] else: self.y_score = ([*y_score.partitions.values()][0]).data self.bucket_size = bucket_size
[docs] def get_all_reports(self) -> PYUObject: """get all reports. The reports contains: summary_report: SummaryReport group_reports: List[GroupReport] eq_frequent_bin_report: List[EqBinReport] eq_range_bin_report: List[EqBinReport] head_report: List[PrReport] reports for fpr = 0.001, 0.005, 0.01, 0.05, 0.1, 0.2 see more in core.biclassification_eval_core """ # possible spu launch and reveal in the future return self.device(gen_biclassification_reports)( self.y_true, self.y_score, self.bucket_size )