# Source code for secretflow.stats.biclassification_eval
# Copyright 2022 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is a wrapper of evaluation functions
from typing import Union
from secretflow.data import FedNdarray
from secretflow.data.vertical import VDataFrame
from secretflow.device import PYUObject
from .core import gen_biclassification_reports
# BiClassification Report is different from Regression Evaluation in that many binning related statistics
# are computed in a sequential batch processing manner, rather than independent evaluations
# on the whole of y_true and y_score
# TODO: HDataFrame, VDataFrame and SPU support in future
class BiClassificationEval:
    """Statistics evaluation for a bi-classification model on a dataset.

    Both inputs must be held by one and the same party: each of them must
    have exactly one partition, and the two partitions must be keyed by the
    same device.

    Attributes:
        y_true: labels, unpacked from the single partition of the ``y_true``
            input (``Union[FedNdarray, VDataFrame]``).
        y_score: prediction scores, unpacked from the single partition of the
            ``y_score`` input (``Union[FedNdarray, VDataFrame]``).
        bucket_size: int, number of bins in the report.
        device: partition key shared by both inputs; the report generation is
            invoked through it.
    """

    # Binning and the true-positive / false-positive sequence calculations all
    # require sorting, which is not supported by SPU yet.
    # All implementations related to bi-classification therefore use a
    # single-party implementation only.

    def __init__(
        self,
        y_true: Union[FedNdarray, VDataFrame],
        y_score: Union[FedNdarray, VDataFrame],
        bucket_size: int,
    ):
        """Validate the inputs and unpack their single-party payloads.

        Args:
            y_true: labels; must be a single column held in one partition.
            y_score: prediction scores; must have the same shape as
                ``y_true`` and live in one partition on the same device.
            bucket_size: number of bins in the report; stored as given.

        Raises:
            AssertionError: if either input has the wrong type, the shapes
                differ, ``y_true`` is not a single column, the inputs do not
                each have exactly one partition, or their devices differ.
        """
        # NOTE: the checks are kept as ``assert`` statements (same order, same
        # messages) so that callers expecting AssertionError keep working.
        # Be aware that Python skips assertions when run with ``-O``.
        assert isinstance(
            y_true, (FedNdarray, VDataFrame)
        ), "y_true should be FedNdarray or VDataFrame"
        assert isinstance(
            y_score, (FedNdarray, VDataFrame)
        ), "y_score should be FedNdarray or VDataFrame"

        # For now we only consider the vertical splitting case:
        # y_true and y_score belong to the same, single party.
        assert (
            y_true.shape == y_score.shape
        ), "y_true and y_score should have the same shapes"
        assert (
            y_true.shape[1] == 1
        ), "y_true must be a single column, reshape before proceed"
        assert len(y_true.partitions) == len(
            y_score.partitions
        ), "y_true and y_score should have the same partitions"
        assert len(y_score.partitions) == 1, "y_score should have one partition"

        # Exactly one partition each (checked above), so the first key is the
        # only key.
        score_device = next(iter(y_score.partitions.keys()))
        true_device = next(iter(y_true.partitions.keys()))
        assert (
            score_device == true_device
        ), "Currently require the device for two inputs are the same"

        # Later may use spu
        self.device = score_device
        self.y_true = self._sole_partition_payload(y_true)
        self.y_score = self._sole_partition_payload(y_score)
        self.bucket_size = bucket_size

    @staticmethod
    def _sole_partition_payload(fed_data):
        """Return the payload of the first (and only) partition of ``fed_data``.

        A ``FedNdarray`` partition is returned as is; for any other input
        (a ``VDataFrame`` after validation) the partition's ``data`` attribute
        is returned instead.
        """
        partition = next(iter(fed_data.partitions.values()))
        if isinstance(fed_data, FedNdarray):
            return partition
        return partition.data

    def get_all_reports(self) -> PYUObject:
        """Get all reports.

        The reports contain:
            summary_report: SummaryReport
            group_reports: List[GroupReport]
            eq_frequent_bin_report: List[EqBinReport]
            eq_range_bin_report: List[EqBinReport]
            head_report: List[PrReport]
                reports for fpr = 0.001, 0.005, 0.01, 0.05, 0.1, 0.2

        See more in core.biclassification_eval_core.

        Returns:
            The result of invoking ``gen_biclassification_reports`` through
            ``self.device`` with the stored labels, scores and bucket size.
        """
        # possible spu launch and reveal in the future
        generate_reports = self.device(gen_biclassification_reports)
        return generate_reports(self.y_true, self.y_score, self.bucket_size)