# Source code for minvime.estimator_classification

""" 
    Estimator
    Functions for estimating model performance requirements from business criteria
"""
import numpy as np
import math

######################################################################

def estimate_binary_model_requirements(tp, fp, tn, fn, cases, baserate, minroi=0):
    """
    Determine the minimal performance characteristics of a binary classification model

    A grid of (beta, alpha) pairs is passed to ``generate_roc_auc`` to simulate a
    range of ROC curves. For each curve the peak return is obtained from
    ``calculate_peak_roi``; the curve with the lowest AUC whose peak return is
    at least ``minroi`` is reported.

    :param tp: The benefit of a True Positive Prediction
    :type tp: float, required
    :param fp: The cost of a False Positive Prediction
    :type fp: float, required
    :param tn: The benefit of a True Negative Prediction
    :type tn: float, required
    :param fn: The cost of a False Negative Prediction
    :type fn: float, required
    :param cases: The number of events/cases that occur within the period of analysis.
    :type cases: integer, required
    :param baserate: The rate at which the event being predicted occurs
    :type baserate: float, required
    :param minroi: The minimum required ROI for the model, defaults to 0.0
    :type minroi: float, optional

    :returns: A tuple ``(min_auc, min_precision, min_recall, fprates, tprs)``.
        ``fprates`` and ``tprs`` are numpy arrays describing the selected ROC
        curve. If no simulated curve reaches ``minroi`` the placeholder values
        are returned unchanged: 1.0 for the three scalars and an all-ones
        ``tprs`` array.
    """
    beta_range = [2, 3, 4, 5, 6, 8, 10, 12, 14, 16, 20, 25, 30, 35, 40, 45, 50,
                  60, 70, 80, 90, 100]
    alpha_range = [0.01, 0.03, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45,
                   0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.93, 0.95,
                   0.97, 0.99]
    fprates = [0.0, 0.00001, 0.0001, 0.001, 0.002, 0.003, 0.004, 0.005, 0.01,
               0.015, 0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05, 0.055, 0.06,
               0.065, 0.07, 0.075, 0.08, 0.09, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35,
               0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.91,
               0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 0.999, 0.9999,
               1.0]

    # PLACEHOLDERS FOR THE RETURN VALUES
    min_auc = 1.0
    min_precision = 1.0
    min_recall = 1.0
    tprs = np.ones(len(fprates))

    num_pos = cases * baserate
    num_neg = cases - num_pos

    # Iterate over all combinations of exponent and alpha mix variables
    # so that we can simulate a range of ROC curves.
    for beta in beta_range:
        for alpha in alpha_range:
            auc, _, y = generate_roc_auc(fprates, alpha, beta)
            roi, precision, recall = calculate_peak_roi(
                fprates, y, tp, fp, tn, fn, num_pos, num_neg
            )
            # "<=" means that among curves with equal AUC the one visited
            # last in the grid is the one that is kept.
            if auc <= min_auc and roi >= minroi:
                min_auc = auc
                min_precision = precision
                min_recall = recall
                tprs = y

    return min_auc, min_precision, min_recall, np.array(fprates), tprs
######################################################################
def generate_roc_auc(fprates, alpha, beta):
    """
    Generate a synthetic ROC curve and compute its AUC

    The true positive rate at each false positive rate ``x`` is

        ``alpha * (1 - (x - 1) ** (2 * beta)) + (1 - alpha) * x``

    i.e. a mix, weighted by ``alpha``, of a curved term controlled by the
    exponent ``beta`` and the diagonal ``y = x``.

    :param fprates: The false positive rates at which to evaluate the curve
    :type fprates: sequence of float, required
    :param alpha: The mixing weight given to the curved term
    :type alpha: float, required
    :param beta: The exponent parameter; the curved term uses ``2 * beta``
    :type beta: number, required

    :returns: A tuple ``(auc, x, y)`` where ``x`` is ``fprates`` as a numpy
        array, ``y`` is the array of true positive rates and ``auc`` is the
        value returned by ``calculate_auc(x, y)``.
    """
    x = np.array(fprates)
    # Evaluated directly; the formula is fixed, so there is no need to route
    # it through eval() on a string.
    y = alpha * (1 - (x - 1) ** (2 * beta)) + (1 - alpha) * x
    auc = calculate_auc(x, y)
    return auc, x, y
######################################################################
def calculate_peak_roi(fprates, tprates, tp, fp, tn, fn, num_pos, num_neg):
    """
    Calculate the maximal ROI for a given ROC curve
    (defined by vectors of FPR and TPR)

    For every (FPR, TPR) operating point the total payoff is computed as the
    sum of the four outcome counts multiplied by their per-outcome values.
    The operating point with the largest payoff is selected; on ties the
    earliest such point is kept.

    :param fprates: The false positive rate at each operating point
    :type fprates: sequence of float, required
    :param tprates: The true positive rate at each operating point, paired
        element-wise with ``fprates`` (extra elements of the longer sequence
        are ignored)
    :type tprates: sequence of float, required
    :param tp: The value of a True Positive Prediction
    :param fp: The value of a False Positive Prediction
    :param tn: The value of a True Negative Prediction
    :param fn: The value of a False Negative Prediction
    :param num_pos: The number of positive cases
    :param num_neg: The number of negative cases

    :returns: A tuple ``(roi, precision, recall)`` for the best operating
        point. Precision is 0.0 when that point predicts no positives at
        all. For an empty curve the result is ``(-inf, 0.0, 0.0)``.
    """
    # Start from -inf rather than a large negative constant so that curves
    # whose payoffs are all extremely negative still report their true peak.
    roi = float("-inf")
    result_precision = 0.0
    result_recall = 0.0

    for false_positive_rate, true_positive_rate in zip(fprates, tprates):
        payoff = (
            (num_neg * (1 - false_positive_rate) * tn)
            + (num_neg * false_positive_rate * fp)
            + (num_pos * true_positive_rate * tp)
            + (num_pos * (1 - true_positive_rate) * fn)
        )
        if payoff > roi:
            roi = payoff
            tps = num_pos * true_positive_rate
            fps = num_neg * false_positive_rate
            predicted_positives = tps + fps
            # Guard against 0/0 when nothing is predicted positive.
            if predicted_positives > 0:
                result_precision = tps / predicted_positives
            else:
                result_precision = 0.0
            result_recall = true_positive_rate

    return roi, result_precision, result_recall
######################################################################
def calculate_auc(fprates, tprates):
    """
    Calculate the AUC of given ROC curve
    (defined by vectors of FPR and TPR)

    The area is accumulated segment by segment: each consecutive pair of
    points contributes its FPR step multiplied by the mean of the two TPR
    values. A single-point curve yields an area of 0.
    """
    area = 0
    for right in range(1, len(fprates)):
        left = right - 1
        width = fprates[right] - fprates[left]
        mean_height = (tprates[left] + tprates[right]) / 2
        area = area + (width * mean_height)
    return area
########################################################################
def estimate_intervention_requirements(cases, baserate, cost, payoff, payback, succrate, backfire):
    """
    Derive the four outcome values (tp, fp, tn, fn) for an intervention

    ``cost`` may be supplied with either sign; its magnitude is subtracted
    from both the true positive and the false positive value. ``cases`` and
    ``baserate`` are accepted but not used in the calculation.

    :returns: A tuple ``(tp, fp, tn, fn)`` where
        ``tp = payoff * succrate - cost``,
        ``fp = payback * backfire - cost`` and ``tn`` and ``fn`` are both 0.
    """
    # Work with the cost as a non-negative amount regardless of input sign.
    spend = 0 - cost if cost < 0 else cost
    true_positive_value = payoff * succrate - spend
    false_positive_value = payback * backfire - spend
    return true_positive_value, false_positive_value, 0, 0
########################################################################
def simplicity_estimate(tp, fp, cases, baserate, minroi=0):
    """
    Compute a ratio of triangle-number areas to the ``p * n`` grid

    ``p`` is the rounded number of positive cases (``cases * baserate``) and
    ``n`` the remaining cases. The result is 0.0 when ``p`` does not exceed
    ``minroi / tp``; otherwise a triangular area built from ``p``, ``tp`` and
    ``fp`` (clipped when it extends past ``n``) is divided by ``p * n``.

    NOTE(review): ``minroi / tp`` presumably represents the minimum number of
    true positives needed to reach ``minroi`` -- confirm with the author.

    :param tp: The benefit of a True Positive Prediction (used as a divisor)
    :param fp: The cost of a False Positive Prediction
    :param cases: The number of cases
    :param baserate: The rate at which the event being predicted occurs
    :param minroi: The minimum required ROI, defaults to 0
    """
    min_positives = minroi / tp
    # THE COST IS COLLECTED AS A NEGATIVE NUMBER FROM THE APPLICATION
    fp_cost = 0 - fp

    positives = round(cases * baserate)
    negatives = cases - positives
    if positives <= min_positives:
        return 0.0

    height = positives - min_positives
    width = (tp / fp_cost) * height

    covered = nth_triangle(height)
    if width > negatives:
        # Remove the portion of the triangle that extends beyond the negatives.
        covered = covered - nth_triangle(width - negatives)

    return covered / (positives * negatives)
def nth_triangle(n):
    """
    Return ``(n**2 + n) / 2``, the n-th triangular number, as a float

    The formula is applied as-is, so non-integer ``n`` is accepted too.
    """
    squared = math.pow(n, 2)
    return (squared + n) / 2
########################################################################