# Source code for abed.results.significance

"""
Functions for evaluating statistically significant differences between 
methods.
"""

from scipy.stats import f as f_dist
from scipy.stats import norm as norm_dist
from math import sqrt

from .models import AbedTableTypes
from ..conf import settings
from ..io import error


def global_difference(table):
    """Run an F-test on the average ranks.

    Computes the Friedman chi-squared statistic from the "Average" row of
    a rank summary table and converts it to the F-distributed statistic of
    Iman & Davenport.

    Parameters
    ----------
    table : AbedTable
        Summary table of type ``AbedTableTypes.RANKS`` whose rows include
        an "Average" entry with one average rank per method.

    Returns
    -------
    tuple of (float, float) or None
        ``(Fstat, Fprob)``: the F statistic and its p-value. ``None`` when
        the table is not a rank summary table or has no "Average" row.
    """
    if (not table.is_summary) or (table.type != AbedTableTypes.RANKS):
        return None
    N = float(len(settings.DATASETS))
    k = float(len(settings.METHODS))
    averages = next((row for _id, row in table if _id == "Average"), None)
    if averages is None:
        # No "Average" row in the table: nothing to test.
        return None
    av_sq = sum(pow(float(x), 2.0) for x in averages)
    chi2 = 12.0 * N / (k * (k + 1)) * (av_sq - (k * pow(k + 1, 2.0) / 4.0))
    # The denominator is zero when the ordering of the methods is always
    # the same; the statistic is then unbounded.
    try:
        Fstat = (N - 1.0) * chi2 / (N * (k - 1) - chi2)
    except ZeroDivisionError:
        Fstat = float("inf")
    Fprob = 1.0 - f_dist.cdf(Fstat, k - 1, (k - 1) * (N - 1))
    return Fstat, Fprob
def reference_difference(table):
    """Run Holm's procedure for a reference classifier.

    Compares the average rank of every other method with that of
    ``settings.REFERENCE_METHOD`` through z-scores, and applies Holm's
    step-down procedure at ``settings.SIGNIFICANCE_LEVEL`` to decide which
    differences are significant.

    Parameters
    ----------
    table : AbedTable
        Summary table of type ``AbedTableTypes.RANKS`` whose rows include
        an "Average" entry with one average rank per method.

    Returns
    -------
    tuple or None
        ``(out, CD)`` where ``out`` is a list of
        ``(method, z_score, p_value, significant)`` tuples (one per
        non-reference method) and ``CD`` is the critical difference in
        average rank. ``None`` when no reference method is configured or
        the table is not a rank summary table.

    Raises
    ------
    SystemExit
        When the configured reference method is not in ``settings.METHODS``.
    """
    # Sanity checks
    if settings.REFERENCE_METHOD is None:
        return None
    if (not table.is_summary) or (table.type != AbedTableTypes.RANKS):
        return None
    if settings.REFERENCE_METHOD not in settings.METHODS:
        error(
            "Reference method %s not in list of methods."
            % settings.REFERENCE_METHOD
        )
        raise SystemExit

    # define constants
    N = float(len(settings.DATASETS))
    k = float(len(settings.METHODS))
    av_ranks = next((row for _id, row in table if _id == "Average"), None)
    av_ranks = [float(x) for x in av_ranks]
    ref_idx = settings.METHODS.index(settings.REFERENCE_METHOD)
    others = [m for m in settings.METHODS if not m == settings.REFERENCE_METHOD]

    # Calculate the Z-scores compared to the reference method
    Z_scores = [0] * len(others)
    P_values = [0] * len(others)
    constant = sqrt((6.0 * N) / (k * (k + 1.0)))
    for j, method in enumerate(others):
        i = settings.METHODS.index(method)
        Z_scores[j] = (av_ranks[ref_idx] - av_ranks[i]) * constant
        P_values[j] = norm_dist.cdf(Z_scores[j])

    # Sort the p-values in ascending order, remembering original positions
    sorted_pvals = sorted((p, i) for i, p in enumerate(P_values))

    # Holm's step-down procedure: reject hypotheses in order of increasing
    # p-value and STOP at the first non-rejection -- every remaining
    # hypothesis is then retained as well.
    significant_differences = [False] * len(others)
    CD_threshold = None
    for i in range(int(k - 1)):
        threshold = settings.SIGNIFICANCE_LEVEL / float(k - (i + 1))
        pval, idx = sorted_pvals[i]
        if pval >= threshold:
            # The first non-rejected hypothesis fixes the critical
            # threshold used for the critical-difference computation.
            CD_threshold = threshold
            break
        significant_differences[idx] = True
    if CD_threshold is None:
        # Every hypothesis was rejected; the last threshold applied is the
        # unadjusted significance level (alpha / 1).
        CD_threshold = settings.SIGNIFICANCE_LEVEL

    CD = -1 * norm_dist.ppf(CD_threshold) / constant
    out = list(zip(others, Z_scores, P_values, significant_differences))
    return out, CD