Source code for abed.results.cv_tt

"""

Functions for making result tables specifically for CV_TT experiments.

In CV_TT experiments, the following conventions apply:

1. A label 'y_train' is expected to exist, with columns 'y_train_true' and
   'y_train_pred'. The first column must hold the true values of y; the
   second holds the values of y predicted when the corresponding indices were
   in the hold-out fold of cross-validation (see the layout sketch after this
   docstring).

2. A label 'y_test' is expected to exist, with columns 'y_test_true' and
   'y_test_pred', holding the true and predicted values of y on the test
   dataset. Predicted values should (in principle) be obtained by training
   the model on the full training dataset and predicting on the test dataset.

3. Tables are created for each ordered pair of metrics in the configuration
   file. In each table, every method occupies a single column. Each cell
   shows the performance on the test dataset as measured by the second
   metric, for the parameter configuration whose performance on the first
   metric is optimal (see the worked example after cvtt_build_tables_metric
   below). This is done for all metric targets other than 'y_train'.

"""

from itertools import product

from .models import AbedTable, AbedTableTypes
from .ranks import make_rank_table
from .tables import make_tables_scalar
from ..conf import settings
from ..io import info
from ..progress import enum_progress


def filter_targets(targets):
    """Yield all metric targets except those for the training labels."""
    for target in targets:
        if target.startswith(settings.YTRAIN_LABEL):
            continue
        yield target


def cvtt_tables(abed_cache):
    """Build all result tables for a CV_TT experiment."""
    tables = []
    for target in filter_targets(abed_cache.metric_targets):
        for m1, m2 in product(abed_cache.metrics, abed_cache.metrics):
            tables.extend(cvtt_make_tables_metric(abed_cache, m1, m2, target))
    for scalar in abed_cache.scalars:
        tables.extend(make_tables_scalar(abed_cache, scalar))
    return tables


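# For example, with metrics ["accuracy", "f1"] in the configuration file (a
# hypothetical setting, purely for illustration), the product loop above
# generates tables for the ordered pairs
#
#   (accuracy, accuracy), (accuracy, f1), (f1, accuracy), (f1, f1)
#
# where the first metric selects the parameter configuration on the training
# labels and the second measures test performance.

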
def cvtt_make_tables_metric(abed_cache, train_metric, test_metric, target):
    """Build the value table and its rank table for a pair of metrics."""
    table = cvtt_build_tables_metric(
        abed_cache, train_metric, test_metric, target
    )
    table.higher_better = settings.METRICS[test_metric]["best"] == max
    table.type = AbedTableTypes.VALUES
    table.desc = "Training metric: %s, testing metric: %s" % (
        train_metric,
        test_metric,
    )
    table.name = "%s_%s" % (train_metric, test_metric)
    table.target = target
    table.is_metric = True
    table.trainmetricname = train_metric
    table.testmetricname = test_metric
    ranktable = make_rank_table(table)
    return [table, ranktable]


def cvtt_build_tables_metric(abed_cache, train_metric, test_metric, target):
    """Fill a table with test performance at the optimal train configuration."""
    table = AbedTable()
    table.headers = ["ID"] + sorted(abed_cache.methods)
    info(
        "Generating tables for train metric %s, test metric %s, target %s"
        % (train_metric, test_metric, target)
    )
    for i, dset in enum_progress(sorted(abed_cache.datasets), label="Tables"):
        row = []
        for j, method in enumerate(sorted(abed_cache.methods)):
            results = list(abed_cache.iter_results_dm(dset, method))
            # Train-metric value for each parameter configuration.
            values = [
                r.get_result(settings.YTRAIN_LABEL, metric=train_metric)
                for r in results
            ]
            if not values:
                row.append("NaN")
                continue
            # Configurations that attain the optimal train-metric value.
            best_value = settings.METRICS[train_metric]["best"](values)
            best_results = [
                r
                for r in results
                if r.get_result(settings.YTRAIN_LABEL, metric=train_metric)
                == best_value
            ]
            # Among those, report the best test-metric value on the target.
            target_values = [
                r.get_result(target, metric=test_metric) for r in best_results
            ]
            target_best = settings.METRICS[test_metric]["best"](target_values)
            rounded = round(target_best, settings.RESULT_PRECISION)
            fmt = "%%.%df" % settings.RESULT_PRECISION
            row.append(fmt % rounded)
        table.add_row(dset, row)
    return table
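

# A worked example of the cell computation above (hypothetical numbers, with
# "best" == max for both metrics): suppose a method has three parameter
# configurations whose train-metric values are [0.81, 0.90, 0.90] and whose
# corresponding test-metric values are [0.78, 0.85, 0.88]. The best train
# value is 0.90, attained by the last two configurations; among those, the
# best test value is 0.88, so the cell shows 0.88 (rounded to
# RESULT_PRECISION digits).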