Source code for abed.results.cv_tt

"""

Functions for making result tables specifically for CV_TT experiments.

In CV_TT experiments, the following conventions apply:

1. A label 'y_train' is expected to exist, with columns 'y_train_true' and
   'y_train_pred'. The first column must hold the true values of y; the
   second holds the values of y predicted when the corresponding indices were
   in the hold-out fold of cross-validation (see the layout sketch after this
   docstring).

2. A label 'y_test' is expected to exist, with columns 'y_test_true' and
   'y_test_pred', holding the true and predicted values of y on the test
   dataset. Predicted values should (in principle) be obtained by training
   the model on the full training dataset and predicting on the test dataset.

3. Tables are created for each ordered pair of metrics in the configuration
   file. In each table, every method occupies a single column. Each cell
   shows the performance on the test dataset as measured by the second
   metric, for the parameter configuration whose performance on the first
   metric is optimal (see the worked example after cvtt_build_tables_metric
   below). This is done for all metric targets other than 'y_train'.

"""

from itertools import product

from .models import AbedTable, AbedTableTypes
from .ranks import make_rank_table
from .tables import make_tables_scalar
from ..conf import settings
from ..io import info
from ..progress import enum_progress


def filter_targets(targets):
    """Yield all metric targets except those for the training labels."""
    for target in targets:
        if target.startswith(settings.YTRAIN_LABEL):
            continue
        yield target


def cvtt_tables(abed_cache):
    """Build all result tables for a CV_TT experiment."""
    tables = []
    for target in filter_targets(abed_cache.metric_targets):
        for m1, m2 in product(abed_cache.metrics, abed_cache.metrics):
            tables.extend(cvtt_make_tables_metric(abed_cache, m1, m2, target))
    for scalar in abed_cache.scalars:
        tables.extend(make_tables_scalar(abed_cache, scalar))
    return tables


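# For example, with metrics ["accuracy", "f1"] in the configuration file (a
# hypothetical setting, purely for illustration), the product loop above
# generates tables for the ordered pairs
#
#   (accuracy, accuracy), (accuracy, f1), (f1, accuracy), (f1, f1)
#
# where the first metric selects the parameter configuration on the training
# labels and the second measures test performance.

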
def cvtt_make_tables_metric(abed_cache, train_metric, test_metric, target):
    """Build the value table and its rank table for a pair of metrics."""
    table = cvtt_build_tables_metric(
        abed_cache, train_metric, test_metric, target
    )
    table.higher_better = settings.METRICS[test_metric]["best"] == max
    table.type = AbedTableTypes.VALUES
    table.desc = "Training metric: %s, testing metric: %s" % (
        train_metric,
        test_metric,
    )
    table.name = "%s_%s" % (train_metric, test_metric)
    table.target = target
    table.is_metric = True
    table.trainmetricname = train_metric
    table.testmetricname = test_metric
    ranktable = make_rank_table(table)
    return [table, ranktable]


def cvtt_build_tables_metric(abed_cache, train_metric, test_metric, target):
    """Fill a table with test performance at the optimal train configuration."""
    table = AbedTable()
    table.headers = ["ID"] + sorted(abed_cache.methods)
    info(
        "Generating tables for train metric %s, test metric %s, target %s"
        % (train_metric, test_metric, target)
    )
    for i, dset in enum_progress(sorted(abed_cache.datasets), label="Tables"):
        row = []
        for j, method in enumerate(sorted(abed_cache.methods)):
            results = list(abed_cache.iter_results_dm(dset, method))
            # Train-metric value for each parameter configuration.
            values = [
                r.get_result(settings.YTRAIN_LABEL, metric=train_metric)
                for r in results
            ]
            if not values:
                row.append("NaN")
                continue
            # Configurations that attain the optimal train-metric value.
            best_value = settings.METRICS[train_metric]["best"](values)
            best_results = [
                r
                for r in results
                if r.get_result(settings.YTRAIN_LABEL, metric=train_metric)
                == best_value
            ]
            # Among those, report the best test-metric value on the target.
            target_values = [
                r.get_result(target, metric=test_metric) for r in best_results
            ]
            target_best = settings.METRICS[test_metric]["best"](target_values)
            rounded = round(target_best, settings.RESULT_PRECISION)
            fmt = "%%.%df" % settings.RESULT_PRECISION
            row.append(fmt % rounded)
        table.add_row(dset, row)
    return table
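

# A worked example of the cell computation above (hypothetical numbers, with
# "best" == max for both metrics): suppose a method has three parameter
# configurations whose train-metric values are [0.81, 0.90, 0.90] and whose
# corresponding test-metric values are [0.78, 0.85, 0.88]. The best train
# value is 0.90, attained by the last two configurations; among those, the
# best test value is 0.88, so the cell shows 0.88 (rounded to
# RESULT_PRECISION digits).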