67 for (i=0; i<q->
N; i++) {
71 note(
"Boundary of the %g-th percentile determined at: %f\n",
72 percentile, boundary);
75 for (i=0; i<q->
N; i++) {
83 for (i=0; i<q->
N; i++) {
132 long i, f, r,
N, *cv_idx = NULL;
133 double p, pi, pr, pt,
139 struct GenData **train_folds = NULL,
143 struct timespec loop_s, loop_e;
148 note(
"Number of items to check: %li\n", nq->N);
152 perf =
Calloc(
double, N*repeats);
172 note(
"(%02li/%02li:%03li)\t", i+1, N, task->
ID);
173 for (r=0; r<repeats; r++) {
178 for (f=0; f<task->
folds; f++) {
182 test_folds[f], cv_idx, f);
194 mean[i] += p/((double) repeats);
199 for (f=0; f<task->
folds; f++) {
209 for (r=0; r<repeats; r++) {
210 std[i] += pow(
matrix_get(perf, repeats, i, r) - mean[i],
214 std[i] /= ((double) repeats) - 1.0;
215 std[i] = sqrt(std[i]);
219 note(
"(m = %3.3f, s = %3.3f, t = %3.3f)\n", mean[i], std[i],
227 note(
"\nBest overall configuration(s):\n");
228 note(
"ID\tweights\tepsilon\t\tp\t\tkappa\t\tlambda\t\t" 229 "mean_perf\tstd_perf\ttime_perf\n");
232 while (breakout ==
false) {
237 if ((pi - mean[i] < 0.0001) &&
238 (std[i] - pr < 0.0001) &&
239 (time[i] - pt < 0.0001)) {
240 note(
"(%li)\tw = %li\te = %f\tp = %f\t" 242 "mean: %3.3f\tstd: %3.3f\t" 245 nq->tasks[i]->weight_idx,
246 nq->tasks[i]->epsilon,
249 nq->tasks[i]->lambda,
277 const double t1 = (*(
double *) elem1);
278 const double t2 = (*(
double *) elem2);
302 double pi, pr, boundary;
303 double *local =
Malloc(
double, N);
305 local[i] = values[i];
312 boundary = (1 - pr)*local[((
long) pi)-1] + pr*local[((
long) pi)];
#define Calloc(type, size)
long folds
number of folds in cross validation
struct GenQueue * gensvm_init_queue(void)
Initialize a GenQueue structure.
double gensvm_elapsed_time(struct timespec *start, struct timespec *stop)
Calculate the time between two time recordings.
long ID
numeric id of the task in the queue
double gensvm_cross_validation(struct GenModel *model, struct GenData **train_folds, struct GenData **test_folds, long folds, long n_total)
Run cross validation with a given set of train/test folds.
#define Memset(var, type, size)
#define matrix_get(M, cols, i, j)
struct GenTask * get_next_task(struct GenQueue *q)
Get new GenTask from GenQueue.
void gensvm_free_model(struct GenModel *model)
Free allocated GenModel struct.
long i
index used for keeping track of the queue
double performance
performance after cross validation
#define Malloc(type, size)
struct GenModel * gensvm_init_model(void)
Initialize a GenModel structure.
void gensvm_consistency_repeats(struct GenQueue *q, long repeats, double percentile)
Run repeats of the GenTask structs in GenQueue to find the best configuration.
A structure to represent the data.
A structure to represent a single GenSVM model.
void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, struct GenData *test_data, long *cv_idx, long fold_idx)
Wrapper around sparse/dense versions of this function.
long n
number of instances in the dataset
void gensvm_make_cv_split(long N, long folds, long *cv_idx)
Create a cross validation split vector.
int gensvm_dsort(const void *elem1, const void *elem2)
Comparison function for doubles.
void gensvm_free_data(struct GenData *data)
Free allocated GenData struct.
void gensvm_free_queue(struct GenQueue *q)
Free the GenQueue struct.
A structure for a single task in the queue.
Header file for gensvm_consistency.c.
void gensvm_allocate_model(struct GenModel *model)
Allocate memory for a GenModel.
void gensvm_init_V(struct GenModel *from_model, struct GenModel *to_model, struct GenData *data)
Seed the matrix V from an existing model or using rand.
double gensvm_percentile(double *values, long N, double p)
Calculate the percentile of an array of doubles.
long K
number of classes in the dataset
struct GenTask ** tasks
array of pointers to Task structs
long m
number of predictors (width of RAW)
struct GenTask * gensvm_copy_task(struct GenTask *t)
Deepcopy a GenTask struct.
#define matrix_set(M, cols, i, j, val)
void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata)
Compute the kernel postprocessing factor.
long n
number of instances
struct GenData * gensvm_init_data(void)
Initialize a GenData structure.
long m
number of predictor variables in the dataset
void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
Do the preprocessing steps needed to perform kernel GenSVM.
struct GenQueue * gensvm_top_queue(struct GenQueue *q, double percentile)
Create GenQueue of tasks with performance above a given percentile.
void note(const char *fmt,...)
Parse a formatted string and write to the output stream.
struct GenData * train_data
pointer to the training data
void gensvm_task_to_model(struct GenTask *task, struct GenModel *model)
Copy parameters from GenTask to GenModel.
#define Timer(spec)
Timer macro for easily recording time.