GenSVM
gensvm_gridsearch.c
Go to the documentation of this file.
1 
31 #include "gensvm_gridsearch.h"
32 
33 extern FILE *GENSVM_OUTPUT_FILE;
34 
54 void gensvm_fill_queue(struct GenGrid *grid, struct GenQueue *queue,
55  struct GenData *train_data, struct GenData *test_data)
56 {
57  long i, j, k;
58  long N, cnt = 0;
59  struct GenTask *task = NULL;
60  queue->i = 0;
61 
62  N = grid->Np;
63  N *= grid->Nl;
64  N *= grid->Nk;
65  N *= grid->Ne;
66  N *= grid->Nw;
67  // these parameters are not necessarily non-zero
68  N *= grid->Ng > 0 ? grid->Ng : 1;
69  N *= grid->Nc > 0 ? grid->Nc : 1;
70  N *= grid->Nd > 0 ? grid->Nd : 1;
71 
72  queue->tasks = Calloc(struct GenTask *, N);
73  queue->N = N;
74 
75  // initialize all tasks
76  for (i=0; i<N; i++) {
77  task = gensvm_init_task();
78  task->ID = i;
79  task->train_data = train_data;
80  task->test_data = test_data;
81  task->folds = grid->folds;
82  task->kerneltype = grid->kerneltype;
83  queue->tasks[i] = task;
84  }
85 
86  // These loops mimick a large nested for loop. The advantage is that
87  // Nd, Nc and Ng which are on the outside of the nested for loop can
88  // now be zero, without large modification (see below). Whether this
89  // is indeed better than the nested for loop has not been tested.
90  cnt = 1;
91  i = 0;
92  while (i < N) {
93  for (j=0; j<grid->Np; j++) {
94  for (k=0; k<cnt; k++) {
95  queue->tasks[i]->p = grid->ps[j];
96  i++;
97  }
98  }
99  }
100 
101  cnt *= grid->Np;
102  i = 0;
103  while (i < N) {
104  for (j=0; j<grid->Nl; j++) {
105  for (k=0; k<cnt; k++) {
106  queue->tasks[i]->lambda =
107  grid->lambdas[j];
108  i++;
109  }
110  }
111  }
112 
113  cnt *= grid->Nl;
114  i = 0;
115  while (i < N) {
116  for (j=0; j<grid->Nk; j++) {
117  for (k=0; k<cnt; k++) {
118  queue->tasks[i]->kappa = grid->kappas[j];
119  i++;
120  }
121  }
122  }
123 
124  cnt *= grid->Nk;
125  i = 0;
126  while (i < N) {
127  for (j=0; j<grid->Nw; j++) {
128  for (k=0; k<cnt; k++) {
129  queue->tasks[i]->weight_idx =
130  grid->weight_idxs[j];
131  i++;
132  }
133  }
134  }
135 
136  cnt *= grid->Nw;
137  i = 0;
138  while (i < N) {
139  for (j=0; j<grid->Ne; j++) {
140  for (k=0; k<cnt; k++) {
141  queue->tasks[i]->epsilon = grid->epsilons[j];
142  i++;
143  }
144  }
145  }
146 
147  cnt *= grid->Ne;
148  i = 0;
149  while (i < N && grid->Ng > 0) {
150  for (j=0; j<grid->Ng; j++) {
151  for (k=0; k<cnt; k++) {
152  queue->tasks[i]->gamma = grid->gammas[j];
153  i++;
154  }
155  }
156  }
157 
158  cnt *= grid->Ng > 0 ? grid->Ng : 1;
159  i = 0;
160  while (i < N && grid->Nc > 0) {
161  for (j=0; j<grid->Nc; j++) {
162  for (k=0; k<cnt; k++) {
163  queue->tasks[i]->coef = grid->coefs[j];
164  i++;
165  }
166  }
167  }
168 
169  cnt *= grid->Nc > 0 ? grid->Nc : 1;
170  i = 0;
171  while (i < N && grid->Nd > 0) {
172  for (j=0; j<grid->Nd; j++) {
173  for (k=0; k<cnt; k++) {
174  queue->tasks[i]->degree = grid->degrees[j];
175  i++;
176  }
177  }
178  }
179 }
180 
195 bool gensvm_kernel_changed(struct GenTask *newtask, struct GenTask *oldtask)
196 {
197  if (oldtask == NULL)
198  return true;
199  if (newtask->kerneltype != oldtask->kerneltype) {
200  return true;
201  } else if (newtask->kerneltype == K_POLY) {
202  if (newtask->gamma != oldtask->gamma)
203  return true;
204  if (newtask->coef != oldtask->coef)
205  return true;
206  if (newtask->degree != oldtask->degree)
207  return true;
208  return false;
209  } else if (newtask->kerneltype == K_RBF) {
210  if (newtask->gamma != oldtask->gamma)
211  return true;
212  return false;
213  } else if (newtask->kerneltype == K_SIGMOID) {
214  if (newtask->gamma != oldtask->gamma)
215  return true;
216  if (newtask->coef != oldtask->coef)
217  return true;
218  return false;
219  }
220  return false;
221 }
222 
238 void gensvm_kernel_folds(long folds, struct GenModel *model,
239  struct GenData **train_folds, struct GenData **test_folds)
240 {
241  long f;
242 
243  if (model->kerneltype != K_LINEAR)
244  note("Computing kernel ... ");
245  for (f=0; f<folds; f++) {
246  if (train_folds[f]->Z != train_folds[f]->RAW)
247  free(train_folds[f]->Z);
248  if (test_folds[f]->Z != test_folds[f]->RAW)
249  free(test_folds[f]->Z);
250  gensvm_kernel_preprocess(model, train_folds[f]);
251  gensvm_kernel_postprocess(model, train_folds[f],
252  test_folds[f]);
253  }
254  if (model->kerneltype != K_LINEAR)
255  note("done.\n");
256 }
257 
275 {
276  long f, folds;
277  double perf, duration, current_max = 0;
278  struct GenTask *task = get_next_task(q);
279  struct GenTask *prevtask = NULL;
280  struct GenModel *model = gensvm_init_model();
281  struct timespec main_s, main_e, loop_s, loop_e;
282 
283  folds = task->folds;
284 
285  model->n = 0;
286  model->m = task->train_data->m;
287  model->K = task->train_data->K;
288  gensvm_allocate_model(model);
289  gensvm_init_V(NULL, model, task->train_data);
290 
291  long *cv_idx = Calloc(long, task->train_data->n);
292  gensvm_make_cv_split(task->train_data->n, task->folds, cv_idx);
293 
294  struct GenData **train_folds = Malloc(struct GenData *, task->folds);
295  struct GenData **test_folds = Malloc(struct GenData *, task->folds);
296  for (f=0; f<folds; f++) {
297  train_folds[f] = gensvm_init_data();
298  test_folds[f] = gensvm_init_data();
299  gensvm_get_tt_split(task->train_data, train_folds[f],
300  test_folds[f], cv_idx, f);
301  }
302 
303  Timer(main_s);
304  while (task) {
305  gensvm_task_to_model(task, model);
306  if (gensvm_kernel_changed(task, prevtask)) {
307  gensvm_kernel_folds(task->folds, model, train_folds,
308  test_folds);
309  }
310 
311  Timer(loop_s);
312  perf = gensvm_cross_validation(model, train_folds, test_folds,
313  folds, task->train_data->n);
314  Timer(loop_e);
315 
316  current_max = maximum(current_max, perf);
317  duration = gensvm_elapsed_time(&loop_s, &loop_e);
318 
319  gensvm_gridsearch_progress(task, q->N, perf, duration,
320  current_max);
321 
322  q->tasks[task->ID]->performance = perf;
323  prevtask = task;
324  task = get_next_task(q);
325  }
326  Timer(main_e);
327 
328  note("\nTotal elapsed training time: %8.8f seconds\n",
329  gensvm_elapsed_time(&main_s, &main_e));
330 
331  gensvm_free_model(model);
332  for (f=0; f<folds; f++) {
333  gensvm_free_data(train_folds[f]);
334  gensvm_free_data(test_folds[f]);
335  }
336  free(train_folds);
337  free(test_folds);
338  free(cv_idx);
339 }
340 
357 void gensvm_gridsearch_progress(struct GenTask *task, long N, double perf,
358  double duration, double current_max)
359 {
360  char buffer[GENSVM_MAX_LINE_LENGTH];
361  sprintf(buffer, "(%03li/%03li)\t", task->ID+1, N);
362  if (task->kerneltype == K_POLY)
363  sprintf(buffer + strlen(buffer), "d = %2.2f\t", task->degree);
364  if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID)
365  sprintf(buffer + strlen(buffer), "c = %2.2f\t", task->coef);
366  if (task->kerneltype == K_POLY || task->kerneltype == K_SIGMOID ||
367  task->kerneltype == K_RBF)
368  sprintf(buffer + strlen(buffer), "g = %3.3f\t", task->gamma);
369  sprintf(buffer + strlen(buffer), "eps = %g\tw = %i\tk = %2.2f\t"
370  "l = %f\tp = %2.2f\t", task->epsilon,
371  task->weight_idx, task->kappa, task->lambda, task->p);
372  note(buffer);
373  note("\t%3.3f%% (%3.3fs)\t(best = %3.3f%%)\n", perf, duration,
374  current_max);
375 }
#define Calloc(type, size)
Definition: gensvm_memory.h:40
long folds
number of folds in cross validation
Definition: gensvm_task.h:60
Structure for describing the entire grid search.
Definition: gensvm_grid.h:67
double * epsilons
array of epsilon values
Definition: gensvm_grid.h:103
double coef
coef parameter for the GenModel
Definition: gensvm_task.h:74
double gensvm_elapsed_time(struct timespec *start, struct timespec *stop)
Calculate the time between two time recordings.
Definition: gensvm_timer.c:58
long ID
numeric id of the task in the queue
Definition: gensvm_task.h:62
double gensvm_cross_validation(struct GenModel *model, struct GenData **train_folds, struct GenData **test_folds, long folds, long n_total)
Run cross validation with a given set of train/test folds.
#define GENSVM_MAX_LINE_LENGTH
long Np
size of the array of p values
Definition: gensvm_grid.h:79
long K
number of classes
Definition: gensvm_base.h:58
long Ne
size of the array of epsilon values
Definition: gensvm_grid.h:85
long Nc
size of the array of coef values
Definition: gensvm_grid.h:91
double * degrees
array of degree values
Definition: gensvm_grid.h:109
long Nk
size of the array of kappa values
Definition: gensvm_grid.h:83
long Nw
size of the array of weight_idx values
Definition: gensvm_grid.h:87
struct GenTask * get_next_task(struct GenQueue *q)
Get new GenTask from GenQueue.
Definition: gensvm_queue.c:82
KernelType kerneltype
type of kernel to use throughout training
Definition: gensvm_grid.h:70
struct GenTask * gensvm_init_task(void)
Initialize a GenTask structure.
Definition: gensvm_task.c:38
long N
size of task array
Definition: gensvm_queue.h:50
void gensvm_free_model(struct GenModel *model)
Free allocated GenModel struct.
Definition: gensvm_base.c:211
bool gensvm_kernel_changed(struct GenTask *newtask, struct GenTask *oldtask)
Check if the kernel parameters change between tasks.
long i
index used for keeping track of the queue
Definition: gensvm_queue.h:52
double performance
performance after cross validation
Definition: gensvm_task.h:84
#define Malloc(type, size)
Definition: gensvm_memory.h:48
struct GenModel * gensvm_init_model(void)
Initialize a GenModel structure.
Definition: gensvm_base.c:102
double gamma
gamma parameter for the GenModel
Definition: gensvm_task.h:72
double * lambdas
array of lambda values
Definition: gensvm_grid.h:99
Simple task queue.
Definition: gensvm_queue.h:47
KernelType kerneltype
kerneltype parameter for the GenModel
Definition: gensvm_task.h:56
A structure to represent the data.
Definition: gensvm_base.h:57
struct GenData * test_data
pointer to the test data (if any)
Definition: gensvm_task.h:82
A structure to represent a single GenSVM model.
Definition: gensvm_base.h:92
void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, struct GenData *test_data, long *cv_idx, long fold_idx)
Wrapper around sparse/dense versions of this function.
void gensvm_gridsearch_progress(struct GenTask *task, long N, double perf, double duration, double current_max)
Print the description of the current task on screen.
long n
number of instances in the dataset
Definition: gensvm_base.h:97
void gensvm_make_cv_split(long N, long folds, long *cv_idx)
Create a cross validation split vector.
Header file for gensvm_gridsearch.c.
#define maximum(a, b)
double degree
degree parameter for the GenModel
Definition: gensvm_task.h:76
long Nl
size of the array of lambda values
Definition: gensvm_grid.h:81
double * kappas
array of kappa values
Definition: gensvm_grid.h:101
long Ng
size of the array of gamma values
Definition: gensvm_grid.h:89
double lambda
lambda parameter for the GenModel
Definition: gensvm_task.h:68
void gensvm_free_data(struct GenData *data)
Free allocated GenData struct.
Definition: gensvm_base.c:73
A structure for a single task in the queue.
Definition: gensvm_task.h:55
void gensvm_allocate_model(struct GenModel *model)
Allocate memory for a GenModel.
Definition: gensvm_base.c:144
FILE * GENSVM_OUTPUT_FILE
Definition: gensvm_print.c:33
void gensvm_train_queue(struct GenQueue *q)
Run the grid search for a GenQueue.
void gensvm_init_V(struct GenModel *from_model, struct GenModel *to_model, struct GenData *data)
Seed the matrix V from an existing model or using rand.
Definition: gensvm_init.c:57
long K
number of classes in the dataset
Definition: gensvm_base.h:95
struct GenTask ** tasks
array of pointers to Task structs
Definition: gensvm_queue.h:48
int * weight_idxs
array of weight_idxs
Definition: gensvm_grid.h:95
void gensvm_kernel_folds(long folds, struct GenModel *model, struct GenData **train_folds, struct GenData **test_folds)
Compute the kernels for the folds of the train and test datasets.
long folds
number of folds in cross validation
Definition: gensvm_grid.h:72
long m
number of predictors (width of RAW)
Definition: gensvm_base.h:62
double kappa
kappa parameter for the GenModel
Definition: gensvm_task.h:66
double * gammas
array of gamma values
Definition: gensvm_grid.h:105
KernelType kerneltype
type of kernel used in the model
Definition: gensvm_base.h:136
double * coefs
array of coef values
Definition: gensvm_grid.h:107
long Nd
size of the array of degree values
Definition: gensvm_grid.h:93
void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata)
Compute the kernel postprocessing factor.
long n
number of instances
Definition: gensvm_base.h:60
struct GenData * gensvm_init_data(void)
Initialize a GenData structure.
Definition: gensvm_base.c:45
double epsilon
epsilon parameter for the GenModel
Definition: gensvm_task.h:70
long m
number of predictor variables in the dataset
Definition: gensvm_base.h:99
void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
Do the preprocessing steps needed to perform kernel GenSVM.
Definition: gensvm_kernel.c:75
double * ps
array of p values
Definition: gensvm_grid.h:97
double p
p parameter for the GenModel
Definition: gensvm_task.h:64
void gensvm_fill_queue(struct GenGrid *grid, struct GenQueue *queue, struct GenData *train_data, struct GenData *test_data)
Initialize a GenQueue from a Training instance.
void note(const char *fmt,...)
Parse a formatted string and write to the output stream.
Definition: gensvm_print.c:62
struct GenData * train_data
pointer to the training data
Definition: gensvm_task.h:80
void gensvm_task_to_model(struct GenTask *task, struct GenModel *model)
Copy parameters from GenTask to GenModel.
Definition: gensvm_task.c:122
#define Timer(spec)
Timer macro for easily recording time.
Definition: gensvm_timer.h:37
int weight_idx
weight_idx parameter for the GenModel
Definition: gensvm_task.h:58