GenSVM
gensvm_optimize.c
Go to the documentation of this file.
1 
31 #include "gensvm_optimize.h"
32 
36 #ifndef GENSVM_PRINT_ITER
37  #define GENSVM_PRINT_ITER 100
38 #endif
39 
56 void gensvm_optimize(struct GenModel *model, struct GenData *data)
57 {
58  long it = 0;
59  double L, Lbar;
60 
61  long n = model->n;
62  long m = model->m;
63  long K = model->K;
64 
65  // initialize the workspace
66  struct GenWork *work = gensvm_init_work(model);
67 
68  // print some info on the dataset and model configuration
69  note("Starting main loop.\n");
70  note("Dataset:\n");
71  note("\tn = %i\n", n);
72  note("\tm = %i\n", m);
73  note("\tK = %i\n", K);
74  note("Parameters:\n");
75  note("\tkappa = %f\n", model->kappa);
76  note("\tp = %f\n", model->p);
77  note("\tlambda = %15.16f\n", model->lambda);
78  note("\tepsilon = %g\n", model->epsilon);
79  note("\n");
80 
81  // compute necessary simplex vectors
82  gensvm_simplex(model);
83  gensvm_simplex_diff(model);
84 
85  // get initial loss
86  L = gensvm_get_loss(model, data, work);
87  Lbar = L + 2.0*model->epsilon*L;
88 
89  // run main loop
90  while ((it < model->max_iter) && (Lbar - L)/L > model->epsilon)
91  {
92  // ensures V contains newest V and Vbar contains V from
93  // previous
94  gensvm_get_update(model, data, work);
95  if (it > 50)
96  gensvm_step_doubling(model);
97 
98  Lbar = L;
99  L = gensvm_get_loss(model, data, work);
100 
101  if (it % GENSVM_PRINT_ITER == 0)
102  note("iter = %li, L = %15.16f, Lbar = %15.16f, "
103  "reldiff = %15.16f\n", it, L, Lbar, (Lbar - L)/L);
104  it++;
105  }
106 
107  // status == 0 means training was successful
108  model->status = 0;
109 
110  // print warnings if necessary
111  if (L > Lbar) {
112  err("[GenSVM Warning]: Negative step occurred in "
113  "majorization.\n");
114  model->status = 1;
115  }
116 
117  if (it >= model->max_iter) {
118  err("[GenSVM Warning]: maximum number of iterations "
119  "reached.\n");
120  model->status = 2;
121  }
122 
123  // print final iteration count and loss
124  note("Optimization finished, iter = %li, loss = %15.16f, "
125  "rel. diff. = %15.16f\n", it-1, L,
126  (Lbar - L)/L);
127 
128  // compute and print the number of SVs in the model
129  note("Number of support vectors: %li\n", gensvm_num_sv(model));
130 
131  // store the training error in the model
132  model->training_error = (Lbar - L)/L;
133 
134  // store the iteration count in the model
135  model->elapsed_iter = it - 1;
136 
137  // free the workspace
138  gensvm_free_work(work);
139 }
140 
155 double gensvm_get_loss(struct GenModel *model, struct GenData *data,
156  struct GenWork *work)
157 {
158  long i, j;
159  long n = model->n;
160  long K = model->K;
161  long m = model->m;
162 
163  double value, rowvalue, loss = 0.0;
164 
165  gensvm_calculate_errors(model, data, work->ZV);
166  gensvm_calculate_huber(model);
167 
168  for (i=0; i<n; i++) {
169  rowvalue = 0;
170  value = 0;
171  for (j=0; j<K; j++) {
172  if (j == (data->y[i]-1))
173  continue;
174  value = matrix_get(model->H, K, i, j);
175  value = pow(value, model->p);
176  rowvalue += value;
177  }
178  rowvalue = pow(rowvalue, 1.0/(model->p));
179  rowvalue *= model->rho[i];
180  loss += rowvalue;
181  }
182  loss /= ((double) n);
183 
184  value = 0;
185  for (i=1; i<m+1; i++) {
186  for (j=0; j<K-1; j++) {
187  value += pow(matrix_get(model->V, K-1, i, j), 2.0);
188  }
189  }
190  loss += model->lambda * value;
191 
192  return loss;
193 }
194 
206 void gensvm_step_doubling(struct GenModel *model)
207 {
208  long i, j;
209  double value;
210 
211  long m = model->m;
212  long K = model->K;
213 
214  for (i=0; i<m+1; i++) {
215  for (j=0; j<K-1; j++) {
216  matrix_mul(model->V, K-1, i, j, 2.0);
217  value = - matrix_get(model->Vbar, K-1, i, j);
218  matrix_add(model->V, K-1, i, j, value);
219  }
220  }
221 }
222 
242 void gensvm_calculate_huber(struct GenModel *model)
243 {
244  long i, j;
245  double q, value;
246 
247  for (i=0; i<model->n; i++) {
248  for (j=0; j<model->K; j++) {
249  q = matrix_get(model->Q, model->K, i, j);
250  value = 0.0;
251  if (q <= -model->kappa) {
252  value = 1.0 - q - (model->kappa+1.0)/2.0;
253  } else if (q <= 1.0) {
254  value = 1.0/(2.0*model->kappa+2.0)*pow(1.0 - q,
255  2.0);
256  }
257  matrix_set(model->H, model->K, i, j, value);
258  }
259  }
260 }
261 
277 void gensvm_calculate_errors(struct GenModel *model, struct GenData *data,
278  double *ZV)
279 {
280  long i, j;
281  double q, *uu_row = NULL;
282 
283  long n = model->n;
284  long K = model->K;
285 
286  gensvm_calculate_ZV(model, data, ZV);
287 
288  for (i=0; i<n; i++) {
289  for (j=0; j<K; j++) {
290  if (j == (data->y[i]-1))
291  continue;
292  uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)];
293  q = cblas_ddot(K-1, &ZV[i*(K-1)], 1, uu_row, 1);
294  matrix_set(model->Q, K, i, j, q);
295  }
296  }
297 }
298 
void gensvm_calculate_huber(struct GenModel *model)
Calculate the Huber hinge errors.
long K
number of classes for the workspace
Definition: gensvm_base.h:156
double * H
Huber weighted error matrix.
Definition: gensvm_base.h:126
double epsilon
stopping criterion for the IM algorithm.
Definition: gensvm_base.h:101
void err(const char *fmt,...)
Parse a formatted string and write it to standard error.
Definition: gensvm_print.c:84
double training_error
loss function value after training has finished
Definition: gensvm_base.h:130
void gensvm_simplex(struct GenModel *model)
Generate matrix of simplex vertex coordinates.
void gensvm_get_update(struct GenModel *model, struct GenData *data, struct GenWork *work)
Perform a single step of the majorization algorithm to update V.
long m
number of features for the workspace
Definition: gensvm_base.h:154
double p
parameter for the L-p norm in the loss function
Definition: gensvm_base.h:103
double * UU
simplex difference matrix
Definition: gensvm_base.h:122
void gensvm_calculate_errors(struct GenModel *model, struct GenData *data, double *ZV)
Calculate the scalar errors.
#define matrix_get(M, cols, i, j)
double * ZV
n x (K-1) working matrix for the Z * V calculation
Definition: gensvm_base.h:169
void gensvm_free_work(struct GenWork *work)
Free an allocated GenWork instance.
Definition: gensvm_base.c:277
A structure to hold the GenSVM workspace.
Definition: gensvm_base.h:151
int status
status of the model after training
Definition: gensvm_base.h:143
long gensvm_num_sv(struct GenModel *model)
Calculate the number of support vectors in a model.
Definition: gensvm_sv.c:46
struct GenWork * gensvm_init_work(struct GenModel *model)
Initialize the workspace structure.
Definition: gensvm_base.c:245
double * V
augmented weight matrix
Definition: gensvm_base.h:115
#define matrix_add(M, cols, i, j, val)
double * Q
error matrix
Definition: gensvm_base.h:124
long * y
array of class labels, 1..K
Definition: gensvm_base.h:66
void gensvm_simplex_diff(struct GenModel *model)
Generate the simplex difference matrix.
A structure to represent the data.
Definition: gensvm_base.h:57
A structure to represent a single GenSVM model.
Definition: gensvm_base.h:92
void gensvm_step_doubling(struct GenModel *model)
Use step doubling.
void gensvm_calculate_ZV(struct GenModel *model, struct GenData *data, double *ZV)
Wrapper around sparse/dense versions of this function.
Definition: gensvm_zv.c:50
double * Vbar
Definition: gensvm_base.h:117
double gensvm_get_loss(struct GenModel *model, struct GenData *data, struct GenWork *work)
Calculate the current value of the loss function.
long n
number of instances in the dataset
Definition: gensvm_base.h:97
long max_iter
maximum number of iterations of the algorithm
Definition: gensvm_base.h:141
double * rho
vector of instance weights
Definition: gensvm_base.h:128
long elapsed_iter
number of elapsed iterations in training
Definition: gensvm_base.h:132
Header file for gensvm_optimize.c.
void gensvm_optimize(struct GenModel *model, struct GenData *data)
The main training loop for GenSVM.
double kappa
parameter for the Huber hinge function
Definition: gensvm_base.h:105
long K
number of classes in the dataset
Definition: gensvm_base.h:95
#define matrix_set(M, cols, i, j, val)
#define matrix_mul(M, cols, i, j, val)
#define GENSVM_PRINT_ITER
long n
number of instances for the workspace
Definition: gensvm_base.h:152
long m
number of predictor variables in the dataset
Definition: gensvm_base.h:99
double lambda
regularization parameter in the loss function
Definition: gensvm_base.h:107
void note(const char *fmt,...)
Parse a formatted string and write to the output stream.
Definition: gensvm_print.c:62