GenSVM
gensvm_kernel.c
Go to the documentation of this file.
1 
32 #include "gensvm_kernel.h"
33 #include "gensvm_print.h"
34 
47  struct GenData *data)
48 {
49  data->kerneltype = model->kerneltype;
50  data->gamma = model->gamma;
51  data->coef = model->coef;
52  data->degree = model->degree;
53 }
54 
75 void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
76 {
77  if (model->kerneltype == K_LINEAR) {
78  data->r = data->m;
79  return;
80  }
81 
82  long r, n = data->n;
83  double *P = NULL,
84  *Sigma = NULL,
85  *K = NULL;
86 
87  // build the kernel matrix
88  K = Calloc(double, n*n);
89  gensvm_kernel_compute(model, data, K);
90 
91  // generate the eigen decomposition
92  r = gensvm_kernel_eigendecomp(K, n, model->kernel_eigen_cutoff, &P,
93  &Sigma);
94 
95  // build M and set to data (leave RAW intact)
96  gensvm_kernel_trainfactor(data, P, Sigma, r);
97 
98  // Set Sigma to data->Sigma (need it again for prediction)
99  if (data->Sigma != NULL) {
100  free(data->Sigma);
101  data->Sigma = NULL;
102  }
103  data->Sigma = Sigma;
104 
105  // write kernel params to data
107 
108  free(K);
109  free(P);
110 }
111 
126  struct GenData *traindata, struct GenData *testdata)
127 {
128  if (model->kerneltype == K_LINEAR) {
129  testdata->r = testdata->m;
130  return;
131  }
132 
133  // build the cross kernel matrix between train and test
134  double *K2 = gensvm_kernel_cross(model, traindata, testdata);
135 
136  // generate the data matrix N = K2 * M * Sigma^{-2}
137  gensvm_kernel_testfactor(testdata, traindata, K2);
138 
139  free(K2);
140 }
141 
157 void gensvm_kernel_compute(struct GenModel *model, struct GenData *data,
158  double *K)
159 {
160  long i, j;
161  long n = data->n;
162  double value;
163  double *x1 = NULL,
164  *x2 = NULL;
165 
166  for (i=0; i<n; i++) {
167  for (j=i; j<n; j++) {
168  x1 = &data->RAW[i*(data->m+1)+1];
169  x2 = &data->RAW[j*(data->m+1)+1];
170  if (model->kerneltype == K_POLY)
171  value = gensvm_kernel_dot_poly(x1, x2, data->m,
172  model->gamma, model->coef,
173  model->degree);
174  else if (model->kerneltype == K_RBF)
175  value = gensvm_kernel_dot_rbf(x1, x2, data->m,
176  model-> gamma);
177  else if (model->kerneltype == K_SIGMOID)
178  value = gensvm_kernel_dot_sigmoid(x1, x2,
179  data->m, model->gamma,
180  model->coef);
181  else {
182  // LCOV_EXCL_START
183  err("[GenSVM Error]: Unknown kernel type in "
184  "gensvm_make_kernel\n");
185  exit(EXIT_FAILURE);
186  // LCOV_EXCL_STOP
187  }
188  matrix_set(K, n, i, j, value);
189  matrix_set(K, n, j, i, value);
190  }
191  }
192 }
193 
215 long gensvm_kernel_eigendecomp(double *K, long n, double cutoff, double **P_ret,
216  double **Sigma_ret)
217 {
218  int M, status, LWORK, *IWORK = NULL,
219  *IFAIL = NULL;
220  long i, j, num_eigen, cutoff_idx;
221  double max_eigen, abstol, *WORK = NULL,
222  *Sigma = NULL,
223  *P = NULL;
224 
225  double *tempSigma = Malloc(double, n);
226  double *tempP = Malloc(double, n*n);
227 
228  IWORK = Malloc(int, 5*n);
229  IFAIL = Malloc(int, n);
230 
231  // highest precision eigenvalues, may reduce for speed
232  abstol = 2.0*dlamch('S');
233 
234  // first perform a workspace query to determine optimal size of the
235  // WORK array.
236  WORK = Malloc(double, 1);
237  status = dsyevx('V', 'A', 'U', n, K, n, 0, 0, 0, 0, abstol, &M,
238  tempSigma, tempP, n, WORK, -1, IWORK, IFAIL);
239  LWORK = WORK[0];
240 
241  // allocate the requested memory for the eigendecomposition
242  WORK = (double *)realloc(WORK, LWORK*sizeof(double));
243  status = dsyevx('V', 'A', 'U', n, K, n, 0, 0, 0, 0, abstol, &M,
244  tempSigma, tempP, n, WORK, LWORK, IWORK, IFAIL);
245 
246  if (status != 0) {
247  // LCOV_EXCL_START
248  err("[GenSVM Error]: Nonzero exit status from dsyevx.\n");
249  exit(EXIT_FAILURE);
250  // LCOV_EXCL_STOP
251  }
252 
253  // Select the desired number of eigenvalues, depending on their size.
254  // dsyevx sorts eigenvalues in ascending order.
255  max_eigen = tempSigma[n-1];
256  cutoff_idx = 0;
257 
258  for (i=0; i<n; i++) {
259  if (tempSigma[i]/max_eigen > cutoff) {
260  cutoff_idx = i;
261  break;
262  }
263  }
264 
265  num_eigen = n - cutoff_idx;
266 
267  Sigma = Calloc(double, num_eigen);
268  for (i=0; i<num_eigen; i++) {
269  Sigma[i] = tempSigma[n-1 - i];
270  }
271 
272  // revert P to row-major order and copy only the the columns
273  // corresponding to the selected eigenvalues
274  P = Calloc(double, n*num_eigen);
275  for (j=n-1; j>n-1-num_eigen; j--) {
276  for (i=0; i<n; i++) {
277  P[i*num_eigen + (n-1)-j] = tempP[i + j*n];
278  }
279  }
280 
281  free(tempSigma);
282  free(tempP);
283  free(IWORK);
284  free(IFAIL);
285  free(WORK);
286 
287  *Sigma_ret = Sigma;
288  *P_ret = P;
289 
290  return num_eigen;
291 }
292 
311 double *gensvm_kernel_cross(struct GenModel *model, struct GenData *data_train,
312  struct GenData *data_test)
313 {
314  long i, j;
315  long n_train = data_train->n;
316  long n_test = data_test->n;
317  long m = data_test->m;
318  double value, *x1 = NULL,
319  *x2 = NULL,
320  *K2 = Calloc(double, n_test * n_train);
321 
322  for (i=0; i<n_test; i++) {
323  for (j=0; j<n_train; j++) {
324  x1 = &data_test->RAW[i*(m+1)+1];
325  x2 = &data_train->RAW[j*(m+1)+1];
326  if (model->kerneltype == K_POLY)
327  value = gensvm_kernel_dot_poly(x1, x2, m,
328  model->gamma, model->coef,
329  model->degree);
330  else if (model->kerneltype == K_RBF)
331  value = gensvm_kernel_dot_rbf(x1, x2, m,
332  model->gamma);
333  else if (model->kerneltype == K_SIGMOID)
334  value = gensvm_kernel_dot_sigmoid(x1, x2, m,
335  model->gamma, model->coef);
336  else {
337  // LCOV_EXCL_START
338  err("[GenSVM Error]: Unknown kernel type in "
339  "gensvm_make_crosskernel\n");
340  exit(EXIT_FAILURE);
341  // LCOV_EXCL_STOP
342  }
343  matrix_set(K2, n_train, i, j, value);
344  }
345  }
346  return K2;
347 }
348 
365 void gensvm_kernel_trainfactor(struct GenData *data, double *P, double *Sigma,
366  long r)
367 {
368  long i, j, n = data->n;
369  double value;
370 
371  // allocate Z
372  data->Z = Calloc(double, n*(r+1));
373 
374  // Write data->Z = [1 M] = [1 P*Sigma]
375  for (i=0; i<n; i++) {
376  for (j=0; j<r; j++) {
377  value = matrix_get(P, r, i, j);
378  value *= matrix_get(Sigma, 1, j, 0);
379  matrix_set(data->Z, r+1, i, j+1, value);
380  }
381  matrix_set(data->Z, r+1, i, 0, 1.0);
382  }
383 
384  // Set data->r to r so data knows the width of Z
385  data->r = r;
386 }
387 
406 void gensvm_kernel_testfactor(struct GenData *testdata,
407  struct GenData *traindata, double *K2)
408 {
409  long n1, n2, r, i, j;
410  double value, *N = NULL,
411  *M = NULL;
412 
413  n1 = traindata->n;
414  n2 = testdata->n;
415  r = traindata->r;
416 
417  N = Calloc(double, n2*r);
418  M = Calloc(double, n1*r);
419 
420  // copy M from traindata->Z because we need it in dgemm without column
421  // of 1's.
422  for (i=0; i<n1; i++) {
423  for (j=0; j<r; j++) {
424  value = matrix_get(traindata->Z, r+1, i, j+1);
425  matrix_set(M, r, i, j, value);
426  }
427  }
428 
429  // Multiply K2 with M and store in N
430  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n2, r, n1, 1.0,
431  K2, n1, M, r, 0.0, N, r);
432 
433  // Multiply N with Sigma^{-2}
434  for (j=0; j<r; j++) {
435  value = pow(matrix_get(traindata->Sigma, 1, j, 0), -2.0);
436  for (i=0; i<n2; i++)
437  matrix_mul(N, r, i, j, value);
438  }
439 
440  // write N to Z with a column of ones
441  testdata->Z = Calloc(double, n2*(r+1));
442  for (i=0; i<n2; i++) {
443  for (j=0; j<r; j++) {
444  matrix_set(testdata->Z, r+1, i, j+1,
445  matrix_get(N, r, i, j));
446  }
447  matrix_set(testdata->Z, r+1, i, 0, 1.0);
448  }
449  // Set r to testdata
450  testdata->r = r;
451 
452  free(M);
453  free(N);
454 }
455 
472 double gensvm_kernel_dot_rbf(double *x1, double *x2, long n, double gamma)
473 {
474  long i;
475  double value = 0.0;
476 
477  for (i=0; i<n; i++)
478  value += (x1[i] - x2[i]) * (x1[i] - x2[i]);
479  value *= -gamma;
480  return exp(value);
481 }
482 
503 double gensvm_kernel_dot_poly(double *x1, double *x2, long n, double gamma,
504  double coef, double degree)
505 {
506  double value = cblas_ddot(n, x1, 1, x2, 1);
507  value *= gamma;
508  value += coef;
509  return pow(value, degree);
510 }
511 
530 double gensvm_kernel_dot_sigmoid(double *x1, double *x2, long n, double gamma,
531  double coef)
532 {
533  double value = cblas_ddot(n, x1, 1, x2, 1);
534  value *= gamma;
535  value += coef;
536  return tanh(value);
537 }
538 
550 int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
551  double VL, double VU, int IL, int IU, double ABSTOL, int *M,
552  double *W, double *Z, int LDZ, double *WORK, int LWORK,
553  int *IWORK, int *IFAIL)
554 {
555  extern void dsyevx_(char *JOBZ, char *RANGE, char *UPLO, int *Np,
556  double *A, int *LDAp, double *VLp, double *VUp,
557  int *ILp, int *IUp, double *ABSTOLp, int *M,
558  double *W, double *Z, int *LDZp, double *WORK,
559  int *LWORKp, int *IWORK, int *IFAIL, int *INFOp);
560  int INFO;
561  dsyevx_(&JOBZ, &RANGE, &UPLO, &N, A, &LDA, &VL, &VU, &IL, &IU, &ABSTOL,
562  M, W, Z, &LDZ, WORK, &LWORK, IWORK, IFAIL, &INFO);
563  return INFO;
564 }
565 
575 double dlamch(char CMACH)
576 {
577  extern double dlamch_(char *CMACH);
578  return dlamch_(&CMACH);
579 }
#define Calloc(type, size)
Definition: gensvm_memory.h:40
double gamma
kernel parameter for RBF, poly, and sigmoid
Definition: gensvm_base.h:80
void err(const char *fmt,...)
Parse a formatted string and write it to standard error.
Definition: gensvm_print.c:84
void gensvm_kernel_trainfactor(struct GenData *data, double *P, double *Sigma, long r)
Compute the training factor as part of kernel preprocessing.
int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA, double VL, double VU, int IL, int IU, double ABSTOL, int *M, double *W, double *Z, int LDZ, double *WORK, int LWORK, int *IWORK, int *IFAIL)
Compute the eigenvalues and optionally the eigenvectors of a symmetric matrix.
#define matrix_get(M, cols, i, j)
double degree
kernel parameter for poly
Definition: gensvm_base.h:84
void gensvm_kernel_postprocess(struct GenModel *model, struct GenData *traindata, struct GenData *testdata)
Compute the kernel postprocessing factor.
double dlamch(char CMACH)
Determine double precision machine parameters.
double * Z
Definition: gensvm_base.h:68
long gensvm_kernel_eigendecomp(double *K, long n, double cutoff, double **P_ret, double **Sigma_ret)
Find the (reduced) eigendecomposition of a kernel matrix.
#define Malloc(type, size)
Definition: gensvm_memory.h:48
A structure to represent the data.
Definition: gensvm_base.h:57
A structure to represent a single GenSVM model.
Definition: gensvm_base.h:92
double gensvm_kernel_dot_poly(double *x1, double *x2, long n, double gamma, double coef, double degree)
Compute the polynomial kernel between two vectors.
Header file for gensvm_kernel.c.
KernelType kerneltype
Definition: gensvm_base.h:77
double gensvm_kernel_dot_sigmoid(double *x1, double *x2, long n, double gamma, double coef)
Compute the sigmoid kernel between two vectors.
double * Sigma
eigenvalues from the reduced eigendecomposition
Definition: gensvm_base.h:75
void gensvm_kernel_copy_kernelparam_to_data(struct GenModel *model, struct GenData *data)
Copy the kernelparameters from GenModel to GenData.
Definition: gensvm_kernel.c:46
long r
number of eigenvalues (width of Z)
Definition: gensvm_base.h:64
void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
Do the preprocessing steps needed to perform kernel GenSVM.
Definition: gensvm_kernel.c:75
double * gensvm_kernel_cross(struct GenModel *model, struct GenData *data_train, struct GenData *data_test)
Compute the kernel crossproduct between two datasets.
double degree
kernel parameter for poly
Definition: gensvm_base.h:113
long m
number of predictors (width of RAW)
Definition: gensvm_base.h:62
double coef
kernel parameter for poly and sigmoid
Definition: gensvm_base.h:111
#define matrix_set(M, cols, i, j, val)
KernelType kerneltype
type of kernel used in the model
Definition: gensvm_base.h:136
#define matrix_mul(M, cols, i, j, val)
double gamma
kernel parameter for RBF, poly, and sigmoid
Definition: gensvm_base.h:109
double coef
kernel parameter for poly and sigmoid
Definition: gensvm_base.h:82
long n
number of instances
Definition: gensvm_base.h:60
void gensvm_kernel_testfactor(struct GenData *testdata, struct GenData *traindata, double *K2)
Calculate the matrix product for the testfactor.
double * RAW
augmented raw data matrix
Definition: gensvm_base.h:73
void gensvm_kernel_compute(struct GenModel *model, struct GenData *data, double *K)
Compute the kernel matrix.
double kernel_eigen_cutoff
cutoff value for the ratio of eigenvalues in the reduced
Definition: gensvm_base.h:138
Header file for gensvm_print.c.
double gensvm_kernel_dot_rbf(double *x1, double *x2, long n, double gamma)
Compute the RBF kernel between two vectors.