Line data Source code
1 : /**
2 : * @file gensvm_kernel.c
3 : * @author G.J.J. van den Burg
4 : * @date 2013-10-18
5 : * @brief Defines main functions for use of kernels in GenSVM.
6 : *
7 : * @details
8 : * Functions for constructing different kernels using user-supplied
9 : * parameters. Also contains the functions for decomposing the
10 : * kernel matrix using several decomposition methods.
11 : *
12 : * @copyright
13 : Copyright 2016, G.J.J. van den Burg.
14 :
15 : This file is part of GenSVM.
16 :
17 : GenSVM is free software: you can redistribute it and/or modify
18 : it under the terms of the GNU General Public License as published by
19 : the Free Software Foundation, either version 3 of the License, or
20 : (at your option) any later version.
21 :
22 : GenSVM is distributed in the hope that it will be useful,
23 : but WITHOUT ANY WARRANTY; without even the implied warranty of
24 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 : GNU General Public License for more details.
26 :
27 : You should have received a copy of the GNU General Public License
28 : along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
29 :
30 : */
31 :
32 : #include "gensvm_kernel.h"
33 : #include "gensvm_print.h"
34 :
35 : /**
36 : * @brief Copy the kernelparameters from GenModel to GenData
37 : *
38 : * @details
39 : * This is a little utility function to copy the kernel type and kernel
40 : * parameters from a GenModel struct to a GenData struct.
41 : *
42 : * @param[in] model a GenModel struct
43 : * @param[in] data a GenData struct
44 : *
45 : */
46 6 : void gensvm_kernel_copy_kernelparam_to_data(struct GenModel *model,
47 : struct GenData *data)
48 : {
49 6 : data->kerneltype = model->kerneltype;
50 6 : data->gamma = model->gamma;
51 6 : data->coef = model->coef;
52 6 : data->degree = model->degree;
53 6 : }
54 :
55 : /**
56 : * @brief Do the preprocessing steps needed to perform kernel GenSVM
57 : *
58 : * @details
59 : * To achieve nonlinearity through kernels in GenSVM, a preprocessing step is
60 : * needed. This preprocessing step computes the full kernel matrix, and an
61 : * eigendecomposition of this matrix. Next, it computes a matrix @f$\textbf{M}
62 : * = \textbf{P}\boldsymbol{\Sigma}@f$ which takes the role as data matrix in
63 : * the optimization algorithm.
64 : *
65 : * @sa
66 : * gensvm_kernel_compute(), gensvm_kernel_eigendecomp(),
67 : * gensvm_kernel_trainfactor(), gensvm_kernel_postprocess()
68 : *
69 : * @param[in] model input GenSVM model
70 : * @param[in,out] data input structure with the data. On exit,
71 : * contains the training factor in GenData::Z,
72 : * and the original data in GenData::RAW
73 : *
74 : */
75 4 : void gensvm_kernel_preprocess(struct GenModel *model, struct GenData *data)
76 : {
77 4 : if (model->kerneltype == K_LINEAR) {
78 2 : data->r = data->m;
79 2 : return;
80 : }
81 :
82 2 : long r, n = data->n;
83 2 : double *P = NULL,
84 2 : *Sigma = NULL,
85 2 : *K = NULL;
86 :
87 : // build the kernel matrix
88 2 : K = Calloc(double, n*n);
89 2 : gensvm_kernel_compute(model, data, K);
90 :
91 : // generate the eigen decomposition
92 2 : r = gensvm_kernel_eigendecomp(K, n, model->kernel_eigen_cutoff, &P,
93 : &Sigma);
94 :
95 : // build M and set to data (leave RAW intact)
96 2 : gensvm_kernel_trainfactor(data, P, Sigma, r);
97 :
98 : // Set Sigma to data->Sigma (need it again for prediction)
99 2 : if (data->Sigma != NULL) {
100 1 : free(data->Sigma);
101 1 : data->Sigma = NULL;
102 : }
103 2 : data->Sigma = Sigma;
104 :
105 : // write kernel params to data
106 2 : gensvm_kernel_copy_kernelparam_to_data(model, data);
107 :
108 2 : free(K);
109 2 : free(P);
110 : }
111 :
112 : /**
113 : * @brief Compute the kernel postprocessing factor
114 : *
115 : * @details
116 : * This function computes the postprocessing factor needed to do predictions
117 : * with kernels in GenSVM. This is a wrapper around gensvm_kernel_cross() and
118 : * gensvm_kernel_testfactor().
119 : *
120 : * @param[in] model a GenSVM model
121 : * @param[in] traindata the training dataset
122 : * @param[in,out] testdata the test dataset. On exit, GenData::Z
123 : * contains the testfactor
124 : */
125 2 : void gensvm_kernel_postprocess(struct GenModel *model,
126 : struct GenData *traindata, struct GenData *testdata)
127 : {
128 2 : if (model->kerneltype == K_LINEAR) {
129 1 : testdata->r = testdata->m;
130 1 : return;
131 : }
132 :
133 : // build the cross kernel matrix between train and test
134 1 : double *K2 = gensvm_kernel_cross(model, traindata, testdata);
135 :
136 : // generate the data matrix N = K2 * M * Sigma^{-2}
137 1 : gensvm_kernel_testfactor(testdata, traindata, K2);
138 :
139 1 : free(K2);
140 : }
141 :
142 : /**
143 : * @brief Compute the kernel matrix
144 : *
145 : * @details
146 : * This function computes the kernel matrix of a data matrix based on the
147 : * requested kernel type and the kernel parameters. The potential types of
148 : * kernel functions are document in KernelType. This function uses a naive
149 : * multiplication and computes the entire upper triangle of the kernel matrix,
150 : * then copies this over to the lower triangle.
151 : *
152 : * @param[in] model a GenModel structure with the model
153 : * @param[in] data a GenData structure with the data
154 : * @param[out] K an nxn preallocated kernel matrix
155 : *
156 : */
157 5 : void gensvm_kernel_compute(struct GenModel *model, struct GenData *data,
158 : double *K)
159 : {
160 : long i, j;
161 5 : long n = data->n;
162 : double value;
163 5 : double *x1 = NULL,
164 5 : *x2 = NULL;
165 :
166 55 : for (i=0; i<n; i++) {
167 325 : for (j=i; j<n; j++) {
168 275 : x1 = &data->RAW[i*(data->m+1)+1];
169 275 : x2 = &data->RAW[j*(data->m+1)+1];
170 275 : if (model->kerneltype == K_POLY)
171 55 : value = gensvm_kernel_dot_poly(x1, x2, data->m,
172 : model->gamma, model->coef,
173 : model->degree);
174 220 : else if (model->kerneltype == K_RBF)
175 165 : value = gensvm_kernel_dot_rbf(x1, x2, data->m,
176 : model-> gamma);
177 55 : else if (model->kerneltype == K_SIGMOID)
178 55 : value = gensvm_kernel_dot_sigmoid(x1, x2,
179 : data->m, model->gamma,
180 : model->coef);
181 : else {
182 : // LCOV_EXCL_START
183 : err("[GenSVM Error]: Unknown kernel type in "
184 : "gensvm_make_kernel\n");
185 : exit(EXIT_FAILURE);
186 : // LCOV_EXCL_STOP
187 : }
188 275 : matrix_set(K, n, i, j, value);
189 275 : matrix_set(K, n, j, i, value);
190 : }
191 : }
192 5 : }
193 :
194 : /**
195 : * @brief Find the (reduced) eigendecomposition of a kernel matrix
196 : *
197 : * @details
198 : * Compute the reduced eigendecomposition of the kernel matrix. This uses the
199 : * LAPACK function dsyevx to do the computation. Only those
200 : * eigenvalues/eigenvectors are kept for which the ratio between the
201 : * eigenvalue and the largest eigenvalue is larger than cutoff. This function
202 : * uses the highest precision eigenvalues, twice the underflow threshold (see
203 : * dsyevx documentation).
204 : *
205 : * @param[in] K the kernel matrix
206 : * @param[in] n the dimension of the kernel matrix
207 : * @param[in] cutoff mimimum ratio of eigenvalue to largest
208 : * eigenvalue for the eigenvector to be
209 : * included
210 : * @param[out] P_ret on exit contains the eigenvectors
211 : * @param[out] Sigma_ret on exit contains the eigenvalues
212 : *
213 : * @return the number of eigenvalues kept
214 : */
215 3 : long gensvm_kernel_eigendecomp(double *K, long n, double cutoff, double **P_ret,
216 : double **Sigma_ret)
217 : {
218 3 : int M, status, LWORK, *IWORK = NULL,
219 3 : *IFAIL = NULL;
220 : long i, j, num_eigen, cutoff_idx;
221 3 : double max_eigen, abstol, *WORK = NULL,
222 3 : *Sigma = NULL,
223 3 : *P = NULL;
224 :
225 3 : double *tempSigma = Malloc(double, n);
226 3 : double *tempP = Malloc(double, n*n);
227 :
228 3 : IWORK = Malloc(int, 5*n);
229 3 : IFAIL = Malloc(int, n);
230 :
231 : // highest precision eigenvalues, may reduce for speed
232 3 : abstol = 2.0*dlamch('S');
233 :
234 : // first perform a workspace query to determine optimal size of the
235 : // WORK array.
236 3 : WORK = Malloc(double, 1);
237 3 : status = dsyevx('V', 'A', 'U', n, K, n, 0, 0, 0, 0, abstol, &M,
238 : tempSigma, tempP, n, WORK, -1, IWORK, IFAIL);
239 3 : LWORK = WORK[0];
240 :
241 : // allocate the requested memory for the eigendecomposition
242 3 : WORK = (double *)realloc(WORK, LWORK*sizeof(double));
243 3 : status = dsyevx('V', 'A', 'U', n, K, n, 0, 0, 0, 0, abstol, &M,
244 : tempSigma, tempP, n, WORK, LWORK, IWORK, IFAIL);
245 :
246 3 : if (status != 0) {
247 : // LCOV_EXCL_START
248 : err("[GenSVM Error]: Nonzero exit status from dsyevx.\n");
249 : exit(EXIT_FAILURE);
250 : // LCOV_EXCL_STOP
251 : }
252 :
253 : // Select the desired number of eigenvalues, depending on their size.
254 : // dsyevx sorts eigenvalues in ascending order.
255 3 : max_eigen = tempSigma[n-1];
256 3 : cutoff_idx = 0;
257 :
258 13 : for (i=0; i<n; i++) {
259 13 : if (tempSigma[i]/max_eigen > cutoff) {
260 3 : cutoff_idx = i;
261 3 : break;
262 : }
263 : }
264 :
265 3 : num_eigen = n - cutoff_idx;
266 :
267 3 : Sigma = Calloc(double, num_eigen);
268 23 : for (i=0; i<num_eigen; i++) {
269 20 : Sigma[i] = tempSigma[n-1 - i];
270 : }
271 :
272 : // revert P to row-major order and copy only the the columns
273 : // corresponding to the selected eigenvalues
274 3 : P = Calloc(double, n*num_eigen);
275 23 : for (j=n-1; j>n-1-num_eigen; j--) {
276 220 : for (i=0; i<n; i++) {
277 200 : P[i*num_eigen + (n-1)-j] = tempP[i + j*n];
278 : }
279 : }
280 :
281 3 : free(tempSigma);
282 3 : free(tempP);
283 3 : free(IWORK);
284 3 : free(IFAIL);
285 3 : free(WORK);
286 :
287 3 : *Sigma_ret = Sigma;
288 3 : *P_ret = P;
289 :
290 3 : return num_eigen;
291 : }
292 :
293 : /**
294 : * @brief Compute the kernel crossproduct between two datasets
295 : *
296 : * @details
297 : * Given a training set @f$\textbf{X}@f$ with feature space mapping
298 : * @f$\boldsymbol{\Phi}@f$ and a test set @f$\textbf{X}_2@f$ with feature
299 : * space mapping @f$\boldsymbol{\Phi}_2@f$, the crosskernel @f$\textbf{K}_2@f$
300 : * is given by @f$\textbf{K}_2 = \boldsymbol{\Phi}_2 \boldsymbol{\Phi}'@f$.
301 : * Thus, an element in row @f$i@f$ and column @f$j@f$ in @f$\textbf{K}_2@f$
302 : * equals the kernel product between the @f$i@f$-th row of @f$\textbf{X}_2@f$
303 : * and the @f$j@f$-th row of @f$\textbf{X}@f$.
304 : *
305 : * @param[in] model the GenSVM model
306 : * @param[in] data_train the training dataset
307 : * @param[in] data_test the test dataset
308 : *
309 : * @return the matrix @f$\textbf{K}_2@f$
310 : */
311 4 : double *gensvm_kernel_cross(struct GenModel *model, struct GenData *data_train,
312 : struct GenData *data_test)
313 : {
314 : long i, j;
315 4 : long n_train = data_train->n;
316 4 : long n_test = data_test->n;
317 4 : long m = data_test->m;
318 4 : double value, *x1 = NULL,
319 4 : *x2 = NULL,
320 4 : *K2 = Calloc(double, n_test * n_train);
321 :
322 27 : for (i=0; i<n_test; i++) {
323 253 : for (j=0; j<n_train; j++) {
324 230 : x1 = &data_test->RAW[i*(m+1)+1];
325 230 : x2 = &data_train->RAW[j*(m+1)+1];
326 230 : if (model->kerneltype == K_POLY)
327 50 : value = gensvm_kernel_dot_poly(x1, x2, m,
328 : model->gamma, model->coef,
329 : model->degree);
330 180 : else if (model->kerneltype == K_RBF)
331 130 : value = gensvm_kernel_dot_rbf(x1, x2, m,
332 : model->gamma);
333 50 : else if (model->kerneltype == K_SIGMOID)
334 50 : value = gensvm_kernel_dot_sigmoid(x1, x2, m,
335 : model->gamma, model->coef);
336 : else {
337 : // LCOV_EXCL_START
338 : err("[GenSVM Error]: Unknown kernel type in "
339 : "gensvm_make_crosskernel\n");
340 : exit(EXIT_FAILURE);
341 : // LCOV_EXCL_STOP
342 : }
343 230 : matrix_set(K2, n_train, i, j, value);
344 : }
345 : }
346 4 : return K2;
347 : }
348 :
349 : /**
350 : * @brief Compute the training factor as part of kernel preprocessing
351 : *
352 : * @details
353 : * This function computes the matrix product @f$\textbf{M} =
354 : * \textbf{P}\boldsymbol{\Sigma}@f$ and stores the result in GenData::Z,
355 : * preceded by a column of ones. It also sets GenData::r to the number of
356 : * eigenvectors that were includedin P and Sigma. Note that P and Sigma
357 : * correspond to the reduced eigendecomposition of the kernel matrix.
358 : *
359 : * @param[in,out] data a GenData structure. On exit, GenData::Z and
360 : * GenData::r are updated as described above.
361 : * @param[in] P the eigenvectors
362 : * @param[in] Sigma the eigenvalues
363 : * @param[in] r the number of eigenvalues and eigenvectors
364 : */
365 3 : void gensvm_kernel_trainfactor(struct GenData *data, double *P, double *Sigma,
366 : long r)
367 : {
368 3 : long i, j, n = data->n;
369 : double value;
370 :
371 : // allocate Z
372 3 : data->Z = Calloc(double, n*(r+1));
373 :
374 : // Write data->Z = [1 M] = [1 P*Sigma]
375 33 : for (i=0; i<n; i++) {
376 230 : for (j=0; j<r; j++) {
377 200 : value = matrix_get(P, r, i, j);
378 200 : value *= matrix_get(Sigma, 1, j, 0);
379 200 : matrix_set(data->Z, r+1, i, j+1, value);
380 : }
381 30 : matrix_set(data->Z, r+1, i, 0, 1.0);
382 : }
383 :
384 : // Set data->r to r so data knows the width of Z
385 3 : data->r = r;
386 3 : }
387 :
388 : /**
389 : * @brief Calculate the matrix product for the testfactor
390 : *
391 : * @details
392 : * To predict class labels when kernels are used, a transformation of the
393 : * testdata has to be performed to get the simplex space vectors. This
394 : * transformation is based on the matrix @f$\textbf{K}_2@f$ (as calculated by
395 : * gensvm_make_crosskernel()) and the matrices @f$\textbf{M} =
396 : * \textbf{P}*\boldsymbol{\Sigma}@f$) and @f$\boldsymbol{\Sigma}@f$. The
397 : * testfactor is equal to @f$\textbf{K}_2 \textbf{M}
398 : * \boldsymbol{\Sigma}^{-2}@f$.
399 : *
400 : * @param[out] testdata a GenData struct with the testdata, contains
401 : * the testfactor in GenData::Z on exit preceded
402 : * by a column of ones.
403 : * @param[in] traindata a GenData struct with the training data
404 : * @param[in] K2 crosskernel between the train and test data
405 : */
406 2 : void gensvm_kernel_testfactor(struct GenData *testdata,
407 : struct GenData *traindata, double *K2)
408 : {
409 : long n1, n2, r, i, j;
410 2 : double value, *N = NULL,
411 2 : *M = NULL;
412 :
413 2 : n1 = traindata->n;
414 2 : n2 = testdata->n;
415 2 : r = traindata->r;
416 :
417 2 : N = Calloc(double, n2*r);
418 2 : M = Calloc(double, n1*r);
419 :
420 : // copy M from traindata->Z because we need it in dgemm without column
421 : // of 1's.
422 22 : for (i=0; i<n1; i++) {
423 100 : for (j=0; j<r; j++) {
424 80 : value = matrix_get(traindata->Z, r+1, i, j+1);
425 80 : matrix_set(M, r, i, j, value);
426 : }
427 : }
428 :
429 : // Multiply K2 with M and store in N
430 2 : cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n2, r, n1, 1.0,
431 : K2, n1, M, r, 0.0, N, r);
432 :
433 : // Multiply N with Sigma^{-2}
434 10 : for (j=0; j<r; j++) {
435 8 : value = pow(matrix_get(traindata->Sigma, 1, j, 0), -2.0);
436 63 : for (i=0; i<n2; i++)
437 55 : matrix_mul(N, r, i, j, value);
438 : }
439 :
440 : // write N to Z with a column of ones
441 2 : testdata->Z = Calloc(double, n2*(r+1));
442 15 : for (i=0; i<n2; i++) {
443 68 : for (j=0; j<r; j++) {
444 55 : matrix_set(testdata->Z, r+1, i, j+1,
445 : matrix_get(N, r, i, j));
446 : }
447 13 : matrix_set(testdata->Z, r+1, i, 0, 1.0);
448 : }
449 : // Set r to testdata
450 2 : testdata->r = r;
451 :
452 2 : free(M);
453 2 : free(N);
454 2 : }
455 :
456 : /**
457 : * @brief Compute the RBF kernel between two vectors
458 : *
459 : * @details
460 : * The RBF kernel is computed between two vectors. This kernel is defined as
461 : * @f[
462 : * k(x_1, x_2) = \exp( -\gamma \| x_1 - x_2 \|^2 )
463 : * @f]
464 : * where @f$ \gamma @f$ is a kernel parameter specified.
465 : *
466 : * @param[in] x1 first vector
467 : * @param[in] x2 second vector
468 : * @param[in] n length of the vectors x1 and x2
469 : * @param[in] gamma gamma parameter of the kernel
470 : * @returns kernel evaluation
471 : */
472 297 : double gensvm_kernel_dot_rbf(double *x1, double *x2, long n, double gamma)
473 : {
474 : long i;
475 297 : double value = 0.0;
476 :
477 1572 : for (i=0; i<n; i++)
478 1275 : value += (x1[i] - x2[i]) * (x1[i] - x2[i]);
479 297 : value *= -gamma;
480 297 : return exp(value);
481 : }
482 :
483 : /**
484 : * @brief Compute the polynomial kernel between two vectors
485 : *
486 : * @details
487 : * The polynomial kernel is computed between two vectors. This kernel is
488 : * defined as
489 : * @f[
490 : * k(x_1, x_2) = ( \gamma \langle x_1, x_2 \rangle + coef)^{degree}
491 : * @f]
492 : * where @f$ \gamma @f$, @f$ coef @f$ and @f$ degree @f$ are kernel
493 : * parameters.
494 : *
495 : * @param[in] x1 first vector
496 : * @param[in] x2 second vector
497 : * @param[in] n length of the vectors x1 and x2
498 : * @param[in] gamma gamma parameter of the kernel
499 : * @param[in] coef coef parameter of the kernel
500 : * @param[in] degree degree parameter of the kernel
501 : * @returns kernel evaluation
502 : */
503 107 : double gensvm_kernel_dot_poly(double *x1, double *x2, long n, double gamma,
504 : double coef, double degree)
505 : {
506 107 : double value = cblas_ddot(n, x1, 1, x2, 1);
507 107 : value *= gamma;
508 107 : value += coef;
509 107 : return pow(value, degree);
510 : }
511 :
512 : /**
513 : * @brief Compute the sigmoid kernel between two vectors
514 : *
515 : * @details
516 : * The sigmoid kernel is computed between two vectors. This kernel is defined
517 : * as
518 : * @f[
519 : * k(x_1, x_2) = \tanh( \gamma \langle x_1 , x_2 \rangle + coef)
520 : * @f]
521 : * where @f$ \gamma @f$ and @f$ coef @f$ are kernel parameters.
522 : *
523 : * @param[in] x1 first vector
524 : * @param[in] x2 second vector
525 : * @param[in] n length of the vectors x1 and x2
526 : * @param[in] gamma gamma parameter of the kernel
527 : * @param[in] coef coef parameter of the kernel
528 : * @returns kernel evaluation
529 : */
530 107 : double gensvm_kernel_dot_sigmoid(double *x1, double *x2, long n, double gamma,
531 : double coef)
532 : {
533 107 : double value = cblas_ddot(n, x1, 1, x2, 1);
534 107 : value *= gamma;
535 107 : value += coef;
536 107 : return tanh(value);
537 : }
538 :
539 : /**
540 : * @brief Compute the eigenvalues and optionally the eigenvectors of a
541 : * symmetric matrix.
542 : *
543 : * @details
544 : * This is a wrapper function around the external LAPACK function.
545 : *
546 : * See the LAPACK documentation at:
547 : * http://www.netlib.org/lapack/explore-html/d2/d97/dsyevx_8f.html
548 : *
549 : */
550 6 : int dsyevx(char JOBZ, char RANGE, char UPLO, int N, double *A, int LDA,
551 : double VL, double VU, int IL, int IU, double ABSTOL, int *M,
552 : double *W, double *Z, int LDZ, double *WORK, int LWORK,
553 : int *IWORK, int *IFAIL)
554 : {
555 : extern void dsyevx_(char *JOBZ, char *RANGE, char *UPLO, int *Np,
556 : double *A, int *LDAp, double *VLp, double *VUp,
557 : int *ILp, int *IUp, double *ABSTOLp, int *M,
558 : double *W, double *Z, int *LDZp, double *WORK,
559 : int *LWORKp, int *IWORK, int *IFAIL, int *INFOp);
560 : int INFO;
561 6 : dsyevx_(&JOBZ, &RANGE, &UPLO, &N, A, &LDA, &VL, &VU, &IL, &IU, &ABSTOL,
562 : M, W, Z, &LDZ, WORK, &LWORK, IWORK, IFAIL, &INFO);
563 6 : return INFO;
564 : }
565 :
566 : /**
567 : * @brief Determine double precision machine parameters.
568 : *
569 : * @details
570 : * This is a wrapper function around the external LAPACK function.
571 : *
572 : * See the LAPACK documentation at:
573 : * http://www.netlib.org/lapack/explore-html/d5/dd4/dlamch_8f.html
574 : */
575 3 : double dlamch(char CMACH)
576 : {
577 : extern double dlamch_(char *CMACH);
578 3 : return dlamch_(&CMACH);
579 : }
|