Line data Source code
1 : /**
2 : * @file gensvm_init.c
3 : * @author G.J.J. van den Burg
4 : * @date 2014-01-07
5 : * @brief Functions for initializing model and data structures
6 : * @details
7 : *
8 : * This file contains functions for initializing a GenModel instance
9 : * and a GenData instance. In addition, default values for these
10 : * structures are defined here (and only here). Functions for allocating
11 : * memory for the model structure and freeing of the model and data structures
12 : * are also included.
13 : *
14 : * @copyright
15 : Copyright 2016, G.J.J. van den Burg.
16 :
17 : This file is part of GenSVM.
18 :
19 : GenSVM is free software: you can redistribute it and/or modify
20 : it under the terms of the GNU General Public License as published by
21 : the Free Software Foundation, either version 3 of the License, or
22 : (at your option) any later version.
23 :
24 : GenSVM is distributed in the hope that it will be useful,
25 : but WITHOUT ANY WARRANTY; without even the implied warranty of
26 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 : GNU General Public License for more details.
28 :
29 : You should have received a copy of the GNU General Public License
30 : along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
31 :
32 : */
33 :
34 : #include "gensvm_init.h"
35 : #include "gensvm_print.h"
36 :
37 : /**
38 : * @brief Seed the matrix V from an existing model or using rand
39 : *
40 : * @details
41 : * The matrix V must be seeded before the main_loop() can start.
42 : * This can be done by either seeding it with random numbers or
43 : * using the solution from a previous model on the same dataset
44 : * as initial seed. The latter option usually allows for a
45 : * significant improvement in the number of iterations necessary
46 : * because the seeded model V is closer to the optimal V.
47 : *
48 : * When no seed model is supplied, the rows of V are seeded with random
49 : * numbers between the inverse of the minimum and the inverse of the maximum
50 : * of the corresponding column of Z. This is done to center the product of the
51 : * two in the simplex space.
52 : *
53 : * @param[in] from_model GenModel from which to copy V
54 : * @param[in,out] to_model GenModel to which V will be copied
55 : * @param[in] data GenData structure with the data
56 : */
57 6 : void gensvm_init_V(struct GenModel *from_model,
58 : struct GenModel *to_model, struct GenData *data)
59 : {
// Writes (m+1) x (K-1) entries of to_model->V through matrix_set: they are
// copied from from_model->V when from_model is non-NULL, and drawn with
// rand() otherwise. rand() is not seeded anywhere in this function.
60 : long i, j, k, jj_start, jj_end, jj;
61 : double cmin, cmax, value, rnd;
62 6 : double *col_min = NULL,
63 6 : *col_max = NULL;
64 :
// All dimensions are read from data, not from either model.
65 6 : long n = data->n;
66 6 : long m = data->m;
67 6 : long K = data->K;
68 :
69 6 : if (from_model == NULL) {
// Running minimum/maximum per column, one slot for each of the m+1 columns.
70 3 : col_min = Calloc(double, m+1);
71 3 : col_max = Calloc(double, m+1);
// Start from +/-1e100 sentinels so the first value compared replaces them.
72 15 : for (j=0; j<m+1; j++) {
73 12 : col_min[j] = 1.0e100;
74 12 : col_max[j] = -1.0e100;
75 : }
76 :
77 3 : if (data->Z == NULL) {
78 : // sparse matrix
// Z is absent, so entries are read from data->spZ instead.
// visit_count[j] tallies the stored entries seen in column j; the
// increments below rely on Calloc returning zeroed memory
// (presumably it does -- confirm against the Calloc macro).
79 1 : long *visit_count = Calloc(long, m+1);
80 6 : for (i=0; i<n; i++) {
// Stored entries of row i sit at positions ia[i] .. ia[i+1]-1 of
// ja (column index) and values (entry value).
81 5 : jj_start = data->spZ->ia[i];
82 5 : jj_end = data->spZ->ia[i+1];
83 18 : for (jj=jj_start; jj<jj_end; jj++) {
84 13 : j = data->spZ->ja[jj];
85 13 : value = data->spZ->values[jj];
86 :
87 13 : col_min[j] = minimum(col_min[j], value);
88 13 : col_max[j] = maximum(col_max[j], value);
89 13 : visit_count[j]++;
90 : }
91 : }
92 : // correction in case the minimum or maximum is 0
// Fewer than n stored entries means column j has at least one entry
// that is not stored; 0.0 is folded into that column's min and max
// on its behalf.
93 4 : for (j=0; j<m+1; j++) {
94 3 : if (visit_count[j] < n) {
95 2 : col_min[j] = minimum(col_min[j], 0.0);
96 2 : col_max[j] = maximum(col_max[j], 0.0);
97 : }
98 : }
99 1 : free(visit_count);
100 : } else {
101 : // dense matrix
// Visit every (i, j) with i < n and j < m+1 of Z.
102 17 : for (i=0; i<n; i++) {
103 90 : for (j=0; j<m+1; j++) {
104 75 : value = matrix_get(data->Z, m+1, i, j);
105 75 : col_min[j] = minimum(col_min[j], value);
106 75 : col_max[j] = maximum(col_max[j], value);
107 : }
108 : }
109 : }
// Row j of V is seeded from the min/max of column j of the data.
110 15 : for (j=0; j<m+1; j++) {
// A bound within 1e-10 of zero is swapped for -1 (min) or +1 (max)
// before its reciprocal is used below.
111 12 : cmin = (fabs(col_min[j]) < 1e-10) ? -1 : col_min[j];
112 12 : cmax = (fabs(col_max[j]) < 1e-10) ? 1 : col_max[j];
113 42 : for (k=0; k<K-1; k++) {
// rnd lies in [0, 1]; value moves linearly from 1/cmin (rnd = 0)
// to 1/cmax (rnd = 1).
114 30 : rnd = ((double) rand()) / ((double) RAND_MAX);
115 30 : value = 1.0/cmin + (1.0/cmax - 1.0/cmin)*rnd;
116 30 : matrix_set(to_model->V, K-1, j, k, value);
117 : }
118 : }
119 3 : free(col_min);
120 3 : free(col_max);
121 : } else {
// A seed model was supplied: copy its V entry by entry. data is used
// only for the dimensions m and K in this branch.
122 17 : for (i=0; i<m+1; i++) {
123 50 : for (j=0; j<K-1; j++) {
124 36 : value = matrix_get(from_model->V, K-1, i, j);
125 36 : matrix_set(to_model->V, K-1, i, j, value);
126 : }
127 : }
128 : }
129 6 : }
130 :
131 : /**
132 : * @brief Initialize instance weights
133 : *
134 : * @details
135 : * Instance weights can be used, for example, to give extra weight to
136 : * instances of certain classes. Two default weighting schemes are
137 : * implemented here. The first is unit weights, where each instance gets
138 : * weight 1.
139 : *
140 : * The second is group-size correction weights, which are calculated as
141 : * @f[
142 : * \rho_i = \frac{n}{Kn_k} ,
143 : * @f]
144 : * where @f$ n_k @f$ is the number of instances in group @f$ k @f$ and
145 : * @f$ y_i = k @f$.
146 : *
147 : * @param[in] data GenData with the dataset
148 : * @param[in,out] model GenModel with the weight specification. On
149 : * exit GenModel::rho contains the instance
150 : * weights.
151 : */
152 9 : void gensvm_initialize_weights(struct GenData *data, struct GenModel *model)
153 : {
// Fills model->rho[0..n-1] according to model->weight_idx:
//   1 -> every weight is 1.0
//   2 -> rho[i] = n / (K * size of the group that data->y[i] names)
// Any other value prints an error and terminates the process.
154 9 : long *groups = NULL;
155 : long i;
156 :
// n and K are taken from the model, while the labels come from data->y.
// NOTE(review): assumes data->y holds at least model->n labels -- confirm
// against the callers.
157 9 : long n = model->n;
158 9 : long K = model->K;
159 :
160 9 : if (model->weight_idx == 1) {
161 66 : for (i=0; i<n; i++)
162 59 : model->rho[i] = 1.0;
163 : }
164 2 : else if (model->weight_idx == 2) {
// groups[k] counts the instances whose label is k+1. Labels are used as
// 1-based indices without a range check, and the counting relies on
// Calloc returning zeroed memory (presumably -- confirm against the
// Calloc macro).
165 2 : groups = Calloc(long, K);
166 18 : for (i=0; i<n; i++)
167 16 : groups[data->y[i]-1]++;
// The divisor is (group size * K), computed in long before the cast.
168 18 : for (i=0; i<n; i++)
169 32 : model->rho[i] = ((double) n)/((double) (
170 16 : groups[data->y[i]-1]*K));
171 : } else {
172 : // LCOV_EXCL_START
173 : err("[GenSVM Error]: Unknown weight specification.\n");
174 : exit(EXIT_FAILURE);
175 : // LCOV_EXCL_STOP
176 : }
// groups is still NULL on the unit-weight path; free(NULL) is a no-op.
177 9 : free(groups);
178 9 : }
|