Line data Source code
1 : /**
2 : * @file gensvm_checks.c
3 : * @author G.J.J. van den Burg
4 : * @date 2016-12-07
5 : * @brief Sanity checks used to ensure inputs are as expected
6 : *
7 : * @copyright
8 : Copyright 2016, G.J.J. van den Burg.
9 :
10 : This file is part of GenSVM.
11 :
12 : GenSVM is free software: you can redistribute it and/or modify
13 : it under the terms of the GNU General Public License as published by
14 : the Free Software Foundation, either version 3 of the License, or
15 : (at your option) any later version.
16 :
17 : GenSVM is distributed in the hope that it will be useful,
18 : but WITHOUT ANY WARRANTY; without even the implied warranty of
19 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 : GNU General Public License for more details.
21 :
22 : You should have received a copy of the GNU General Public License
23 : along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
24 :
25 : */
26 :
27 : #include "gensvm_checks.h"
28 :
29 : /**
30 : * @brief Check if the labels are contiguous on [1 .. K]
31 : *
32 : * @details
33 : * The GenSVM library currently requires that the labels that are supplied in
34 : * a dataset are contigous on the interval [1 .. K] and have no gaps. This is
35 : * required because the dimensionality of the problem is directly related to
36 : * the maximum class label K. This function checks if the labels are indeed in
37 : * the desired range.
38 : *
39 : * @param[in] data a GenData struct with the current data
40 : *
41 : * @return whether the labels are contiguous or not
42 : */
43 4 : bool gensvm_check_outcome_contiguous(struct GenData *data)
44 : {
45 4 : bool in_uniq, is_contiguous = true;
46 4 : long i, j, K = 1;
47 4 : long max_y = -1,
48 4 : min_y = LONG_MAX;
49 4 : long *uniq_y = Calloc(long, K);
50 4 : uniq_y[0] = data->y[0];
51 :
52 40 : for (i=1; i<data->n; i++) {
53 36 : in_uniq = false;
54 87 : for (j=0; j<K; j++) {
55 77 : if (uniq_y[j] == data->y[i]) {
56 26 : in_uniq = true;
57 26 : break;
58 : }
59 : }
60 :
61 36 : if (!in_uniq) {
62 10 : uniq_y = Realloc(uniq_y, long, K+1);
63 10 : uniq_y[K++] = data->y[i];
64 : }
65 :
66 36 : max_y = maximum(max_y, data->y[i]);
67 36 : min_y = minimum(min_y, data->y[i]);
68 : }
69 :
70 4 : if (min_y < 1 || max_y > K) {
71 3 : is_contiguous = false;
72 : }
73 :
74 4 : free(uniq_y);
75 :
76 4 : return is_contiguous;
77 : }
|