61 long big_folds = N%folds;
62 long small_fold_size = N/folds;
65 for (i=0; i<small_fold_size*folds; i++)
68 if (cv_idx[idx] == 0) {
77 while (i < big_folds) {
109 long *cv_idx,
long fold_idx)
111 if (full_data->
Z == NULL)
144 long *cv_idx,
long fold_idx)
146 long i, j, k, l, test_n, train_n;
148 long n = full_data->
n;
149 long m = full_data->
m;
150 long K = full_data->
K;
156 if (cv_idx[i] == fold_idx)
158 train_n = n - test_n;
160 test_data->
n = test_n;
161 train_data->
n = train_n;
169 train_data->
y =
Calloc(
long, train_n);
170 test_data->
y =
Calloc(
long, test_n);
172 train_data->
RAW =
Calloc(
double, train_n*(m+1));
173 test_data->
RAW =
Calloc(
double, test_n*(m+1));
177 for (i=0; i<n; i++) {
178 if (cv_idx[i] == fold_idx) {
179 test_data->
y[k] = full_data->
y[i];
180 for (j=0; j<m+1; j++) {
186 train_data->
y[l] = full_data->
y[i];
187 for (j=0; j<m+1; j++) {
195 train_data->
Z = train_data->
RAW;
196 test_data->
Z = test_data->
RAW;
225 long *cv_idx,
long fold_idx)
227 long i, j, test_n, train_n, train_nnz, test_nnz, row_nnz, jj,
238 for (i=0; i<full_data->
n; i++)
239 if (cv_idx[i] == fold_idx)
241 train_n = full_data->
n - test_n;
244 train_data->
n = train_n;
245 train_data->
m = full_data->
m;
246 train_data->
K = full_data->
K;
247 test_data->
n = test_n;
248 test_data->
m = full_data->
m;
249 test_data->
K = full_data->
K;
252 train_data->
y =
Calloc(
long, train_n);
253 test_data->
y =
Calloc(
long, test_n);
258 for (i=0; i<full_data->
n; i++) {
259 row_nnz = full_data->
spZ->
ia[i+1] - full_data->
spZ->
ia[i];
260 if (cv_idx[i] == fold_idx) {
263 train_nnz += row_nnz;
272 train_data->
spZ->
nnz = train_nnz;
280 test_data->
spZ->
nnz = test_nnz;
292 test_data->
spZ->
ia[0] = 0;
293 train_data->
spZ->
ia[0] = 0;
294 for (i=0; i<full_data->
n; i++) {
295 jj_start = full_data->
spZ->
ia[i];
296 jj_end = full_data->
spZ->
ia[i+1];
298 for (jj=jj_start; jj<jj_end; jj++) {
299 j = full_data->
spZ->
ja[jj];
302 if (cv_idx[i] == fold_idx) {
303 test_data->
spZ->
values[te_nnz_idx] = value;
304 test_data->
spZ->
ja[te_nnz_idx] = j;
307 train_data->
spZ->
values[tr_nnz_idx] = value;
308 train_data->
spZ->
ja[tr_nnz_idx] = j;
313 if (cv_idx[i] == fold_idx) {
314 test_data->
y[te_row_idx] = full_data->
y[i];
315 test_data->
spZ->
ia[te_row_idx+1] = te_nnz_idx;
318 train_data->
y[tr_row_idx] = full_data->
y[i];
319 train_data->
spZ->
ia[tr_row_idx+1] = tr_nnz_idx;
#define Calloc(type, size)
long * ja
column indices, should be of length nnz
long n_col
number of columns of the original matrix
#define matrix_get(M, cols, i, j)
void gensvm_get_tt_split(struct GenData *full_data, struct GenData *train_data, struct GenData *test_data, long *cv_idx, long fold_idx)
Wrapper that dispatches to the sparse or dense version of the train/test split function, depending on the data representation.
void gensvm_get_tt_split_sparse(struct GenData *full_data, struct GenData *train_data, struct GenData *test_data, long *cv_idx, long fold_idx)
Create train and test datasets for a CV split with sparse data.
long nnz
number of nonzero elements
long * y
array of class labels, 1..K
A structure to represent the data.
double * values
actual nonzero values, should be of length nnz
void gensvm_get_tt_split_dense(struct GenData *full_data, struct GenData *train_data, struct GenData *test_data, long *cv_idx, long fold_idx)
Create train and test datasets for a CV split with dense data.
long m
number of predictors (width of RAW)
#define matrix_set(M, cols, i, j, val)
long n
number of instances
struct GenSparse * gensvm_init_sparse(void)
Initialize a GenSparse structure.
long * ia
cumulative row lengths, should be of length n_row+1
double * RAW
augmented raw data matrix
struct GenSparse * spZ
sparse representation of the augmented data matrix
Header file for gensvm_cv_util.c.
void gensvm_make_cv_split(long N, long folds, long *cv_idx)
Create a cross validation split vector.
long n_row
number of rows of the original matrix