LCOV - coverage.all - src/gensvm

LCOV - code coverage report

Current view:	top level - src - gensvm_update.c (source / functions)		Hit	Total	Coverage
Test:	coverage.all	Lines:	159	174	91.4 %
Date:	2017-02-21 18:44:20	Functions:	11	11	100.0 %

          Line data    Source code

       1             : /**
       2             :  * @file gensvm_update.c
       3             :  * @author G.J.J. van den Burg
       4             :  * @date 2016-10-14
       5             :  * @brief Functions for getting an update of the majorization algorithm
       6             :  *
       7             :  * @copyright
       8             :  Copyright 2016, G.J.J. van den Burg.
       9             : 
      10             :  This file is part of GenSVM.
      11             : 
      12             :  GenSVM is free software: you can redistribute it and/or modify
      13             :  it under the terms of the GNU General Public License as published by
      14             :  the Free Software Foundation, either version 3 of the License, or
      15             :  (at your option) any later version.
      16             : 
      17             :  GenSVM is distributed in the hope that it will be useful,
      18             :  but WITHOUT ANY WARRANTY; without even the implied warranty of
      19             :  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
      20             :  GNU General Public License for more details.
      21             : 
      22             :  You should have received a copy of the GNU General Public License
      23             :  along with GenSVM. If not, see <http://www.gnu.org/licenses/>.
      24             : 
      25             :  */
      26             : 
      27             : #include "gensvm_update.h"
      28             : 
      29             : /**
      30             :  * Number of rows in a single block for the ZAZ calculation in 
      31             :  * gensvm_get_ZAZ_ZB_sparse().
      32             :  */
      33             : #ifndef GENSVM_BLOCK_SIZE
      34             :   #define GENSVM_BLOCK_SIZE 512
      35             : #endif
      36             : 
      37             : /**
      38             :  * @brief Calculate the value of omega for a single instance
      39             :  *
      40             :  * @details
      41             :  * This function calculates the value of the @f$ \omega_i @f$ variable for a
      42             :  * single instance, where
      43             :  * @f[
      44             :  *      \omega_i = \frac{1}{p} \left( \sum_{j \neq y_i} h^p\left(
      45             :  *      \overline{q}_i^{(y_i j)} \right)  \right)^{1/p-1}
      46             :  * @f]
      47             :  * Note that his function uses the precalculated values from GenModel::H and
      48             :  * GenModel::R to speed up the computation.
      49             :  *
      50             :  * @param[in]   model   GenModel structure with the current model
      51             :  * @param[in]   data    GenData structure with the data (used for y)
      52             :  * @param[in]   i       index of the instance for which to calculate omega
      53             :  * @returns             the value of omega for instance i
      54             :  *
      55             :  */
      56        3751 : double gensvm_calculate_omega(struct GenModel *model, struct GenData *data,
      57             :                 long i)
      58             : {
      59             :         long j;
      60        3751 :         double h, omega = 0.0,
      61        3751 :                p = model->p;
      62             : 
      63       18740 :         for (j=0; j<model->K; j++) {
      64       14989 :                 if (j == (data->y[i]-1))
      65        3751 :                         continue;
      66       11238 :                 h = matrix_get(model->H, model->K, i, j);
      67       11238 :                 omega += pow(h, p);
      68             :         }
      69        3751 :         omega = (1.0/p)*pow(omega, 1.0/p - 1.0);
      70             : 
      71        3751 :         return omega;
      72             : }
      73             : 
      74             : /**
      75             :  * @brief Check if we can do simple majorization for a given instance
      76             :  *
      77             :  * @details
      78             :  * A simple majorization is possible if at most one of the Huberized hinge
      79             :  * errors is nonzero for an instance. This is checked here. For this we
      80             :  * compute the product of the Huberized error for all @f$j \neq y_i@f$ and
      81             :  * check if strictly less than 2 are nonzero. See also the @ref update_math.
      82             :  *
      83             :  * @param[in]   model   GenModel structure with the current model
      84             :  * @param[in]   data    GenData structure with the data (used for y)
      85             :  * @param[in]   i       index of the instance for which to check
      86             :  * @returns             whether or not we can do simple majorization
      87             :  *
      88             :  */
      89        4399 : bool gensvm_majorize_is_simple(struct GenModel *model, struct GenData *data,
      90             :                 long i)
      91             : {
      92             :         long j;
      93        4399 :         double h, value = 0;
      94       13680 :         for (j=0; j<model->K; j++) {
      95       13029 :                 if (j == (data->y[i]-1))
      96        2721 :                         continue;
      97       10308 :                 h = matrix_get(model->H, model->K, i, j);
      98       10308 :                 value += h > 0;
      99       10308 :                 if (value > 1)
     100        3748 :                         return false;
     101             :         }
     102         651 :         return true;
     103             : }
     104             : 
     105             : /**
     106             :  * @brief Compute majorization coefficients for non-simple instance
     107             :  *
     108             :  * @details
     109             :  * In this function we compute the majorization coefficients needed for an
     110             :  * instance with a non-simple majorization (@f$\varepsilon_i = 0@f$). In this
     111             :  * function, we distinguish a number of cases depending on the value of
     112             :  * GenModel::p and the respective value of @f$\overline{q}_i^{(y_ij)}@f$. Note
     113             :  * that the linear coefficient is of the form @f$b - a\overline{q}@f$, but
     114             :  * often the second term is included in the definition of @f$b@f$, so it can
     115             :  * be optimized out. The output argument \p b_aq contains this difference
     116             :  * therefore in one go. More details on this function can be found in the @ref
     117             :  * update_math. See also gensvm_calculate_ab_simple().
     118             :  *
     119             :  * @param[in]   model   GenModel structure with the current model
     120             :  * @param[in]   i       index for the instance
     121             :  * @param[in]   j       index for the class
     122             :  * @param[out]  *a      output argument for the quadratic coefficient
     123             :  * @param[out]  *b_aq   output argument for the linear coefficient.
     124             :  *
     125             :  */
     126       11235 : void gensvm_calculate_ab_non_simple(struct GenModel *model, long i, long j,
     127             :                 double *a, double *b_aq)
     128             : {
     129       11235 :         double q = matrix_get(model->Q, model->K, i, j);
     130       11235 :         double p = model->p;
     131       11235 :         double kappa = model->kappa;
     132       11235 :         const double a2g2 = 0.25*p*(2.0*p - 1.0)*pow((kappa+1.0)/2.0,p-2.0);
     133             : 
     134       11235 :         if (2.0 - model->p < 1e-2) {
     135           3 :                 if (q <= - kappa) {
     136           1 :                         *b_aq = 0.5 - kappa/2.0 - q;
     137           2 :                 } else if ( q <= 1.0) {
     138           1 :                         *b_aq = pow(1.0 - q, 3.0)/(2.0*pow(kappa + 1.0, 2.0));
     139             :                 } else {
     140           1 :                         *b_aq = 0;
     141             :                 }
     142           3 :                 *a = 1.5;
     143             :         } else {
     144       11232 :                 if (q <= (p + kappa - 1.0)/(p - 2.0)) {
     145          27 :                         *a = 0.25*pow(p, 2.0)*pow(0.5 - kappa/2.0 - q, p - 2.0);
     146       11205 :                 } else if (q <= 1.0) {
     147       10203 :                         *a = a2g2;
     148             :                 } else {
     149        3006 :                         *a = 0.25*pow(p, 2.0)*pow((p/(p - 2.0))*(0.5 -
     150        2004 :                                                 kappa/2.0 - q), p - 2.0);
     151        2004 :                         *b_aq = (*a)*(2.0*q + kappa - 1.0)/(p - 2.0) +
     152        1002 :                                 0.5*p*pow(p/(p - 2.0)*(0.5 - kappa/2.0 - q),
     153             :                                                 p - 1.0);
     154             :                 }
     155       11232 :                 if (q <= -kappa) {
     156          53 :                         *b_aq = 0.5*p*pow(0.5 - kappa/2.0 - q, p - 1.0);
     157       11179 :                 } else if ( q <= 1.0) {
     158       10177 :                         *b_aq = p*pow(1.0 - q, 2.0*p - 1.0)/pow(2*kappa+2.0, p);
     159             :                 }
     160             :         }
     161       11235 : }
     162             : 
     163             : /**
     164             :  * @brief Compute majorization coefficients for simple instances
     165             :  *
     166             :  * @details
     167             :  * In this function we compute the majorization coefficients needed for an
     168             :  * instance with a simple majorization. This corresponds to the non-simple
     169             :  * majorization for the case where GenModel::p equals 1. Due to this condition
     170             :  * the majorization coefficients are quite simple to compute.  Note that the
     171             :  * linear coefficient of the majorization is of the form @f$b -
     172             :  * a\overline{q}@f$, but often the second term is included in the definition
     173             :  * of @f$b@f$, so it can be optimized out. For more details see the @ref
     174             :  * update_math, and gensvm_calculate_ab_non_simple().
     175             :  *
     176             :  * @param[in]   model   GenModel structure with the current model
     177             :  * @param[in]   i       index for the instance
     178             :  * @param[in]   j       index for the class
     179             :  * @param[out]  *a      output argument for the quadratic coefficient
     180             :  * @param[out]  *b_aq   output argument for the linear coefficient
     181             :  *
     182             :  */
     183        1941 : void gensvm_calculate_ab_simple(struct GenModel *model, long i, long j,
     184             :                 double *a, double *b_aq)
     185             : {
     186        1941 :         double q = matrix_get(model->Q, model->K, i, j);
     187             : 
     188        1941 :         if (q <= - model->kappa) {
     189           1 :                 *a = 0.25/(0.5 - model->kappa/2.0 - q);
     190           1 :                 *b_aq = 0.5;
     191        1940 :         } else if (q <= 1.0) {
     192         649 :                 *a = 1.0/(2.0*model->kappa + 2.0);
     193         649 :                 *b_aq = (1.0 - q)*(*a);
     194             :         } else {
     195        1291 :                 *a = -0.25/(0.5 - model->kappa/2.0 - q);
     196        1291 :                 *b_aq = 0;
     197             :         }
     198        1941 : }
     199             : 
     200             : /**
     201             :  * @brief Compute the alpha_i and beta_i for an instance
     202             :  *
     203             :  * @details
     204             :  * This computes the @f$\alpha_i@f$ value for an instance, and simultaneously
     205             :  * updating the row of the B matrix corresponding to that
     206             :  * instance (the @f$\boldsymbol{\beta}_i'@f$). The advantage of doing this at
     207             :  * the same time is that we can compute the a and b values simultaneously in
     208             :  * the gensvm_calculate_ab_simple() and gensvm_calculate_ab_non_simple()
     209             :  * functions.
     210             :  *
     211             :  * The computation is done by first checking whether simple majorization is
     212             :  * possible for this instance. If so, the @f$\omega_i@f$ value is set to 1.0,
     213             :  * otherwise this value is computed. If simple majorization is possible, the
     214             :  * coefficients a and b_aq are computed by gensvm_calculate_ab_simple(),
     215             :  * otherwise they're computed by gensvm_calculate_ab_non_simple(). Next, the
     216             :  * beta_i updated through the efficient BLAS daxpy function, and part of the
     217             :  * value of @f$\alpha_i@f$ is computed. The final value of @f$\alpha_i@f$ is
     218             :  * returned.
     219             :  *
     220             :  * @param[in]           model   GenModel structure with the current model
     221             :  * @param[in]           data    GenData structure with the data
     222             :  * @param[in]           i       index of the instance to update
     223             :  * @param[out]          beta    beta vector of linear coefficients (assumed to
     224             :  *                              be allocated elsewhere, initialized here)
     225             :  * @returns                     the @f$\alpha_i@f$ value of this instance
     226             :  *
     227             :  */
     228        4394 : double gensvm_get_alpha_beta(struct GenModel *model, struct GenData *data,
     229             :                 long i, double *beta)
     230             : {
     231             :         bool simple;
     232        4394 :         long j, K = model->K;
     233        4394 :         double omega, a, b_aq = 0.0,
     234        4394 :                alpha = 0.0;
     235        4394 :         double *uu_row = NULL;
     236        4394 :         const double in = 1.0/((double) model->n);
     237             : 
     238        4394 :         simple = gensvm_majorize_is_simple(model, data, i);
     239        4394 :         omega = simple ? 1.0 : gensvm_calculate_omega(model, data, i);
     240             : 
     241        4394 :         Memset(beta, double, K-1);
     242       21954 :         for (j=0; j<K; j++) {
     243             :                 // skip the class y_i = k
     244       17560 :                 if (j == (data->y[i]-1))
     245        4394 :                         continue;
     246             : 
     247             :                 // calculate the a_ijk and (b_ijk - a_ijk q_i^(kj)) values
     248       13166 :                 if (simple) {
     249        1938 :                         gensvm_calculate_ab_simple(model, i, j, &a, &b_aq);
     250             :                 } else {
     251       11228 :                         gensvm_calculate_ab_non_simple(model, i, j, &a, &b_aq);
     252             :                 }
     253             : 
     254             :                 // daxpy on beta and UU
     255             :                 // daxpy does: y = a*x + y
     256             :                 // so y = beta, UU_row = x, a = factor
     257       13166 :                 b_aq *= model->rho[i] * omega * in;
     258       13166 :                 uu_row = &model->UU[((data->y[i]-1)*K+j)*(K-1)];
     259       13166 :                 cblas_daxpy(K-1, b_aq, uu_row, 1, beta, 1);
     260             : 
     261             :                 // increment Avalue
     262       13166 :                 alpha += a;
     263             :         }
     264        4394 :         alpha *= omega * model->rho[i] * in;
     265        4394 :         return alpha;
     266             : }
     267             : 
     268             : /**
     269             :  * @brief Perform a single step of the majorization algorithm to update V
     270             :  *
     271             :  * @details
     272             :  * This function contains the main update calculations of the algorithm. These
     273             :  * calculations are necessary to find a new update V. The calculations exist of
     274             :  * recalculating the majorization coefficients for all instances and all
     275             :  * classes, and solving a linear system to find V.
     276             :  *
     277             :  * Because the function gensvm_get_update() is always called after a call to
     278             :  * gensvm_get_loss() with the same GenModel::V, it is unnecessary to calculate
     279             :  * the updated errors GenModel::Q and GenModel::H here too. This saves on
     280             :  * computation time.
     281             :  *
     282             :  * In calculating the majorization coefficients we calculate the elements of a
     283             :  * diagonal matrix A with elements
     284             :  * @f[
     285             :  *      A_{i, i} = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
     286             :  *              \varepsilon_i a_{ijk}^{(1)} + (1 - \varepsilon_i) \omega_i
     287             :  *              a_{ijk}^{(p)} \right],
     288             :  * @f]
     289             :  * where @f$ k = y_i @f$.
     290             :  * Since this matrix is only used to calculate the matrix @f$ Z' A Z @f$, it
     291             :  * is efficient to update a matrix ZAZ through consecutive rank 1 updates with
     292             :  * a single element of A and the corresponding row of Z. The BLAS function
     293             :  * dsyr is used for this.
     294             :  *
     295             :  * The B matrix is has rows
     296             :  * @f[
     297             :  *      \boldsymbol{\beta}_i' = \frac{1}{n} \rho_i \sum_{j \neq k} \left[
     298             :  *              \varepsilon_i \left( b_{ijk}^{(1)} - a_{ijk}^{(1)}
     299             :  *                      \overline{q}_i^{(kj)} \right) + (1 - \varepsilon_i)
     300             :  *              \omega_i \left( b_{ijk}^{(p)} - a_{ijk}^{(p)}
     301             :  *                      \overline{q}_i^{(kj)} \right) \right]
     302             :  *              \boldsymbol{\delta}_{kj}'
     303             :  * @f]
     304             :  * This is also split into two cases, one for which @f$ \varepsilon_i = 1 @f$,
     305             :  * and one for when it is 0. The 3D simplex difference matrix is used here, in
     306             :  * the form of the @f$ \boldsymbol{\delta}_{kj}' @f$.
     307             :  *
     308             :  * Finally, the following system is solved
     309             :  * @f[
     310             :  *      (\textbf{Z}'\textbf{AZ} + \lambda \textbf{J})\textbf{V} =
     311             :  *              (\textbf{Z}'\textbf{AZ}\overline{\textbf{V}} + \textbf{Z}'
     312             :  *              \textbf{B})
     313             :  * @f]
     314             :  * solving this system is done through dposv().
     315             :  *
     316             :  * @todo
     317             :  * Consider using CblasColMajor everywhere
     318             :  *
     319             :  * @param[in,out]       model   model to be updated
     320             :  * @param[in]           data    data used in model
     321             :  * @param[in]           work    allocated workspace to use
     322             :  */
     323         466 : void gensvm_get_update(struct GenModel *model, struct GenData *data,
     324             :                 struct GenWork *work)
     325             : {
     326             :         int status;
     327             :         long i, j;
     328             : 
     329         466 :         long m = model->m;
     330         466 :         long K = model->K;
     331             : 
     332             :         // compute the ZAZ and ZB matrices
     333         466 :         gensvm_get_ZAZ_ZB(model, data, work);
     334             : 
     335             :         // Calculate right-hand side of system we want to solve
     336             :         // dsymm performs ZB := 1.0 * (ZAZ) * Vbar + 1.0 * ZB
     337             :         // the right-hand side is thus stored in ZB after this call
     338             :         // Note: LDB and LDC are second dimensions of the matrices due to
     339             :         // Row-Major order
     340        1864 :         cblas_dsymm(CblasRowMajor, CblasLeft, CblasUpper, m+1, K-1, 1,
     341        1864 :                         work->ZAZ, m+1, model->V, K-1, 1.0, work->ZB, K-1);
     342             : 
     343             :         // Calculate left-hand side of system we want to solve
     344             :         // Add lambda to all diagonal elements except the first one. Recall
     345             :         // that ZAZ is of size m+1 and is symmetric.
     346        2302 :         for (i=m+2; i<=m*(m+2); i+=m+2)
     347        1836 :                 work->ZAZ[i] += model->lambda;
     348             : 
     349             :         // Lapack uses column-major order, so we transform the ZB matrix to
     350             :         // correspond to this.
     351        2768 :         for (i=0; i<m+1; i++)
     352        9200 :                 for (j=0; j<K-1; j++)
     353        6898 :                         work->ZBc[j*(m+1)+i] = work->ZB[i*(K-1)+j];
     354             : 
     355             :         // Solve the system using dposv. Note that above the upper triangular
     356             :         // part has always been used in row-major order for ZAZ. This
     357             :         // corresponds to the lower triangular part in column-major order.
     358         466 :         status = dposv('L', m+1, K-1, work->ZAZ, m+1, work->ZBc, m+1);
     359             : 
     360             :         // Use dsysv as fallback, for when the ZAZ matrix is not positive
     361             :         // semi-definite for some reason (perhaps due to rounding errors).
     362             :         // This step shouldn't be necessary but is included for safety.
     363         466 :         if (status != 0) {
     364           0 :                 err("[GenSVM Warning]: Received nonzero status from "
     365             :                                 "dposv: %i\n", status);
     366           0 :                 int *IPIV = Malloc(int, m+1);
     367           0 :                 double *WORK = Malloc(double, 1);
     368           0 :                 status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc,
     369           0 :                                 m+1, WORK, -1);
     370             : 
     371           0 :                 int LWORK = WORK[0];
     372           0 :                 WORK = Realloc(WORK, double, LWORK);
     373           0 :                 status = dsysv('L', m+1, K-1, work->ZAZ, m+1, IPIV, work->ZBc,
     374           0 :                                 m+1, WORK, LWORK);
     375           0 :                 if (status != 0)
     376           0 :                         err("[GenSVM Warning]: Received nonzero "
     377             :                                         "status from dsysv: %i\n", status);
     378           0 :                 free(WORK);
     379           0 :                 WORK = NULL;
     380           0 :                 free(IPIV);
     381           0 :                 IPIV = NULL;
     382             :         }
     383             : 
     384             :         // the solution is now stored in ZBc, in column-major order. Here we
     385             :         // convert this back to row-major order
     386        2768 :         for (i=0; i<m+1; i++)
     387        9200 :                 for (j=0; j<K-1; j++)
     388        6898 :                         work->ZB[i*(K-1)+j] = work->ZBc[j*(m+1)+i];
     389             : 
     390             :         // copy the old V to Vbar and the new solution to V
     391        2768 :         for (i=0; i<m+1; i++) {
     392        9200 :                 for (j=0; j<K-1; j++) {
     393        6898 :                         matrix_set(model->Vbar, K-1, i, j,
     394             :                                         matrix_get(model->V, K-1, i, j));
     395        6898 :                         matrix_set(model->V, K-1, i, j,
     396             :                                         matrix_get(work->ZB, K-1, i, j));
     397             :                 }
     398             :         }
     399         466 : }
     400             : 
     401             : /**
     402             :  * @brief Calculate Z'*A*Z and Z'*B for dense matrices
     403             :  *
     404             :  * @details
     405             :  * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z
     406             :  * is stored as a dense matrix. It calculates the Z'*A*Z product by
     407             :  * constructing a matrix LZ = (A^(1/2) * Z), and calculating (LZ)'*(LZ) with
     408             :  * the BLAS dsyrk function. The matrix Z'*B is calculated with successive
     409             :  * rank-1 updates using the BLAS dger function. These functions came out as
     410             :  * the most efficient way to do these computations in several simulation
     411             :  * studies.
     412             :  *
     413             :  * @param[in]           model   a GenModel holding the current model
     414             :  * @param[in]           data    a GenData with the data
     415             :  * @param[in,out]       work    an allocated GenWork structure, contains
     416             :  *                              updated ZAZ and ZB matrices on exit.
     417             :  */
     418         465 : void gensvm_get_ZAZ_ZB_dense(struct GenModel *model, struct GenData *data,
     419             :                 struct GenWork *work)
     420             : {
     421             :         long i;
     422             :         double alpha, sqalpha;
     423             : 
     424         465 :         long n = model->n;
     425         465 :         long m = model->m;
     426         465 :         long K = model->K;
     427             : 
     428             :         // generate Z'*A*Z and Z'*B by rank 1 operations
     429        4851 :         for (i=0; i<n; i++) {
     430        4386 :                 alpha = gensvm_get_alpha_beta(model, data, i, work->beta);
     431             : 
     432             :                 // calculate row of matrix LZ, which is a scalar
     433             :                 // multiplication of sqrt(alpha_i) and row z_i' of Z
     434             :                 // Note that we use the fact that the first column of Z is
     435             :                 // always 1, by only computing the product for m values and
     436             :                 // copying the first element over.
     437        4386 :                 sqalpha = sqrt(alpha);
     438        4386 :                 work->LZ[i*(m+1)] = sqalpha;
     439        4386 :                 cblas_daxpy(m, sqalpha, &data->Z[i*(m+1)+1], 1,
     440        4386 :                                 &work->LZ[i*(m+1)+1], 1);
     441             : 
     442             :                 // rank 1 update of matrix Z'*B
     443             :                 // Note: LDA is the second dimension of ZB because of
     444             :                 // Row-Major order
     445        8772 :                 cblas_dger(CblasRowMajor, m+1, K-1, 1, &data->Z[i*(m+1)], 1,
     446        8772 :                                 work->beta, 1, work->ZB, K-1);
     447             :         }
     448             : 
     449             :         // calculate Z'*A*Z by symmetric multiplication of LZ with itself
     450             :         // (ZAZ = (LZ)' * (LZ)
     451        1395 :         cblas_dsyrk(CblasRowMajor, CblasUpper, CblasTrans, m+1, n, 1.0,
     452        1395 :                         work->LZ, m+1, 0.0, work->ZAZ, m+1);
     453         465 : }
     454             : 
     455             : /**
     456             :  * @brief Calculate Z'*A*Z and Z'*B for sparse matrices
     457             :  *
     458             :  * @details
     459             :  * This function calculates the matrices Z'*A*Z and Z'*B for the case where Z 
     460             :  * is stored as a CSR sparse matrix (GenSparse structure). It computes only 
     461             :  * the products of the Z'*A*Z matrix that need to be computed, and updates the 
     462             :  * Z'*B matrix row-wise for each non-zero element of a row of Z, using a BLAS 
     463             :  * daxpy call.
     464             :  *
     465             :  * This function calculates the matrix product Z'*A*Z in separate blocks, 
     466             :  * based on the number of rows defined in the GENSVM_BLOCK_SIZE variable. This 
     467             :  * is done to improve numerical precision for very large datasets. Due to 
     468             :  * rounding errors, precision can become an issue for these large datasets, 
     469             :  * when separate blocks are used and added to the result separately, this can 
     470             :  * be alleviated a little bit. See also: http://stackoverflow.com/q/40286989
     471             :  *
     472             :  * @sa
     473             :  * gensvm_get_ZAZ_ZB()
     474             :  * gensvm_get_ZAZ_ZB_dense()
     475             :  *
     476             :  * @param[in]           model   a GenModel holding the current model
     477             :  * @param[in]           data    a GenData with the data
     478             :  * @param[in,out]       work    an allocated GenWork structure, contains
     479             :  *                              updated ZAZ and ZB matrices on exit.
     480             :  */
     481           1 : void gensvm_get_ZAZ_ZB_sparse(struct GenModel *model, struct GenData *data,
     482             :                 struct GenWork *work)
     483             : {
     484           1 :         long *Zia = NULL,
     485           1 :              *Zja = NULL;
     486             :         long b, i, j, k, K, jj, kk, jj_start, jj_end, blk_start, blk_end,
     487           1 :              rem_size, n_blocks, n_row = data->spZ->n_row,
     488           1 :              n_col = data->spZ->n_col;
     489           1 :         double temp, alpha, z_ij, *vals = NULL;
     490             : 
     491           1 :         K = model->K;
     492           1 :         Zia = data->spZ->ia;
     493           1 :         Zja = data->spZ->ja;
     494           1 :         vals = data->spZ->values;
     495             : 
     496             :         // calculate ZAZ using blocks of rows of Z. This helps avoiding 
     497             :         // rounding errors, which increases precision, and in turn helps 
     498             :         // convergence of the IM algorithm.
     499             :         // see also: http://stackoverflow.com/q/40286989/
     500           1 :         n_blocks = floor(n_row / GENSVM_BLOCK_SIZE);
     501           1 :         rem_size = n_row % GENSVM_BLOCK_SIZE;
     502             : 
     503           2 :         for (b=0; b<=n_blocks; b++) {
     504           1 :                 blk_start = b * GENSVM_BLOCK_SIZE;
     505           1 :                 blk_end = blk_start;
     506           1 :                 blk_end += (b == n_blocks) ? rem_size : GENSVM_BLOCK_SIZE;
     507             : 
     508           1 :                 Memset(work->tmpZAZ, double, n_col*n_col);
     509           9 :                 for (i=blk_start; i<blk_end; i++) {
     510           8 :                         alpha = gensvm_get_alpha_beta(model, data, i, 
     511             :                                         work->beta);
     512           8 :                         jj_start = Zia[i];
     513           8 :                         jj_end = Zia[i+1];
     514             : 
     515          40 :                         for (jj=jj_start; jj<jj_end; jj++) {
     516          32 :                                 j = Zja[jj];
     517          32 :                                 z_ij = vals[jj];
     518          32 :                                 cblas_daxpy(K-1, z_ij, work->beta, 1,
     519          32 :                                                 &work->ZB[j*(K-1)], 1);
     520          32 :                                 z_ij *= alpha;
     521         112 :                                 for (kk=jj; kk<jj_end; kk++) {
     522          80 :                                         matrix_add(work->tmpZAZ, n_col, j, 
     523             :                                                         Zja[kk], 
     524             :                                                         z_ij * vals[kk]);
     525             :                                 }
     526             :                         }
     527             :                 }
     528             : 
     529             :                 // copy the intermediate results over to the actual ZAZ matrix
     530           5 :                 for (j=0; j<n_col; j++) {
     531          14 :                         for (k=j; k<n_col; k++) {
     532          10 :                                 temp = matrix_get(work->tmpZAZ, n_col, j, k);
     533          10 :                                 matrix_add(work->ZAZ, n_col, j, k, temp);
     534             :                         }
     535             :                 }
     536             :         }
     537           1 : }
     538             : 
     539             : /**
     540             :  * @brief Wrapper around calculation of Z'*A*Z and Z'*B for sparse and dense
     541             :  *
     542             :  * @details
     543             :  * This is a wrapper around gensvm_get_ZAZ_ZB_dense() and 
     544             :  * gensvm_get_ZAZ_ZB_sparse(). See the documentation of those functions for 
     545             :  * more info.
     546             :  *
     547             :  * @param[in]    model  a GenModel struct
     548             :  * @param[in]    data   a GenData struct
     549             :  * @param[in]    work   a GenWork struct
     550             :  *
     551             :  */
     552         466 : void gensvm_get_ZAZ_ZB(struct GenModel *model, struct GenData *data,
     553             :                 struct GenWork *work)
     554             : {
     555         466 :         gensvm_reset_work(work);
     556             : 
     557         466 :         if (data->Z == NULL)
     558           1 :                 gensvm_get_ZAZ_ZB_sparse(model, data, work);
     559             :         else
     560         465 :                 gensvm_get_ZAZ_ZB_dense(model, data, work);
     561         466 : }
     562             : 
     563             : /**
     564             :  * @brief Solve AX = B where A is symmetric positive definite.
     565             :  *
     566             :  * @details
     567             :  * Solve a linear system of equations AX = B where A is symmetric positive
     568             :  * definite. This function is a wrapper for the external  LAPACK routine
     569             :  * dposv.
     570             :  *
     571             :  * @param[in]           UPLO    which triangle of A is stored
     572             :  * @param[in]           N       order of A
     573             :  * @param[in]           NRHS    number of columns of B
     574             :  * @param[in,out]       A       double precision array of size (LDA, N). On
     575             :  *                              exit contains the upper or lower factor of the
     576             :  *                              Cholesky factorization of A.
     577             :  * @param[in]           LDA     leading dimension of A
     578             :  * @param[in,out]       B       double precision array of size (LDB, NRHS). On
     579             :  *                              exit contains the N-by-NRHS solution matrix X.
     580             :  * @param[in]           LDB     the leading dimension of B
     581             :  * @returns                     info parameter which contains the status of the
     582             :  *                              computation:
     583             :  *                                      - =0:   success
     584             :  *                                      - <0:        if -i, the i-th argument had
     585             :  *                                              an illegal value
     586             :  *                                      - >0:        if i, the leading minor of A
     587             :  *                                              was not positive definite
     588             :  *
     589             :  * See the LAPACK documentation at:
     590             :  * http://www.netlib.org/lapack/explore-html/dc/de9/group__double_p_osolve.html
     591             :  */
     592         467 : int dposv(char UPLO, int N, int NRHS, double *A, int LDA, double *B,
     593             :                 int LDB)
     594             : {
     595             :         extern void dposv_(char *UPLO, int *Np, int *NRHSp, double *A,
     596             :                         int *LDAp, double *B, int *LDBp, int *INFOp);
     597             :         int INFO;
     598         467 :         dposv_(&UPLO, &N, &NRHS, A, &LDA, B, &LDB, &INFO);
     599         467 :         return INFO;
     600             : }
     601             : 
     602             : /**
     603             :  * @brief Solve a system of equations AX = B where A is symmetric.
     604             :  *
     605             :  * @details
     606             :  * Solve a linear system of equations AX = B where A is symmetric. This
     607             :  * function is a wrapper for the external LAPACK routine dsysv.
     608             :  *
     609             :  * @param[in]           UPLO    which triangle of A is stored
     610             :  * @param[in]           N       order of A
     611             :  * @param[in]           NRHS    number of columns of B
     612             :  * @param[in,out]       A       double precision array of size (LDA, N). On
     613             :  *                              exit contains the block diagonal matrix D and
     614             :  *                              the multipliers used to obtain the factor U or
     615             :  *                              L from the factorization A = U*D*U**T or
     616             :  *                              A = L*D*L**T.
     617             :  * @param[in]           LDA     leading dimension of A
     618             :  * @param[in]           IPIV    integer array containing the details of D
     619             :  * @param[in,out]       B       double precision array of size (LDB, NRHS). On
     620             :  *                              exit contains the N-by-NRHS matrix X
     621             :  * @param[in]           LDB     leading dimension of B
     622             :  * @param[out]          WORK    double precision array of size max(1,LWORK). On
     623             :  *                              exit, WORK(1) contains the optimal LWORK
     624             :  * @param[in]           LWORK   the length of WORK, can be used for determining
     625             :  *                              the optimal blocksize for dsystrf.
     626             :  * @returns                     info parameter which contains the status of the
     627             :  *                              computation:
     628             :  *                                      - =0:   success
     629             :  *                                      - <0:        if -i, the i-th argument had an
     630             :  *                                              illegal value
     631             :  *                                      - >0:        if i, D(i, i) is exactly zero,
     632             :  *                                              no solution can be computed.
     633             :  *
     634             :  * See the LAPACK documentation at:
     635             :  * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve.html
     636             :  */
     637           2 : int dsysv(char UPLO, int N, int NRHS, double *A, int LDA, int *IPIV,
     638             :                 double *B, int LDB, double *WORK, int LWORK)
     639             : {
     640             :         extern void dsysv_(char *UPLO, int *Np, int *NRHSp, double *A,
     641             :                         int *LDAp, int *IPIV, double *B, int *LDBp,
     642             :                         double *WORK, int *LWORK, int *INFOp);
     643             :         int INFO;
     644           2 :         dsysv_(&UPLO, &N, &NRHS, A, &LDA, IPIV, B, &LDB, WORK, &LWORK, &INFO);
     645           2 :         return INFO;
     646             : }

Generated by: LCOV version 1.12