00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <stdio.h>
00072 #include <stdlib.h>
00073 #include <string.h>
00074 #include <ctype.h>
00075 #include <math.h>
00076 #include "classifier/svm/gpdt.h"
00077 #include "classifier/svm/gpdtsolve.h"
00078
00079 void help_message(void);
00080 void fatalError(const char *msg1, const char *msg2);
00081
00082
00083
00084
00085
00086
00087
00088
00089 void help_message(void)
00090 {
00091 fprintf(stderr, "usage: gpdt [options] example_file model_file\n");
00092 fprintf(stderr, "options:\n");
00093 fprintf(stderr, " -? this help\n");
00094 fprintf(stderr, " -h display help message\n");
00095 fprintf(stderr, " -v [0..2] verbosity level (default 1)\n");
00096 fprintf(stderr, " -t [0..2] type of kernel function (default 2):\n");
00097 fprintf(stderr, " 0: linear (x'y)\n");
00098 fprintf(stderr, " 1: polynomial (s(x'y) + r)^d\n");
00099 fprintf(stderr, " 2: radial basis function (rbf): exp(-g||x - y||^2)\n");
00100 fprintf(stderr, " -s parameter s in polynomial kernel (default 1.0)\n");
00101 fprintf(stderr, " -r parameter r in polynomial kernel (default 1.0)\n");
00102 fprintf(stderr, " -d parameter d in polynomial kernel (default 3)\n");
00103 fprintf(stderr, " -g parameter g in rbf kernel (default 1.0)\n");
00104 fprintf(stderr, " -c parameter C for SVM classification: trade-off between\n");
00105 fprintf(stderr, " training error and margin (default 1.0)\n");
00106 fprintf(stderr, " -q size of the QP-subproblems: q >= 2 (default 400)\n");
00107 fprintf(stderr, " -n maximum number of new indices entering the working set\n");
00108 fprintf(stderr, " in each iteration: 2 <= n <= q, n even (default q/3)\n");
00109 fprintf(stderr, " -e tolerance for termination criterion (default 0.001)\n");
00110 fprintf(stderr, " -a [0, 1] gradient projection-type inner QP solver:\n");
00111 fprintf(stderr, " 0: Generalized Variable Projection method\n");
00112 fprintf(stderr, " 1: Dai-Fletcher Projected Gradient method (default)\n");
00113 fprintf(stderr, " -f projector type. 0: Pardalos, 1: Dai-Fletcher secant-based\n");
00114 fprintf(stderr, " -m cache size in MB (default 40)\n");
00115 fprintf(stderr, " -u parameter for proximal point modification (default 0)\n");
00116 exit(-1);
00117 }
00118
00119
00120
00121
00122 QPproblem::QPproblem()
00123 : CSGObject()
00124 {
00125
00126 maxmw = 40;
00127 c_const = 10.0;
00128 projection_solver = SOLVER_FLETCHER;
00129 projection_projector = 1;
00130 PreprocessMode = 0;
00131 delta = 1.0e-3;
00132 DELTAsv = EPS_SV;
00133 ker_type = 2;
00134 chunk_size = 400;
00135 q = -1;
00136 y = NULL;
00137 tau_proximal = 0.0;
00138 dim = 1;
00139 }
00140
00141
00142
00143
00144 QPproblem::~QPproblem()
00145 {
00146
00147 }
00148
00149
00150
00151
00152 void QPproblem::Subproblem(QPproblem &p, int32_t len, int32_t *perm)
00153 {
00154 int32_t k;
00155
00156 memcpy(this, &p, sizeof(QPproblem));
00157 ell = len;
00158
00159 KER->SetSubproblem(p.KER, len, perm);
00160 y = (int32_t *)malloc(len * sizeof(int32_t));
00161 for (k = 0; k < ell; k++)
00162 y[k] = p.y[perm[k]];
00163 }
00164
00165
00166
00167
00168 int32_t prescan_document(char *file, int32_t *lines, int32_t *vlen, int32_t *ll)
00169 {
00170 FILE *fl;
00171 int32_t ic;
00172 char c;
00173 int64_t current_length, current_vlen;
00174
00175 if ((fl = fopen (file, "r")) == NULL)
00176 return(-1);
00177 current_length = 0;
00178 current_vlen = 0;
00179
00180 *ll = 0;
00181
00182 *lines = 1;
00183 *vlen = 0;
00184
00185 while ((ic = getc(fl)) != EOF)
00186 {
00187 c = (char)ic;
00188 current_length++;
00189
00190 if (c == ' ')
00191 current_vlen++;
00192
00193 if (c == '\n')
00194 {
00195 (*lines)++;
00196 if (current_length > (*ll))
00197 *ll = current_length;
00198 if (current_vlen > (*vlen))
00199 *vlen = current_vlen;
00200 current_length = 0;
00201 current_vlen = 0;
00202 }
00203 }
00204 fclose(fl);
00205 return(0);
00206 }
00207
00208
00209
00210
00211 int32_t QPproblem::ReadGPDTBinary(char *fName)
00212 {
00213 int32_t i, v;
00214 int32_t **data_ix, *data_lx;
00215 float32_t **data_x;
00216 FILE *fp = fopen(fName, "rb");
00217
00218 if (fp == NULL) return -1;
00219
00220 fread(&v, 1, 4, fp);
00221 if (v != 0)
00222 {
00223 fprintf(stderr, "Wrong binary file format.\n");
00224 fclose(fp);
00225 return -2;
00226 }
00227 fread(&ell, 1, 4, fp);
00228 fread(&dim, 1, 4, fp);
00229
00230 data_lx = (int32_t *) malloc(ell*sizeof(int32_t));
00231 data_ix = (int32_t **) malloc(ell*sizeof(int32_t *));
00232 data_x = (float32_t **) malloc(ell*sizeof(float32_t *));
00233 y = (int32_t *) malloc(ell*sizeof(int32_t));
00234
00235 fread(data_lx, ell, 4, fp);
00236 fread(y, ell, 4, fp);
00237
00238 for (i = 0; i < ell; i++)
00239 {
00240 data_ix[i] = (int32_t *)malloc(data_lx[i]*sizeof(int32_t));
00241 data_x[i] = (float32_t *)malloc(data_lx[i]*sizeof(float32_t));
00242 fread(data_ix[i], data_lx[i], 4, fp);
00243 fread(data_x[i], data_lx[i], 4, fp);
00244 }
00245 fclose(fp);
00246
00247 if (chunk_size > ell)
00248 chunk_size = ell;
00249 if (q > chunk_size)
00250 q = chunk_size;
00251
00252
00253 KER->SetData(data_x, data_ix, data_lx, ell, dim);
00254
00255 return 0;
00256 }
00257
00258
00259
00260
00261 int32_t QPproblem::ReadSVMFile(char *f1_input)
00262 {
00263 char *line;
00264 int32_t i, j, ell_space, vlen, max_row_length;
00265 int32_t *line_ix;
00266 int32_t **data_ix, *data_lx;
00267 float32_t *line_x;
00268 float32_t **data_x;
00269 FILE *fp1_in;
00270
00271
00272 if (prescan_document(f1_input, &ell_space, &vlen, &max_row_length))
00273 return(-1);
00274
00275 ell_space += 10;
00276 vlen += 10;
00277 max_row_length += 10;
00278
00279
00280 dim = 0;
00281 ell = 0;
00282 data_lx = (int32_t *) malloc(ell_space*sizeof(int32_t));
00283 data_ix = (int32_t **) malloc(ell_space*sizeof(int32_t *));
00284 data_x = (float32_t **) malloc(ell_space*sizeof(float32_t * ));
00285 y = (int32_t *) malloc(ell_space*sizeof(int32_t));
00286 line = (char *) malloc(max_row_length*sizeof(char));
00287 line_ix = (int32_t *) malloc(vlen*sizeof(int32_t));
00288 line_x = (float32_t *) malloc(vlen*sizeof(float32_t ));
00289
00290
00291 fp1_in = fopen(f1_input, "r");
00292 if (fp1_in == NULL)
00293 return(-1);
00294
00295
00296 fgets(line, max_row_length, fp1_in);
00297
00298 while (!feof(fp1_in))
00299 {
00300 for (i = 0; line[i] != 0 && line[i] != '#'; i++);
00301 line[i] = 0;
00302
00303 if (sscanf(line, "%d", &j) != EOF)
00304 {
00305 if (j != -1 && j != 1)
00306 {
00307 fprintf(stderr, "ERROR line %d: label must be -1 or 1.\n", ell);
00308 exit(0);
00309 }
00310 y[ell] = j;
00311
00312 j = i = 0;
00313 while (line[i] == ' ' || line[i] == '\t') i++;
00314 while (line[i] > ' ') i++;
00315 while (sscanf(line+i, "%d:%f", &line_ix[j], &line_x[j]) != EOF)
00316 {
00317 while (line[i] == ' ' || line[i] == '\t') i++;
00318 while (line[++i] > ' ');
00319 j++;
00320 }
00321
00322 data_lx[ell] = j;
00323 if (data_lx[ell] > 0)
00324 {
00325 data_ix[ell] = (int32_t *) malloc(data_lx[ell]*sizeof(int32_t));
00326 data_x[ell] = (float32_t *) malloc(data_lx[ell]*sizeof(float32_t));
00327
00328 memcpy(data_ix[ell], line_ix, data_lx[ell]*sizeof(int32_t));
00329 memcpy(data_x[ell], line_x, data_lx[ell]*sizeof(float32_t));
00330
00331 if (dim < (data_ix[ell][data_lx[ell]-1] + 1))
00332 dim = data_ix[ell][data_lx[ell]-1] + 1;
00333 }
00334 else
00335 {
00336 data_ix[ell] = (int32_t *) malloc(sizeof(int32_t));
00337 data_x[ell] = (float32_t *) malloc(sizeof(float32_t));
00338 *(data_ix[ell]) = 0;
00339 *(data_x[ell]) = 0.0;
00340 }
00341 ell++;
00342
00343 if (verbosity > 1)
00344 if ((ell % 1000) == 0)
00345 fprintf(stderr, " %d...", ell);
00346 }
00347 fgets(line, max_row_length, fp1_in);
00348 }
00349 fclose(fp1_in);
00350
00351
00352 if (chunk_size > ell)
00353 chunk_size = ell;
00354 if (q > chunk_size)
00355 q = chunk_size;
00356
00357
00358 free(line_x);
00359 free(line_ix);
00360 free(line);
00361
00362
00363 KER->SetData(data_x, data_ix, data_lx, ell, dim);
00364
00365 return(0);
00366 }
00367
00368
00369
00370
00371 int32_t QPproblem::Check2Class(void)
00372 {
00373 int32_t i;
00374
00375 for (i = 1; i < ell; i++)
00376 if (y[i] != y[0])
00377 return 0;
00378 return 1;
00379 }
00380
00381
00382
00383
00384 void SplitParts(
00385 int32_t n, int32_t part, int32_t parts, int32_t *dim, int32_t *off)
00386 {
00387 int32_t r;
00388
00389 r = n % parts;
00390 *dim = n / parts;
00391
00392 if (part < r)
00393 {
00394 (*dim)++;
00395 *off = *dim * part;
00396 }
00397 else
00398 *off = *dim * part + r;
00399 }
00400
00401
00402
00403
00404
00405 sKernel::sKernel (CKernel* k, int32_t l)
00406 {
00407 kernel=k;
00408 ell=l;
00409 nor = NULL;
00410 vaux = NULL;
00411 lx = NULL;
00412 ix = NULL;
00413 x = NULL;
00414 IsSubproblem = 0;
00415 KernelEvaluations = 0.0;
00416 }
00417
00418
00419
00420
00421 void sKernel::SetData(
00422 float32_t **x_, int32_t **ix_, int32_t *lx_, int32_t _ell, int32_t _dim)
00423 {
00424 int32_t i, j, k;
00425
00426 dim = _dim;
00427 ell = _ell;
00428 nor = (float64_t *)malloc(ell*sizeof(float64_t));
00429 vaux = (float32_t *)malloc(dim*sizeof(float32_t ));
00430 memset(vaux, 0, dim*sizeof(float32_t));
00431
00432 IsSubproblem = 0;
00433 x = x_;
00434 ix = ix_;
00435 lx = lx_;
00436
00437
00438 vauxRow = 0;
00439 i = vauxRow;
00440 for (k = 0; k < lx[i]; k++)
00441 vaux[ix[i][k]] = x[i][k];
00442
00443
00444 for (i = 0; i < ell; i++)
00445 {
00446 nor[i] = 0.0;
00447 for (j = 0; j < lx[i]; j++)
00448 nor[i] += (float64_t)(x[i][j]*x[i][j]);
00449 }
00450 }
00451
00452
00453
00454
00455 void sKernel::SetSubproblem(sKernel* ker, int32_t len, int32_t *perm)
00456 {
00457 int32_t k;
00458
00459
00460 nor = (float64_t *) malloc(len*sizeof(float64_t));
00461 vaux = (float32_t *) malloc(ker->dim*sizeof(float32_t));
00462 memset(vaux, 0, ker->dim*sizeof(float32_t));
00463
00464 lx = (int32_t *) malloc(len * sizeof(int32_t));
00465 ix = (int32_t **) malloc(len * sizeof(int32_t *));
00466 x = (float32_t **) malloc(len * sizeof(float32_t *));
00467 IsSubproblem = 1;
00468
00469 for (k = 0; k < len; k++)
00470 {
00471 x[k] = ker->x[perm[k]];
00472 ix[k] = ker->ix[perm[k]];
00473 lx[k] = ker->lx[perm[k]];
00474 nor[k] = ker->nor[perm[k]];
00475 }
00476
00477
00478 vauxRow = 0;
00479 for (k = 0; k < lx[vauxRow]; k++)
00480 vaux[ix[vauxRow][k]] = x[vauxRow][k];
00481 }
00482
00483
00484
00485
00486 sKernel::~sKernel()
00487 {
00488 int32_t i;
00489
00490 if (nor != NULL) free(nor);
00491 if (vaux != NULL) free(vaux);
00492
00493 if (lx != NULL) free(lx);
00494 if (ix != NULL)
00495 {
00496 if (!IsSubproblem)
00497 for (i = 0; i < ell; i++)
00498 free(ix[i]);
00499 free(ix);
00500 }
00501 if (x != NULL)
00502 {
00503 if (!IsSubproblem)
00504 for (i = 0; i < ell; i++)
00505 free(x[i]);
00506 free(x);
00507 }
00508 }
00509
00510
00511
00512
00513