00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071 #include <stdio.h>
00072 #include <stdlib.h>
00073 #include <string.h>
00074 #include <ctype.h>
00075 #include <math.h>
00076 #include "classifier/svm/gpdt.h"
00077 #include "classifier/svm/gpdtsolve.h"
00078
00079 void help_message(void);
00080 void fatalError(const char *msg1, const char *msg2);
00081
00082
00083
00084
00085
00086
00087
00088
00089 void help_message(void)
00090 {
00091 fprintf(stderr, "usage: gpdt [options] example_file model_file\n");
00092 fprintf(stderr, "options:\n");
00093 fprintf(stderr, " -? this help\n");
00094 fprintf(stderr, " -h display help message\n");
00095 fprintf(stderr, " -v [0..2] verbosity level (default 1)\n");
00096 fprintf(stderr, " -t [0..2] type of kernel function (default 2):\n");
00097 fprintf(stderr, " 0: linear (x'y)\n");
00098 fprintf(stderr, " 1: polynomial (s(x'y) + r)^d\n");
00099 fprintf(stderr, " 2: radial basis function (rbf): exp(-g||x - y||^2)\n");
00100 fprintf(stderr, " -s parameter s in polynomial kernel (default 1.0)\n");
00101 fprintf(stderr, " -r parameter r in polynomial kernel (default 1.0)\n");
00102 fprintf(stderr, " -d parameter d in polynomial kernel (default 3)\n");
00103 fprintf(stderr, " -g parameter g in rbf kernel (default 1.0)\n");
00104 fprintf(stderr, " -c parameter C for SVM classification: trade-off between\n");
00105 fprintf(stderr, " training error and margin (default 1.0)\n");
00106 fprintf(stderr, " -q size of the QP-subproblems: q >= 2 (default 400)\n");
00107 fprintf(stderr, " -n maximum number of new indices entering the working set\n");
00108 fprintf(stderr, " in each iteration: 2 <= n <= q, n even (default q/3)\n");
00109 fprintf(stderr, " -e tolerance for termination criterion (default 0.001)\n");
00110 fprintf(stderr, " -a [0, 1] gradient projection-type inner QP solver:\n");
00111 fprintf(stderr, " 0: Generalized Variable Projection method\n");
00112 fprintf(stderr, " 1: Dai-Fletcher Projected Gradient method (default)\n");
00113 fprintf(stderr, " -f projector type. 0: Pardalos, 1: Dai-Fletcher secant-based\n");
00114 fprintf(stderr, " -m cache size in MB (default 40)\n");
00115 fprintf(stderr, " -u parameter for proximal point modification (default 0)\n");
00116 exit(-1);
00117 }
00118
00119
00120
00121
00122 QPproblem::QPproblem()
00123 : CSGObject()
00124 {
00125
00126 maxmw = 40;
00127 c_const = 10.0;
00128 projection_solver = SOLVER_FLETCHER;
00129 projection_projector = 1;
00130 PreprocessMode = 0;
00131 delta = 1.0e-3;
00132 DELTAsv = EPS_SV;
00133 ker_type = 2;
00134 chunk_size = 400;
00135 q = -1;
00136 y = NULL;
00137 tau_proximal = 0.0;
00138 dim = 1;
00139 }
00140
00141
00142
00143
00144 QPproblem::~QPproblem()
00145 {
00146
00147 }
00148
00149
00150
00151
00152 void QPproblem::Subproblem(QPproblem &p, int len, int *perm)
00153 {
00154 int k;
00155
00156 memcpy(this, &p, sizeof(QPproblem));
00157 ell = len;
00158
00159 KER->SetSubproblem(p.KER, len, perm);
00160 y = (int *)malloc(len * sizeof(int));
00161 for (k = 0; k < ell; k++)
00162 y[k] = p.y[perm[k]];
00163 }
00164
00165
00166
00167
00168 int prescan_document(char *file, int *lines, int *vlen, int *ll)
00169 {
00170 FILE *fl;
00171 int ic;
00172 char c;
00173 long current_length, current_vlen;
00174
00175 if ((fl = fopen (file, "r")) == NULL)
00176 return(-1);
00177 current_length = 0;
00178 current_vlen = 0;
00179
00180 *ll = 0;
00181
00182 *lines = 1;
00183 *vlen = 0;
00184
00185 while ((ic = getc(fl)) != EOF)
00186 {
00187 c = (char)ic;
00188 current_length++;
00189
00190 if (c == ' ')
00191 current_vlen++;
00192
00193 if (c == '\n')
00194 {
00195 (*lines)++;
00196 if (current_length > (*ll))
00197 *ll = current_length;
00198 if (current_vlen > (*vlen))
00199 *vlen = current_vlen;
00200 current_length = 0;
00201 current_vlen = 0;
00202 }
00203 }
00204 fclose(fl);
00205 return(0);
00206 }
00207
00208
00209
00210
00211 int QPproblem::ReadGPDTBinary(char *fName)
00212 {
00213 int i, v;
00214 int **data_ix, *data_lx;
00215 float **data_x;
00216 FILE *fp = fopen(fName, "rb");
00217
00218 if (fp == NULL) return -1;
00219
00220 fread(&v, 1, 4, fp);
00221 if (v != 0)
00222 {
00223 fprintf(stderr, "Wrong binary file format.\n");
00224 fclose(fp);
00225 return -2;
00226 }
00227 fread(&ell, 1, 4, fp);
00228 fread(&dim, 1, 4, fp);
00229
00230 data_lx = (int *)malloc(ell*sizeof(int ));
00231 data_ix = (int **)malloc(ell*sizeof(int * ));
00232 data_x = (float **)malloc(ell*sizeof(float *));
00233 y = (int *)malloc(ell*sizeof(int ));
00234
00235 fread(data_lx, ell, 4, fp);
00236 fread(y, ell, 4, fp);
00237
00238 for (i = 0; i < ell; i++)
00239 {
00240 data_ix[i] = (int *)malloc(data_lx[i]*sizeof(int ));
00241 data_x[i] = (float *)malloc(data_lx[i]*sizeof(float));
00242 fread(data_ix[i], data_lx[i], 4, fp);
00243 fread(data_x[i], data_lx[i], 4, fp);
00244 }
00245 fclose(fp);
00246
00247 if (chunk_size > ell)
00248 chunk_size = ell;
00249 if (q > chunk_size)
00250 q = chunk_size;
00251
00252
00253 KER->SetData(data_x, data_ix, data_lx, ell, dim);
00254
00255 return 0;
00256 }
00257
00258
00259
00260
00261 int QPproblem::ReadSVMFile(char *f1_input)
00262 {
00263 char *line;
00264 int i, j, ell_space, vlen, max_row_length;
00265 int *line_ix;
00266 int **data_ix, *data_lx;
00267 float *line_x;
00268 float **data_x;
00269 FILE *fp1_in;
00270
00271
00272 if (prescan_document(f1_input, &ell_space, &vlen, &max_row_length))
00273 return(-1);
00274
00275 ell_space += 10;
00276 vlen += 10;
00277 max_row_length += 10;
00278
00279
00280 dim = 0;
00281 ell = 0;
00282 data_lx = (int *)malloc(ell_space*sizeof(int ));
00283 data_ix = (int **)malloc(ell_space*sizeof(int * ));
00284 data_x = (float **)malloc(ell_space*sizeof(float * ));
00285 y = (int *)malloc(ell_space*sizeof(int ));
00286 line = (char *)malloc(max_row_length*sizeof(char));
00287 line_ix = (int *)malloc(vlen*sizeof(int ));
00288 line_x = (float *)malloc(vlen*sizeof(float ));
00289
00290
00291 fp1_in = fopen(f1_input, "r");
00292 if (fp1_in == NULL)
00293 return(-1);
00294
00295
00296 fgets(line, max_row_length, fp1_in);
00297
00298 while (!feof(fp1_in))
00299 {
00300 for (i = 0; line[i] != 0 && line[i] != '#'; i++);
00301 line[i] = 0;
00302
00303 if (sscanf(line, "%d", &j) != EOF)
00304 {
00305 if (j != -1 && j != 1)
00306 {
00307 fprintf(stderr, "ERROR line %d: label must be -1 or 1.\n", ell);
00308 exit(0);
00309 }
00310 y[ell] = j;
00311
00312 j = i = 0;
00313 while (line[i] == ' ' || line[i] == '\t') i++;
00314 while (line[i] > ' ') i++;
00315 while (sscanf(line+i, "%d:%f", &line_ix[j], &line_x[j]) != EOF)
00316 {
00317 while (line[i] == ' ' || line[i] == '\t') i++;
00318 while (line[++i] > ' ');
00319 j++;
00320 }
00321
00322 data_lx[ell] = j;
00323 if (data_lx[ell] > 0)
00324 {
00325 data_ix[ell] = (int *)malloc(data_lx[ell]*sizeof(int ));
00326 data_x[ell] = (float *)malloc(data_lx[ell]*sizeof(float));
00327
00328 memcpy(data_ix[ell], line_ix, data_lx[ell]*sizeof(int ));
00329 memcpy(data_x[ell], line_x, data_lx[ell]*sizeof(float));
00330
00331 if (dim < (data_ix[ell][data_lx[ell]-1] + 1))
00332 dim = data_ix[ell][data_lx[ell]-1] + 1;
00333 }
00334 else
00335 {
00336 data_ix[ell] = (int *)malloc(sizeof(int ));
00337 data_x[ell] = (float *)malloc(sizeof(float));
00338 *(data_ix[ell]) = 0;
00339 *(data_x[ell]) = 0.0;
00340 }
00341 ell++;
00342
00343 if (verbosity > 1)
00344 if ((ell % 1000) == 0)
00345 fprintf(stderr, " %d...", ell);
00346 }
00347 fgets(line, max_row_length, fp1_in);
00348 }
00349 fclose(fp1_in);
00350
00351
00352 if (chunk_size > ell)
00353 chunk_size = ell;
00354 if (q > chunk_size)
00355 q = chunk_size;
00356
00357
00358 free(line_x);
00359 free(line_ix);
00360 free(line);
00361
00362
00363 KER->SetData(data_x, data_ix, data_lx, ell, dim);
00364
00365 return(0);
00366 }
00367
00368
00369
00370
00371 int QPproblem::Check2Class(void)
00372 {
00373 int i;
00374
00375 for (i = 1; i < ell; i++)
00376 if (y[i] != y[0])
00377 return 0;
00378 return 1;
00379 }
00380
00381
00382
00383
00384 void SplitParts(int n, int part, int parts, int *dim, int *off)
00385 {
00386 int r;
00387
00388 r = n % parts;
00389 *dim = n / parts;
00390
00391 if (part < r)
00392 {
00393 (*dim)++;
00394 *off = *dim * part;
00395 }
00396 else
00397 *off = *dim * part + r;
00398 }
00399
00400
00401
00402
00403
00404 sKernel::sKernel (CKernel* k, int l)
00405 {
00406 kernel=k;
00407 ell=l;
00408 nor = NULL;
00409 vaux = NULL;
00410 lx = NULL;
00411 ix = NULL;
00412 x = NULL;
00413 IsSubproblem = 0;
00414 KernelEvaluations = 0.0;
00415 }
00416
00417
00418
00419
00420 void sKernel::SetData(float **x_, int **ix_, int *lx_, int _ell, int _dim)
00421 {
00422 int i, j, k;
00423
00424 dim = _dim;
00425 ell = _ell;
00426 nor = (double *)malloc(ell*sizeof(double));
00427 vaux = (float *)malloc(dim*sizeof(float ));
00428 memset(vaux, 0, dim*sizeof(float));
00429
00430 IsSubproblem = 0;
00431 x = x_;
00432 ix = ix_;
00433 lx = lx_;
00434
00435
00436 vauxRow = 0;
00437 i = vauxRow;
00438 for (k = 0; k < lx[i]; k++)
00439 vaux[ix[i][k]] = x[i][k];
00440
00441
00442 for (i = 0; i < ell; i++)
00443 {
00444 nor[i] = 0.0;
00445 for (j = 0; j < lx[i]; j++)
00446 nor[i] += (double)(x[i][j]*x[i][j]);
00447 }
00448 }
00449
00450
00451
00452
00453 void sKernel::SetSubproblem(sKernel* ker, int len, int *perm)
00454 {
00455 int k;
00456
00457
00458 nor = (double *)malloc(len*sizeof(double ));
00459 vaux = (float *)malloc(ker->dim*sizeof(float));
00460 memset(vaux, 0, ker->dim*sizeof(float));
00461
00462 lx = (int *)malloc(len * sizeof(int ));
00463 ix = (int **)malloc(len * sizeof(int *));
00464 x = (float **)malloc(len * sizeof(float *));
00465 IsSubproblem = 1;
00466
00467 for (k = 0; k < len; k++)
00468 {
00469 x[k] = ker->x[perm[k]];
00470 ix[k] = ker->ix[perm[k]];
00471 lx[k] = ker->lx[perm[k]];
00472 nor[k] = ker->nor[perm[k]];
00473 }
00474
00475
00476 vauxRow = 0;
00477 for (k = 0; k < lx[vauxRow]; k++)
00478 vaux[ix[vauxRow][k]] = x[vauxRow][k];
00479 }
00480
00481
00482
00483
00484 sKernel::~sKernel()
00485 {
00486 int i;
00487
00488 if (nor != NULL) free(nor);
00489 if (vaux != NULL) free(vaux);
00490
00491 if (lx != NULL) free(lx);
00492 if (ix != NULL)
00493 {
00494 if (!IsSubproblem)
00495 for (i = 0; i < ell; i++)
00496 free(ix[i]);
00497 free(ix);
00498 }
00499 if (x != NULL)
00500 {
00501 if (!IsSubproblem)
00502 for (i = 0; i < ell; i++)
00503 free(x[i]);
00504 free(x);
00505 }
00506 }
00507
00508
00509
00510
00511