00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include "lib/common.h"
00018 #include "lib/Mathematics.h"
00019 #include "lib/lapack.h"
00020 #include "lib/io.h"
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025
00027
00029
00030 #ifdef USE_LOGCACHE
00031
00032 #ifdef USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-6
00035 #else
00036 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00037 #define LOG_TABLE_PRECISION 1e-15
00038 #endif
00039
00040 INT CMath::LOGACCURACY = 0;
00041 #endif
00042
00043 INT CMath::LOGRANGE = 0;
00044
00045 const DREAL CMath::INFTY = -log(0.0);
00046 const DREAL CMath::ALMOST_INFTY = +1e+20;
00047 const DREAL CMath::ALMOST_NEG_INFTY = -1000;
00048 #ifdef USE_LOGCACHE
00049 DREAL* CMath::logtable = NULL;
00050 #endif
00051 CHAR* CMath::rand_state = NULL;
00052 UINT CMath::seed = 0;
00053
00054 CMath::CMath()
00055 : CSGObject()
00056 {
00057 #ifndef HAVE_SWIG
00058 CSGObject::version.print_version();
00059 #endif
00060 CMath::rand_state=new CHAR[256];
00061 init_random();
00062 #ifndef HAVE_SWIG
00063 SG_PRINT( "( seeding random number generator with %u, ", seed);
00064 #endif
00065
00066 #ifdef USE_LOGCACHE
00067 LOGRANGE=CMath::determine_logrange();
00068 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00069 #ifndef HAVE_SWIG
00070 SG_PRINT( "initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",LOGRANGE,LOGACCURACY,sizeof(DREAL),LOGRANGE*LOGACCURACY*sizeof(DREAL)/(1024.0*1024.0)) ;
00071 #endif
00072
00073 CMath::logtable=new DREAL[LOGRANGE*LOGACCURACY];
00074 init_log_table();
00075 #else
00076 INT i=0;
00077 while ((DREAL)log(1+((DREAL)exp(-DREAL(i)))))
00078 i++;
00079 #ifndef HAVE_SWIG
00080 SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", i);
00081 #endif
00082 LOGRANGE=i;
00083 #endif
00084 }
00085
00086 CMath::~CMath()
00087 {
00088 delete[] CMath::rand_state;
00089 CMath::rand_state=NULL;
00090 #ifdef USE_LOGCACHE
00091 delete[] CMath::logtable;
00092 CMath::logtable=NULL;
00093 #endif
00094 }
00095
00096 #ifdef USE_LOGCACHE
00097 INT CMath::determine_logrange()
00098 {
00099 INT i;
00100 DREAL acc=0;
00101 for (i=0; i<50; i++)
00102 {
00103 acc=((DREAL)log(1+((DREAL)exp(-DREAL(i)))));
00104 if (acc<=(DREAL)LOG_TABLE_PRECISION)
00105 break;
00106 }
00107
00108 SG_INFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00109 return i;
00110 }
00111
00112 INT CMath::determine_logaccuracy(INT range)
00113 {
00114 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(DREAL));
00115 SG_INFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00116 return range;
00117 }
00118
00119
00120 void CMath::init_log_table()
00121 {
00122 for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
00123 logtable[i]=log(1+exp(DREAL(-i)/DREAL(LOGACCURACY)));
00124 }
00125 #endif
00126
00127 void CMath::sort(INT *a, INT cols, INT sort_col)
00128 {
00129 INT changed=1;
00130 if (a[0]==-1) return ;
00131 while (changed)
00132 {
00133 changed=0; INT i=0 ;
00134 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1))
00135 {
00136 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00137 {
00138 for (INT j=0; j<cols; j++)
00139 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00140 changed=1 ;
00141 } ;
00142 i++ ;
00143 } ;
00144 } ;
00145 }
00146
00147 void CMath::sort(DREAL *a, INT* idx, INT N)
00148 {
00149
00150 INT changed=1;
00151 while (changed)
00152 {
00153 changed=0;
00154 for (INT i=0; i<N-1; i++)
00155 {
00156 if (a[i]>a[i+1])
00157 {
00158 swap(a[i],a[i+1]) ;
00159 swap(idx[i],idx[i+1]) ;
00160 changed=1 ;
00161 } ;
00162 } ;
00163 } ;
00164
00165 }
00166
00167 DREAL CMath::Align(CHAR * seq1, CHAR* seq2, INT l1, INT l2, DREAL gapCost)
00168 {
00169 DREAL actCost=0 ;
00170 INT i1, i2 ;
00171 DREAL* const gapCosts1 = new DREAL[ l1 ];
00172 DREAL* const gapCosts2 = new DREAL[ l2 ];
00173 DREAL* costs2_0 = new DREAL[ l2 + 1 ];
00174 DREAL* costs2_1 = new DREAL[ l2 + 1 ];
00175
00176
00177 for( i1 = 0; i1 < l1; ++i1 ) {
00178 gapCosts1[ i1 ] = gapCost * i1;
00179 }
00180 costs2_1[ 0 ] = 0;
00181 for( i2 = 0; i2 < l2; ++i2 ) {
00182 gapCosts2[ i2 ] = gapCost * i2;
00183 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00184 }
00185
00186 for( i1 = 0; i1 < l1; ++i1 ) {
00187 swap( costs2_0, costs2_1 );
00188 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00189 costs2_1[ 0 ] = actCost;
00190 for( i2 = 0; i2 < l2; ++i2 ) {
00191 const DREAL actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00192 const DREAL actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00193 const DREAL actGap2 = actCost + gapCosts2[ i2 ];
00194 const DREAL actGap = min( actGap1, actGap2 );
00195 actCost = min( actMatch, actGap );
00196 costs2_1[ i2+1 ] = actCost;
00197 }
00198 }
00199
00200 delete [] gapCosts1;
00201 delete [] gapCosts2;
00202 delete [] costs2_0;
00203 delete [] costs2_1;
00204
00205
00206 return actCost;
00207 }
00208
00209
00210
00211 INT CMath::calcroc(DREAL* fp, DREAL* tp, DREAL* output, INT* label, INT& size, INT& possize, INT& negsize, DREAL& tresh, FILE* rocfile)
00212 {
00213 INT left=0;
00214 INT right=size-1;
00215 INT i;
00216
00217 for (i=0; i<size; i++)
00218 {
00219 if (!(label[i]== -1 || label[i]==1))
00220 return -1;
00221 }
00222
00223
00224 while (left<right)
00225 {
00226 while ((label[left] < 0) && (left<right))
00227 left++;
00228 while ((label[right] > 0) && (left<right))
00229 right--;
00230
00231 swap(output[left],output[right]);
00232 swap(label[left], label[right]);
00233 }
00234
00235
00236 negsize=left;
00237 possize=size-left;
00238 DREAL* negout=output;
00239 DREAL* posout=&output[left];
00240
00241
00242 qsort(negout, negsize);
00243 qsort(posout, possize);
00244
00245
00246 DREAL minimum=min(negout[0], posout[0]);
00247 DREAL maximum=minimum;
00248 if (negsize>0)
00249 maximum=max(maximum,negout[negsize-1]);
00250 if (possize>0)
00251 maximum=max(maximum,posout[possize-1]);
00252
00253 DREAL treshhold=minimum;
00254 DREAL old_treshhold=treshhold;
00255
00256
00257 for (i=0; i<size; i++)
00258 {
00259 fp[i]=1.0;
00260 tp[i]=1.0;
00261 }
00262
00263
00264
00265 INT posidx=0;
00266 INT negidx=0;
00267 INT iteration=1;
00268 INT returnidx=-1;
00269
00270 DREAL minerr=10;
00271
00272 while (iteration < size && treshhold<=maximum)
00273 {
00274 old_treshhold=treshhold;
00275
00276 while (treshhold==old_treshhold && treshhold<=maximum)
00277 {
00278 if (posidx<possize && negidx<negsize)
00279 {
00280 if (posout[posidx]<negout[negidx])
00281 {
00282 if (posout[posidx]==treshhold)
00283 posidx++;
00284 else
00285 treshhold=posout[posidx];
00286 }
00287 else
00288 {
00289 if (negout[negidx]==treshhold)
00290 negidx++;
00291 else
00292 treshhold=negout[negidx];
00293 }
00294 }
00295 else
00296 {
00297 if (posidx>=possize && negidx<negsize-1)
00298 {
00299 negidx++;
00300 treshhold=negout[negidx];
00301 }
00302 else if (negidx>=negsize && posidx<possize-1)
00303 {
00304 posidx++;
00305 treshhold=posout[posidx];
00306 }
00307 else if (negidx<negsize && treshhold!=negout[negidx])
00308 treshhold=negout[negidx];
00309 else if (posidx<possize && treshhold!=posout[posidx])
00310 treshhold=posout[posidx];
00311 else
00312 {
00313 treshhold=2*(maximum+100);
00314 posidx=possize;
00315 negidx=negsize;
00316 break;
00317 }
00318 }
00319 }
00320
00321
00322 tp[iteration]=(possize-posidx)/(DREAL (possize));
00323 fp[iteration]=(negsize-negidx)/(DREAL (negsize));
00324
00325
00326 if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
00327 {
00328 minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
00329 tresh=(old_treshhold+treshhold)/2;
00330 returnidx=iteration;
00331 }
00332
00333 iteration++;
00334 }
00335
00336
00337 size=iteration;
00338
00339 if (rocfile)
00340 {
00341 const CHAR id[]="ROC";
00342 fwrite(id, sizeof(char), sizeof(id), rocfile);
00343 fwrite(fp, sizeof(DREAL), size, rocfile);
00344 fwrite(tp, sizeof(DREAL), size, rocfile);
00345 }
00346
00347 return returnidx;
00348 }
00349
00350 UINT CMath::crc32(BYTE *data, INT len)
00351 {
00352 UINT result;
00353 INT i,j;
00354 BYTE octet;
00355
00356 result = 0-1;
00357
00358 for (i=0; i<len; i++)
00359 {
00360 octet = *(data++);
00361 for (j=0; j<8; j++)
00362 {
00363 if ((octet >> 7) ^ (result >> 31))
00364 {
00365 result = (result << 1) ^ 0x04c11db7;
00366 }
00367 else
00368 {
00369 result = (result << 1);
00370 }
00371 octet <<= 1;
00372 }
00373 }
00374
00375 return ~result;
00376 }
00377
00378 double CMath::mutual_info(DREAL* p1, DREAL* p2, INT len)
00379 {
00380 double e=0;
00381
00382 for (INT i=0; i<len; i++)
00383 for (INT j=0; j<len; j++)
00384 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00385
00386 return e;
00387 }
00388
00389 double CMath::relative_entropy(DREAL* p, DREAL* q, INT len)
00390 {
00391 double e=0;
00392
00393 for (INT i=0; i<len; i++)
00394 e+=exp(p[i])*(p[i]-q[i]);
00395
00396 return e;
00397 }
00398
00399 double CMath::entropy(DREAL* p, INT len)
00400 {
00401 double e=0;
00402
00403 for (INT i=0; i<len; i++)
00404 e-=exp(p[i])*p[i];
00405
00406 return e;
00407 }
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419 #ifdef HAVE_LAPACK
00420 DREAL* CMath::pinv(DREAL* matrix, INT rows, INT cols, DREAL* target)
00421 {
00422 if (!target)
00423 target=new DREAL[rows*cols];
00424
00425 char jobu='A';
00426 char jobvt='A';
00427 int m=rows;
00428 int n=cols;
00429 int lda=m;
00430 int ldu=m;
00431 int ldvt=n;
00432 int info=-1;
00433 int lsize=CMath::min(m,n);
00434 double* s=new double[lsize];
00435 double* u=new double[m*m];
00436 double* vt=new double[n*n];
00437
00438 wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00439 ASSERT(info==0);
00440
00441 for (INT i=0; i<n; i++)
00442 {
00443 for (INT j=0; j<lsize; j++)
00444 vt[i*n+j]=vt[i*n+j]/s[j];
00445 }
00446
00447 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00448
00449 delete[] u;
00450 delete[] vt;
00451 delete[] s;
00452
00453 return target;
00454 }
00455 #endif
00456
00457 template <>
00458 void CMath::display_vector(BYTE* vector, INT n, const char* name)
00459 {
00460 ASSERT(n>=0);
00461 SG_SPRINT("%s=[", name);
00462 for (INT i=0; i<n; i++)
00463 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00464 SG_SPRINT("]\n");
00465 }
00466
00467 template <>
00468 void CMath::display_vector(INT* vector, INT n, const char* name)
00469 {
00470 ASSERT(n>=0);
00471 SG_SPRINT("%s=[", name);
00472 for (INT i=0; i<n; i++)
00473 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00474 SG_SPRINT("]\n");
00475 }
00476
00477 template <>
00478 void CMath::display_vector(LONG* vector, INT n, const char* name)
00479 {
00480 ASSERT(n>=0);
00481 SG_SPRINT("%s=[", name);
00482 for (INT i=0; i<n; i++)
00483 SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00484 SG_SPRINT("]\n");
00485 }
00486
00487 template <>
00488 void CMath::display_vector(SHORTREAL* vector, INT n, const char* name)
00489 {
00490 ASSERT(n>=0);
00491 SG_SPRINT("%s=[", name);
00492 for (INT i=0; i<n; i++)
00493 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00494 SG_SPRINT("]\n");
00495 }
00496
00497 template <>
00498 void CMath::display_vector(DREAL* vector, INT n, const char* name)
00499 {
00500 ASSERT(n>=0);
00501 SG_SPRINT("%s=[", name);
00502 for (INT i=0; i<n; i++)
00503 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00504 SG_SPRINT("]\n");
00505 }
00506
00507 template <>
00508 void CMath::display_matrix(INT* matrix, INT rows, INT cols, const char* name)
00509 {
00510 ASSERT(rows>=0 && cols>=0);
00511 SG_SPRINT("%s=[\n", name);
00512 for (INT i=0; i<rows; i++)
00513 {
00514 SG_SPRINT("[");
00515 for (INT j=0; j<cols; j++)
00516 SG_SPRINT("\t%d%s", matrix[j*rows+i],
00517 j==cols-1? "" : ",");
00518 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00519 }
00520 SG_SPRINT("]\n");
00521 }
00522
00523 template <>
00524 void CMath::display_matrix(DREAL* matrix, INT rows, INT cols, const char* name)
00525 {
00526 ASSERT(rows>=0 && cols>=0);
00527 SG_SPRINT("%s=[\n", name);
00528 for (INT i=0; i<rows; i++)
00529 {
00530 SG_SPRINT("[");
00531 for (INT j=0; j<cols; j++)
00532 SG_SPRINT("\t%lf%s", (double) matrix[j*rows+i],
00533 j==cols-1? "" : ",");
00534 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00535 }
00536 SG_SPRINT("]\n");
00537 }
00538