00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00015
00016
00017 #include "lib/common.h"
00018 #include "lib/Mathematics.h"
00019 #include "lib/lapack.h"
00020 #include "lib/io.h"
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025
00027
00029
00030 #ifdef USE_LOGCACHE
00031
00032 #ifdef USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-6
00035 #else
00036 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00037 #define LOG_TABLE_PRECISION 1e-15
00038 #endif
00039
00040 int32_t CMath::LOGACCURACY = 0;
00041 #endif
00042
00043 int32_t CMath::LOGRANGE = 0;
00044
00045 const float64_t CMath::INFTY = -log(0.0);
00046 const float64_t CMath::ALMOST_INFTY = +1e+20;
00047 const float64_t CMath::ALMOST_NEG_INFTY = -1000;
00048 #ifdef USE_LOGCACHE
00049 float64_t* CMath::logtable = NULL;
00050 #endif
00051 char* CMath::rand_state = NULL;
00052 uint32_t CMath::seed = 0;
00053
00054 CMath::CMath()
00055 : CSGObject()
00056 {
00057 #ifndef HAVE_SWIG
00058 CSGObject::version.print_version();
00059 #endif
00060 CMath::rand_state=new char[RNG_SEED_SIZE];
00061 init_random();
00062 #ifndef HAVE_SWIG
00063 SG_PRINT("( seeding random number generator with %u (seed size %d))\n", seed, RNG_SEED_SIZE);
00064 #endif
00065
00066 #ifdef USE_LOGCACHE
00067 LOGRANGE=CMath::determine_logrange();
00068 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00069 #ifndef HAVE_SWIG
00070 SG_PRINT( "initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",LOGRANGE,LOGACCURACY,sizeof(float64_t),LOGRANGE*LOGACCURACY*sizeof(float64_t)/(1024.0*1024.0)) ;
00071 #endif
00072
00073 CMath::logtable=new float64_t[LOGRANGE*LOGACCURACY];
00074 init_log_table();
00075 #else
00076 int32_t i=0;
00077 while ((float64_t)log(1+((float64_t)exp(-float64_t(i)))))
00078 i++;
00079 #ifndef HAVE_SWIG
00080 SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", i);
00081 #endif
00082 LOGRANGE=i;
00083 #endif
00084 }
00085
00086 CMath::~CMath()
00087 {
00088 delete[] CMath::rand_state;
00089 CMath::rand_state=NULL;
00090 #ifdef USE_LOGCACHE
00091 delete[] CMath::logtable;
00092 CMath::logtable=NULL;
00093 #endif
00094 }
00095
00096 #ifdef USE_LOGCACHE
00097 int32_t CMath::determine_logrange()
00098 {
00099 int32_t i;
00100 float64_t acc=0;
00101 for (i=0; i<50; i++)
00102 {
00103 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i)))));
00104 if (acc<=(float64_t)LOG_TABLE_PRECISION)
00105 break;
00106 }
00107
00108 SG_INFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00109 return i;
00110 }
00111
00112 int32_t CMath::determine_logaccuracy(int32_t range)
00113 {
00114 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t));
00115 SG_INFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00116 return range;
00117 }
00118
00119
00120 void CMath::init_log_table()
00121 {
00122 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++)
00123 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY)));
00124 }
00125 #endif
00126
00127 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col)
00128 {
00129 int32_t changed=1;
00130 if (a[0]==-1) return ;
00131 while (changed)
00132 {
00133 changed=0; int32_t i=0 ;
00134 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1))
00135 {
00136 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00137 {
00138 for (int32_t j=0; j<cols; j++)
00139 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00140 changed=1 ;
00141 } ;
00142 i++ ;
00143 } ;
00144 } ;
00145 }
00146
00147 void CMath::sort(float64_t *a, int32_t* idx, int32_t N)
00148 {
00149 int32_t changed=1;
00150 while (changed)
00151 {
00152 changed=0;
00153 for (int32_t i=0; i<N-1; i++)
00154 {
00155 if (a[i]>a[i+1])
00156 {
00157 swap(a[i],a[i+1]) ;
00158 swap(idx[i],idx[i+1]) ;
00159 changed=1 ;
00160 } ;
00161 } ;
00162 } ;
00163
00164 }
00165
00166 float64_t CMath::Align(
00167 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost)
00168 {
00169 float64_t actCost=0 ;
00170 int32_t i1, i2 ;
00171 float64_t* const gapCosts1 = new float64_t[ l1 ];
00172 float64_t* const gapCosts2 = new float64_t[ l2 ];
00173 float64_t* costs2_0 = new float64_t[ l2 + 1 ];
00174 float64_t* costs2_1 = new float64_t[ l2 + 1 ];
00175
00176
00177 for( i1 = 0; i1 < l1; ++i1 ) {
00178 gapCosts1[ i1 ] = gapCost * i1;
00179 }
00180 costs2_1[ 0 ] = 0;
00181 for( i2 = 0; i2 < l2; ++i2 ) {
00182 gapCosts2[ i2 ] = gapCost * i2;
00183 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00184 }
00185
00186 for( i1 = 0; i1 < l1; ++i1 ) {
00187 swap( costs2_0, costs2_1 );
00188 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00189 costs2_1[ 0 ] = actCost;
00190 for( i2 = 0; i2 < l2; ++i2 ) {
00191 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00192 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00193 const float64_t actGap2 = actCost + gapCosts2[ i2 ];
00194 const float64_t actGap = min( actGap1, actGap2 );
00195 actCost = min( actMatch, actGap );
00196 costs2_1[ i2+1 ] = actCost;
00197 }
00198 }
00199
00200 delete [] gapCosts1;
00201 delete [] gapCosts2;
00202 delete [] costs2_0;
00203 delete [] costs2_1;
00204
00205
00206 return actCost;
00207 }
00208
00209
00210
00211 int32_t CMath::calcroc(
00212 float64_t* fp, float64_t* tp, float64_t* output, int32_t* label,
00213 int32_t& size, int32_t& possize, int32_t& negsize, float64_t& tresh,
00214 FILE* rocfile)
00215 {
00216 int32_t left=0;
00217 int32_t right=size-1;
00218 int32_t i;
00219
00220 for (i=0; i<size; i++)
00221 {
00222 if (!(label[i]== -1 || label[i]==1))
00223 return -1;
00224 }
00225
00226
00227 while (left<right)
00228 {
00229 while ((label[left] < 0) && (left<right))
00230 left++;
00231 while ((label[right] > 0) && (left<right))
00232 right--;
00233
00234 swap(output[left],output[right]);
00235 swap(label[left], label[right]);
00236 }
00237
00238
00239 negsize=left;
00240 possize=size-left;
00241 float64_t* negout=output;
00242 float64_t* posout=&output[left];
00243
00244
00245 qsort(negout, negsize);
00246 qsort(posout, possize);
00247
00248
00249 float64_t minimum=min(negout[0], posout[0]);
00250 float64_t maximum=minimum;
00251 if (negsize>0)
00252 maximum=max(maximum,negout[negsize-1]);
00253 if (possize>0)
00254 maximum=max(maximum,posout[possize-1]);
00255
00256 float64_t treshhold=minimum;
00257 float64_t old_treshhold=treshhold;
00258
00259
00260 for (i=0; i<size; i++)
00261 {
00262 fp[i]=1.0;
00263 tp[i]=1.0;
00264 }
00265
00266
00267
00268 int32_t posidx=0;
00269 int32_t negidx=0;
00270 int32_t iteration=1;
00271 int32_t returnidx=-1;
00272
00273 float64_t minerr=10;
00274
00275 while (iteration < size && treshhold<=maximum)
00276 {
00277 old_treshhold=treshhold;
00278
00279 while (treshhold==old_treshhold && treshhold<=maximum)
00280 {
00281 if (posidx<possize && negidx<negsize)
00282 {
00283 if (posout[posidx]<negout[negidx])
00284 {
00285 if (posout[posidx]==treshhold)
00286 posidx++;
00287 else
00288 treshhold=posout[posidx];
00289 }
00290 else
00291 {
00292 if (negout[negidx]==treshhold)
00293 negidx++;
00294 else
00295 treshhold=negout[negidx];
00296 }
00297 }
00298 else
00299 {
00300 if (posidx>=possize && negidx<negsize-1)
00301 {
00302 negidx++;
00303 treshhold=negout[negidx];
00304 }
00305 else if (negidx>=negsize && posidx<possize-1)
00306 {
00307 posidx++;
00308 treshhold=posout[posidx];
00309 }
00310 else if (negidx<negsize && treshhold!=negout[negidx])
00311 treshhold=negout[negidx];
00312 else if (posidx<possize && treshhold!=posout[posidx])
00313 treshhold=posout[posidx];
00314 else
00315 {
00316 treshhold=2*(maximum+100);
00317 posidx=possize;
00318 negidx=negsize;
00319 break;
00320 }
00321 }
00322 }
00323
00324
00325 tp[iteration]=(possize-posidx)/(float64_t (possize));
00326 fp[iteration]=(negsize-negidx)/(float64_t (negsize));
00327
00328
00329 if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
00330 {
00331 minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
00332 tresh=(old_treshhold+treshhold)/2;
00333 returnidx=iteration;
00334 }
00335
00336 iteration++;
00337 }
00338
00339
00340 size=iteration;
00341
00342 if (rocfile)
00343 {
00344 const char id[]="ROC";
00345 fwrite(id, sizeof(char), sizeof(id), rocfile);
00346 fwrite(fp, sizeof(float64_t), size, rocfile);
00347 fwrite(tp, sizeof(float64_t), size, rocfile);
00348 }
00349
00350 return returnidx;
00351 }
00352
00353 uint32_t CMath::crc32(uint8_t *data, int32_t len)
00354 {
00355 uint32_t result;
00356 int32_t i,j;
00357 uint8_t octet;
00358
00359 result = 0-1;
00360 for (i=0; i<len; i++)
00361 {
00362 octet = *(data++);
00363 for (j=0; j<8; j++)
00364 {
00365 if ((octet >> 7) ^ (result >> 31))
00366 {
00367 result = (result << 1) ^ 0x04c11db7;
00368 }
00369 else
00370 {
00371 result = (result << 1);
00372 }
00373 octet <<= 1;
00374 }
00375 }
00376
00377 return ~result;
00378 }
00379
00380 float64_t CMath::mutual_info(float64_t* p1, float64_t* p2, int32_t len)
00381 {
00382 double e=0;
00383
00384 for (int32_t i=0; i<len; i++)
00385 for (int32_t j=0; j<len; j++)
00386 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00387
00388 return (float64_t) e;
00389 }
00390
00391 float64_t CMath::relative_entropy(float64_t* p, float64_t* q, int32_t len)
00392 {
00393 double e=0;
00394
00395 for (int32_t i=0; i<len; i++)
00396 e+=exp(p[i])*(p[i]-q[i]);
00397
00398 return (float64_t) e;
00399 }
00400
00401 float64_t CMath::entropy(float64_t* p, int32_t len)
00402 {
00403 double e=0;
00404
00405 for (int32_t i=0; i<len; i++)
00406 e-=exp(p[i])*p[i];
00407
00408 return (float64_t) e;
00409 }
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421 #ifdef HAVE_LAPACK
00422 float64_t* CMath::pinv(
00423 float64_t* matrix, int32_t rows, int32_t cols, float64_t* target)
00424 {
00425 if (!target)
00426 target=new float64_t[rows*cols];
00427
00428 char jobu='A';
00429 char jobvt='A';
00430 int m=rows;
00431 int n=cols;
00432 int lda=m;
00433 int ldu=m;
00434 int ldvt=n;
00435 int info=-1;
00436 int32_t lsize=CMath::min((int32_t) m, (int32_t) n);
00437 double* s=new double[lsize];
00438 double* u=new double[m*m];
00439 double* vt=new double[n*n];
00440
00441 wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00442 ASSERT(info==0);
00443
00444 for (int32_t i=0; i<n; i++)
00445 {
00446 for (int32_t j=0; j<lsize; j++)
00447 vt[i*n+j]=vt[i*n+j]/s[j];
00448 }
00449
00450 cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00451
00452 delete[] u;
00453 delete[] vt;
00454 delete[] s;
00455
00456 return target;
00457 }
00458 #endif
00459
00460 template <>
00461 void CMath::display_vector(uint8_t* vector, int32_t n, const char* name)
00462 {
00463 ASSERT(n>=0);
00464 SG_SPRINT("%s=[", name);
00465 for (int32_t i=0; i<n; i++)
00466 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00467 SG_SPRINT("]\n");
00468 }
00469
00470 template <>
00471 void CMath::display_vector(int32_t* vector, int32_t n, const char* name)
00472 {
00473 ASSERT(n>=0);
00474 SG_SPRINT("%s=[", name);
00475 for (int32_t i=0; i<n; i++)
00476 SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00477 SG_SPRINT("]\n");
00478 }
00479
00480 template <>
00481 void CMath::display_vector(int64_t* vector, int32_t n, const char* name)
00482 {
00483 ASSERT(n>=0);
00484 SG_SPRINT("%s=[", name);
00485 for (int32_t i=0; i<n; i++)
00486 SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00487 SG_SPRINT("]\n");
00488 }
00489
00490 template <>
00491 void CMath::display_vector(float32_t* vector, int32_t n, const char* name)
00492 {
00493 ASSERT(n>=0);
00494 SG_SPRINT("%s=[", name);
00495 for (int32_t i=0; i<n; i++)
00496 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00497 SG_SPRINT("]\n");
00498 }
00499
00500 template <>
00501 void CMath::display_vector(float64_t* vector, int32_t n, const char* name)
00502 {
00503 ASSERT(n>=0);
00504 SG_SPRINT("%s=[", name);
00505 for (int32_t i=0; i<n; i++)
00506 SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00507 SG_SPRINT("]\n");
00508 }
00509
00510 template <>
00511 void CMath::display_matrix(
00512 int32_t* matrix, int32_t rows, int32_t cols, const char* name)
00513 {
00514 ASSERT(rows>=0 && cols>=0);
00515 SG_SPRINT("%s=[\n", name);
00516 for (int32_t i=0; i<rows; i++)
00517 {
00518 SG_SPRINT("[");
00519 for (int32_t j=0; j<cols; j++)
00520 SG_SPRINT("\t%d%s", matrix[j*rows+i],
00521 j==cols-1? "" : ",");
00522 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00523 }
00524 SG_SPRINT("]\n");
00525 }
00526
00527 template <>
00528 void CMath::display_matrix(
00529 float64_t* matrix, int32_t rows, int32_t cols, const char* name)
00530 {
00531 ASSERT(rows>=0 && cols>=0);
00532 SG_SPRINT("%s=[\n", name);
00533 for (int32_t i=0; i<rows; i++)
00534 {
00535 SG_SPRINT("[");
00536 for (int32_t j=0; j<cols; j++)
00537 SG_SPRINT("\t%lf%s", (double) matrix[j*rows+i],
00538 j==cols-1? "" : ",");
00539 SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00540 }
00541 SG_SPRINT("]\n");
00542 }
00543