Mathematics.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 // Math.cpp: implementation of the CMath class.
00013 //
00015 
00016 
00017 #include "lib/common.h"
00018 #include "lib/Mathematics.h"
00019 #include "lib/lapack.h"
00020 #include "lib/io.h"
00021 
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <math.h>
00025 
00027 // Construction/Destruction
00029 
00030 #ifdef USE_LOGCACHE
00031 //gene/math specific constants
00032 #ifdef USE_HMMDEBUG
00033 #define MAX_LOG_TABLE_SIZE 10*1024*1024
00034 #define LOG_TABLE_PRECISION 1e-6
00035 #else
00036 #define MAX_LOG_TABLE_SIZE 123*1024*1024
00037 #define LOG_TABLE_PRECISION 1e-15
00038 #endif
00039 
00040 INT CMath::LOGACCURACY         = 0; // 100000 steps per integer
00041 #endif
00042 
00043 INT CMath::LOGRANGE            = 0; // range for logtable: log(1+exp(x))  -25 <= x <= 0
00044 
00045 const DREAL CMath::INFTY            =  -log(0.0);   // infinity
00046 const DREAL CMath::ALMOST_INFTY     =  +1e+20;      //a large number
00047 const DREAL CMath::ALMOST_NEG_INFTY =  -1000;   
00048 #ifdef USE_LOGCACHE
00049 DREAL* CMath::logtable = NULL;
00050 #endif
00051 CHAR* CMath::rand_state = NULL;
00052 UINT CMath::seed = 0;
00053 
00054 CMath::CMath()
00055 : CSGObject()
00056 {
00057 #ifndef HAVE_SWIG
00058     CSGObject::version.print_version();
00059 #endif
00060     CMath::rand_state=new CHAR[256];
00061     init_random();
00062 #ifndef HAVE_SWIG
00063     SG_PRINT( "( seeding random number generator with %u, ", seed);
00064 #endif
00065 
00066 #ifdef USE_LOGCACHE
00067     LOGRANGE=CMath::determine_logrange();
00068     LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE);
00069 #ifndef HAVE_SWIG
00070     SG_PRINT( "initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",LOGRANGE,LOGACCURACY,sizeof(DREAL),LOGRANGE*LOGACCURACY*sizeof(DREAL)/(1024.0*1024.0)) ;
00071 #endif 
00072    
00073     CMath::logtable=new DREAL[LOGRANGE*LOGACCURACY];
00074     init_log_table();
00075 #else
00076     INT i=0;
00077     while ((DREAL)log(1+((DREAL)exp(-DREAL(i)))))
00078         i++;
00079 #ifndef HAVE_SWIG
00080     SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", i);
00081 #endif 
00082     LOGRANGE=i;
00083 #endif 
00084 }
00085 
00086 CMath::~CMath()
00087 {
00088     delete[] CMath::rand_state;
00089     CMath::rand_state=NULL;
00090 #ifdef USE_LOGCACHE
00091     delete[] CMath::logtable;
00092     CMath::logtable=NULL;
00093 #endif
00094 }
00095 
00096 #ifdef USE_LOGCACHE
00097 INT CMath::determine_logrange()
00098 {
00099     INT i;
00100     DREAL acc=0;
00101     for (i=0; i<50; i++)
00102     {
00103     acc=((DREAL)log(1+((DREAL)exp(-DREAL(i)))));
00104     if (acc<=(DREAL)LOG_TABLE_PRECISION)
00105         break;
00106     }
00107 
00108     SG_INFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc);
00109     return i;
00110 }
00111 
00112 INT CMath::determine_logaccuracy(INT range)
00113 {
00114     range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(DREAL));
00115     SG_INFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range);
00116     return range;
00117 }
00118 
00119 //init log table of form log(1+exp(x))
00120 void CMath::init_log_table()
00121 {
00122   for (INT i=0; i< LOGACCURACY*LOGRANGE; i++)
00123     logtable[i]=log(1+exp(DREAL(-i)/DREAL(LOGACCURACY)));
00124 }
00125 #endif
00126 
00127 void CMath::sort(INT *a, INT cols, INT sort_col)
00128 {
00129   INT changed=1;
00130   if (a[0]==-1) return ;
00131   while (changed)
00132   {
00133       changed=0; INT i=0 ;
00134       while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure
00135       {
00136           if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col])
00137           {
00138               for (INT j=0; j<cols; j++)
00139                   CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ;
00140               changed=1 ;
00141           } ;
00142           i++ ;
00143       } ;
00144   } ;
00145 } 
00146 
00147 void CMath::sort(DREAL *a, INT* idx, INT N) 
00148 {
00149 
00150     INT changed=1;
00151     while (changed)
00152     {
00153         changed=0;
00154         for (INT i=0; i<N-1; i++)
00155         {
00156             if (a[i]>a[i+1])
00157             {
00158                 swap(a[i],a[i+1]) ;
00159                 swap(idx[i],idx[i+1]) ;
00160                 changed=1 ;
00161             } ;
00162         } ;
00163     } ;
00164      
00165 } 
00166 
00167 DREAL CMath::Align(CHAR * seq1, CHAR* seq2, INT l1, INT l2, DREAL gapCost)
00168 {
00169   DREAL actCost=0 ;
00170   INT i1, i2 ;
00171   DREAL* const gapCosts1 = new DREAL[ l1 ];
00172   DREAL* const gapCosts2 = new DREAL[ l2 ];
00173   DREAL* costs2_0 = new DREAL[ l2 + 1 ];
00174   DREAL* costs2_1 = new DREAL[ l2 + 1 ];
00175 
00176   // initialize borders
00177   for( i1 = 0; i1 < l1; ++i1 ) {
00178     gapCosts1[ i1 ] = gapCost * i1;
00179   }
00180   costs2_1[ 0 ] = 0;
00181   for( i2 = 0; i2 < l2; ++i2 ) {
00182     gapCosts2[ i2 ] = gapCost * i2;
00183     costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ];
00184   }
00185   // compute alignment
00186   for( i1 = 0; i1 < l1; ++i1 ) {
00187     swap( costs2_0, costs2_1 );
00188     actCost = costs2_0[ 0 ] + gapCosts1[ i1 ];
00189     costs2_1[ 0 ] = actCost;
00190     for( i2 = 0; i2 < l2; ++i2 ) {
00191       const DREAL actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] );
00192       const DREAL actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ];
00193       const DREAL actGap2 = actCost + gapCosts2[ i2 ];
00194       const DREAL actGap = min( actGap1, actGap2 );
00195       actCost = min( actMatch, actGap );
00196       costs2_1[ i2+1 ] = actCost;
00197     }
00198   }
00199 
00200   delete [] gapCosts1;
00201   delete [] gapCosts2;
00202   delete [] costs2_0;
00203   delete [] costs2_1;
00204   
00205   // return the final cost
00206   return actCost;
00207 }
00208 
00209 //plot x- axis false positives (fp) 1-Specificity
00210 //plot y- axis true positives (tp) Sensitivity
00211 INT CMath::calcroc(DREAL* fp, DREAL* tp, DREAL* output, INT* label, INT& size, INT& possize, INT& negsize, DREAL& tresh, FILE* rocfile)
00212 {
00213     INT left=0;
00214     INT right=size-1;
00215     INT i;
00216 
00217     for (i=0; i<size; i++)
00218     {
00219         if (!(label[i]== -1 || label[i]==1))
00220             return -1;
00221     }
00222 
00223     //sort data such that -1 labels first +1 behind
00224     while (left<right)
00225     {
00226         while ((label[left] < 0) && (left<right))
00227             left++;
00228         while ((label[right] > 0) && (left<right))  //warning: label must be either -1 or +1
00229             right--;
00230 
00231         swap(output[left],output[right]);
00232         swap(label[left], label[right]);
00233     }
00234 
00235     // neg/pos sizes
00236     negsize=left;
00237     possize=size-left;
00238     DREAL* negout=output;
00239     DREAL* posout=&output[left];
00240 
00241     // sort the pos and neg outputs separately
00242     qsort(negout, negsize);
00243     qsort(posout, possize);
00244 
00245     // set minimum+maximum values for decision-treshhold
00246     DREAL minimum=min(negout[0], posout[0]);
00247     DREAL maximum=minimum;
00248     if (negsize>0)
00249         maximum=max(maximum,negout[negsize-1]);
00250     if (possize>0)
00251         maximum=max(maximum,posout[possize-1]);
00252 
00253     DREAL treshhold=minimum;
00254     DREAL old_treshhold=treshhold;
00255 
00256     //clear array.
00257     for (i=0; i<size; i++)
00258     {
00259         fp[i]=1.0;
00260         tp[i]=1.0;
00261     }
00262 
00263     //start with fp=1.0 tp=1.0 which is posidx=0, negidx=0
00264     //everything right of {pos,neg}idx is considered to beLONG to +1
00265     INT posidx=0;
00266     INT negidx=0;
00267     INT iteration=1;
00268     INT returnidx=-1;
00269 
00270     DREAL minerr=10;
00271 
00272     while (iteration < size && treshhold<=maximum)
00273     {
00274         old_treshhold=treshhold;
00275 
00276         while (treshhold==old_treshhold && treshhold<=maximum)
00277         {
00278             if (posidx<possize && negidx<negsize)
00279             {
00280                 if (posout[posidx]<negout[negidx])
00281                 {
00282                     if (posout[posidx]==treshhold)
00283                         posidx++;
00284                     else
00285                         treshhold=posout[posidx];
00286                 }
00287                 else
00288                 {
00289                     if (negout[negidx]==treshhold)
00290                         negidx++;
00291                     else
00292                         treshhold=negout[negidx];
00293                 }
00294             }
00295             else
00296             {
00297                 if (posidx>=possize && negidx<negsize-1)
00298                 {
00299                     negidx++;
00300                     treshhold=negout[negidx];
00301                 }
00302                 else if (negidx>=negsize && posidx<possize-1)
00303                 {
00304                     posidx++;
00305                     treshhold=posout[posidx];
00306                 }
00307                 else if (negidx<negsize && treshhold!=negout[negidx])
00308                     treshhold=negout[negidx];
00309                 else if (posidx<possize && treshhold!=posout[posidx])
00310                     treshhold=posout[posidx];
00311                 else
00312                 {
00313                     treshhold=2*(maximum+100); // force termination
00314                     posidx=possize;
00315                     negidx=negsize;
00316                     break;
00317                 }
00318             }
00319         }
00320 
00321         //calc tp,fp
00322         tp[iteration]=(possize-posidx)/(DREAL (possize));
00323         fp[iteration]=(negsize-negidx)/(DREAL (negsize));
00324 
00325         //choose poINT with minimal error
00326         if (minerr > negsize*fp[iteration]/size+(1-tp[iteration])*possize/size )
00327         {
00328             minerr=negsize*fp[iteration]/size+(1-tp[iteration])*possize/size;
00329             tresh=(old_treshhold+treshhold)/2;
00330             returnidx=iteration;
00331         }
00332 
00333         iteration++;
00334     }
00335 
00336     // set new size
00337     size=iteration;
00338 
00339     if (rocfile)
00340     {
00341         const CHAR id[]="ROC";
00342         fwrite(id, sizeof(char), sizeof(id), rocfile);
00343         fwrite(fp, sizeof(DREAL), size, rocfile);
00344         fwrite(tp, sizeof(DREAL), size, rocfile);
00345     }
00346 
00347     return returnidx;
00348 }
00349 
00350 UINT CMath::crc32(BYTE *data, INT len)
00351 {
00352     UINT        result;
00353     INT                 i,j;
00354     BYTE       octet;
00355 
00356     result = 0-1;
00357 
00358     for (i=0; i<len; i++)
00359     {
00360     octet = *(data++);
00361     for (j=0; j<8; j++)
00362     {
00363         if ((octet >> 7) ^ (result >> 31))
00364         {
00365         result = (result << 1) ^ 0x04c11db7;
00366         }
00367         else
00368         {
00369         result = (result << 1);
00370         }
00371         octet <<= 1;
00372     }
00373     }
00374 
00375     return ~result; 
00376 }
00377 
00378 double CMath::mutual_info(DREAL* p1, DREAL* p2, INT len)
00379 {
00380     double e=0;
00381 
00382     for (INT i=0; i<len; i++)
00383         for (INT j=0; j<len; j++)
00384             e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]);
00385 
00386     return e;
00387 }
00388 
00389 double CMath::relative_entropy(DREAL* p, DREAL* q, INT len)
00390 {
00391     double e=0;
00392 
00393     for (INT i=0; i<len; i++)
00394         e+=exp(p[i])*(p[i]-q[i]);
00395 
00396     return e;
00397 }
00398 
00399 double CMath::entropy(DREAL* p, INT len)
00400 {
00401     double e=0;
00402 
00403     for (INT i=0; i<len; i++)
00404         e-=exp(p[i])*p[i];
00405 
00406     return e;
00407 }
00408 
00409 
00410 //Howto construct the pseudo inverse (from "The Matrix Cookbook")
00411 //
00412 //Assume A does not have full rank, i.e. A is n \times m and rank(A) = r < min(n;m).
00413 //
00414 //The matrix A+ known as the pseudo inverse is unique and does always exist.
00415 //
00416 //The pseudo inverse A+ can be constructed from the singular value
00417 //decomposition A = UDV^T , by  A^+ = V(D+)U^T.
00418 
00419 #ifdef HAVE_LAPACK
00420 DREAL* CMath::pinv(DREAL* matrix, INT rows, INT cols, DREAL* target)
00421 {
00422     if (!target)
00423         target=new DREAL[rows*cols];
00424 
00425     char jobu='A';
00426     char jobvt='A';
00427     int m=rows;
00428     int n=cols;
00429     int lda=m;
00430     int ldu=m;
00431     int ldvt=n;
00432     int info=-1;
00433     int lsize=CMath::min(m,n);
00434     double* s=new double[lsize];
00435     double* u=new double[m*m];
00436     double* vt=new double[n*n];
00437 
00438     wrap_dgesvd(jobu, jobvt, m, n, matrix, lda, s, u, ldu, vt, ldvt, &info);
00439     ASSERT(info==0);
00440 
00441     for (INT i=0; i<n; i++)
00442     {
00443         for (INT j=0; j<lsize; j++)
00444             vt[i*n+j]=vt[i*n+j]/s[j];
00445     }
00446 
00447     cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, m, n, m, 1.0, vt, ldvt, u, ldu, 0, target, m);
00448 
00449     delete[] u;
00450     delete[] vt;
00451     delete[] s;
00452 
00453     return target;
00454 }
00455 #endif
00456 
00457 template <>
00458 void CMath::display_vector(BYTE* vector, INT n, const char* name)
00459 {
00460     ASSERT(n>=0);
00461     SG_SPRINT("%s=[", name);
00462     for (INT i=0; i<n; i++)
00463         SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00464     SG_SPRINT("]\n");
00465 }
00466 
00467 template <>
00468 void CMath::display_vector(INT* vector, INT n, const char* name)
00469 {
00470     ASSERT(n>=0);
00471     SG_SPRINT("%s=[", name);
00472     for (INT i=0; i<n; i++)
00473         SG_SPRINT("%d%s", vector[i], i==n-1? "" : ",");
00474     SG_SPRINT("]\n");
00475 }
00476 
00477 template <>
00478 void CMath::display_vector(LONG* vector, INT n, const char* name)
00479 {
00480     ASSERT(n>=0);
00481     SG_SPRINT("%s=[", name);
00482     for (INT i=0; i<n; i++)
00483         SG_SPRINT("%lld%s", vector[i], i==n-1? "" : ",");
00484     SG_SPRINT("]\n");
00485 }
00486 
00487 template <>
00488 void CMath::display_vector(SHORTREAL* vector, INT n, const char* name)
00489 {
00490     ASSERT(n>=0);
00491     SG_SPRINT("%s=[", name);
00492     for (INT i=0; i<n; i++)
00493         SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00494     SG_SPRINT("]\n");
00495 }
00496 
00497 template <>
00498 void CMath::display_vector(DREAL* vector, INT n, const char* name)
00499 {
00500     ASSERT(n>=0);
00501     SG_SPRINT("%s=[", name);
00502     for (INT i=0; i<n; i++)
00503         SG_SPRINT("%10.10f%s", vector[i], i==n-1? "" : ",");
00504     SG_SPRINT("]\n");
00505 }
00506 
00507 template <>
00508 void CMath::display_matrix(INT* matrix, INT rows, INT cols, const char* name)
00509 {
00510     ASSERT(rows>=0 && cols>=0);
00511     SG_SPRINT("%s=[\n", name);
00512     for (INT i=0; i<rows; i++)
00513     {
00514         SG_SPRINT("[");
00515         for (INT j=0; j<cols; j++)
00516             SG_SPRINT("\t%d%s", matrix[j*rows+i],
00517                 j==cols-1? "" : ",");
00518         SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00519     }
00520     SG_SPRINT("]\n");
00521 }
00522 
00523 template <>
00524 void CMath::display_matrix(DREAL* matrix, INT rows, INT cols, const char* name)
00525 {
00526     ASSERT(rows>=0 && cols>=0);
00527     SG_SPRINT("%s=[\n", name);
00528     for (INT i=0; i<rows; i++)
00529     {
00530         SG_SPRINT("[");
00531         for (INT j=0; j<cols; j++)
00532             SG_SPRINT("\t%lf%s", (double) matrix[j*rows+i],
00533                 j==cols-1? "" : ",");
00534         SG_SPRINT("]%s\n", i==rows-1? "" : ",");
00535     }
00536     SG_SPRINT("]\n");
00537 }
00538 

SHOGUN Machine Learning Toolbox - Documentation