SVM_libsvm.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2000-2008 Chih-Chung Chang and Chih-Jen Lin
00003  * All rights reserved.
00004  * 
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 
00009  * 1. Redistributions of source code must retain the above copyright
00010  * notice, this list of conditions and the following disclaimer.
00011  * 
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  * notice, this list of conditions and the following disclaimer in the
00014  * documentation and/or other materials provided with the distribution.
00015  * 
00016  * 3. Neither name of copyright holders nor the names of its contributors
00017  * may be used to endorse or promote products derived from this software
00018  * without specific prior written permission.
00019  * 
00020  * 
00021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * Shogun specific adjustments (w) 2006-2008 Soeren Sonnenburg
00034  */
00035 
00036 #include "lib/memory.h"
00037 #include "classifier/svm/SVM_libsvm.h"
00038 #include "kernel/Kernel.h"
00039 #include "lib/io.h"
00040 #include "lib/common.h"
00041 #include "lib/Mathematics.h"
00042 
00043 #include <stdio.h>
00044 #include <stdlib.h>
00045 #include <ctype.h>
00046 #include <float.h>
00047 #include <string.h>
00048 #include <stdarg.h>
00049 typedef KERNELCACHE_ELEM Qfloat;
00050 typedef DREAL schar;
00051 #ifndef min
00052 template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
00053 #endif
00054 #ifndef max
00055 template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
00056 #endif
00057 template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00058 template <class S, class T> inline void clone(T*& dst, S* src, int n)
00059 {
00060     dst = new T[n];
00061     memcpy((void *)dst,(void *)src,sizeof(T)*n);
00062 }
00063 #define INF HUGE_VAL
00064 #define TAU 1e-12
00065 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00066 
00067 //
00068 // Kernel Cache
00069 //
00070 // l is the number of total data items
00071 // size is the cache size limit in bytes
00072 //
00073 class Cache
00074 {
00075 public:
00076     Cache(int l,long int size);
00077     ~Cache();
00078 
00079     // request data [0,len)
00080     // return some position p where [p,len) need to be filled
00081     // (p >= len if nothing needs to be filled)
00082     int get_data(const int index, Qfloat **data, int len);
00083     void swap_index(int i, int j);  // future_option
00084 private:
00085     int l;
00086     long int size;
00087     struct head_t
00088     {
00089         head_t *prev, *next;    // a cicular list
00090         Qfloat *data;
00091         int len;        // data[0,len) is cached in this entry
00092     };
00093 
00094     head_t *head;
00095     head_t lru_head;
00096     void lru_delete(head_t *h);
00097     void lru_insert(head_t *h);
00098 };
00099 
00100 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00101 {
00102     head = (head_t *)calloc(l,sizeof(head_t));  // initialized to 0
00103     size /= sizeof(Qfloat);
00104     size -= l * sizeof(head_t) / sizeof(Qfloat);
00105     size = max(size, (long int) 2*l);   // cache must be large enough for two columns
00106     lru_head.next = lru_head.prev = &lru_head;
00107 }
00108 
00109 Cache::~Cache()
00110 {
00111     for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00112         free(h->data);
00113     free(head);
00114 }
00115 
00116 void Cache::lru_delete(head_t *h)
00117 {
00118     // delete from current location
00119     h->prev->next = h->next;
00120     h->next->prev = h->prev;
00121 }
00122 
00123 void Cache::lru_insert(head_t *h)
00124 {
00125     // insert to last position
00126     h->next = &lru_head;
00127     h->prev = lru_head.prev;
00128     h->prev->next = h;
00129     h->next->prev = h;
00130 }
00131 
00132 int Cache::get_data(const int index, Qfloat **data, int len)
00133 {
00134     head_t *h = &head[index];
00135     if(h->len) lru_delete(h);
00136     int more = len - h->len;
00137 
00138     if(more > 0)
00139     {
00140         // free old space
00141         while(size < more)
00142         {
00143             head_t *old = lru_head.next;
00144             lru_delete(old);
00145             free(old->data);
00146             size += old->len;
00147             old->data = 0;
00148             old->len = 0;
00149         }
00150 
00151         // allocate new space
00152         h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00153         size -= more;
00154         swap(h->len,len);
00155     }
00156 
00157     lru_insert(h);
00158     *data = h->data;
00159     return len;
00160 }
00161 
00162 void Cache::swap_index(int i, int j)
00163 {
00164     if(i==j) return;
00165 
00166     if(head[i].len) lru_delete(&head[i]);
00167     if(head[j].len) lru_delete(&head[j]);
00168     swap(head[i].data,head[j].data);
00169     swap(head[i].len,head[j].len);
00170     if(head[i].len) lru_insert(&head[i]);
00171     if(head[j].len) lru_insert(&head[j]);
00172 
00173     if(i>j) swap(i,j);
00174     for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00175     {
00176         if(h->len > i)
00177         {
00178             if(h->len > j)
00179                 swap(h->data[i],h->data[j]);
00180             else
00181             {
00182                 // give up
00183                 lru_delete(h);
00184                 free(h->data);
00185                 size += h->len;
00186                 h->data = 0;
00187                 h->len = 0;
00188             }
00189         }
00190     }
00191 }
00192 
00193 //
00194 // Kernel evaluation
00195 //
00196 // the static method k_function is for doing single kernel evaluation
00197 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00198 // the member function get_Q is for getting one column from the Q Matrix
00199 //
00200 class QMatrix {
00201 public:
00202     virtual Qfloat *get_Q(int column, int len) const = 0;
00203     virtual Qfloat *get_QD() const = 0;
00204     virtual void swap_index(int i, int j) const = 0;
00205     virtual ~QMatrix() {}
00206 };
00207 
00208 class Kernel: public QMatrix {
00209 public:
00210     Kernel(int l, svm_node * const * x, const svm_parameter& param);
00211     virtual ~Kernel();
00212 
00213     virtual Qfloat *get_Q(int column, int len) const = 0;
00214     virtual Qfloat *get_QD() const = 0;
00215     virtual void swap_index(int i, int j) const // no so const...
00216     {
00217         swap(x[i],x[j]);
00218         if(x_square) swap(x_square[i],x_square[j]);
00219     }
00220 
00221     inline double kernel_function(int i, int j) const
00222     {
00223         return kernel->kernel(x[i]->index,x[j]->index);
00224     }
00225 
00226 private:
00227     CKernel* kernel;
00228     const svm_node **x;
00229     double *x_square;
00230 
00231     // svm_parameter
00232     const int kernel_type;
00233     const int degree;
00234     const double gamma;
00235     const double coef0;
00236 };
00237 
00238 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00239 :kernel_type(param.kernel_type), degree(param.degree),
00240  gamma(param.gamma), coef0(param.coef0)
00241 {
00242     clone(x,x_,l);
00243     x_square = 0;
00244     kernel=param.kernel;
00245 }
00246 
00247 Kernel::~Kernel()
00248 {
00249     delete[] x;
00250     delete[] x_square;
00251 }
00252 
00253 // Generalized SMO+SVMlight algorithm
00254 // Solves:
00255 //
00256 //  min 0.5(\alpha^T Q \alpha) + b^T \alpha
00257 //
00258 //      y^T \alpha = \delta
00259 //      y_i = +1 or -1
00260 //      0 <= alpha_i <= Cp for y_i = 1
00261 //      0 <= alpha_i <= Cn for y_i = -1
00262 //
00263 // Given:
00264 //
00265 //  Q, p, y, Cp, Cn, and an initial feasible point \alpha
00266 //  l is the size of vectors and matrices
00267 //  eps is the stopping tolerance
00268 //
00269 // solution will be put in \alpha, objective value will be put in obj
00270 //
00271 class Solver {
00272 public:
00273     Solver() {};
00274     virtual ~Solver() {};
00275 
00276     struct SolutionInfo {
00277         double obj;
00278         double rho;
00279         double upper_bound_p;
00280         double upper_bound_n;
00281         double r;   // for Solver_NU
00282     };
00283 
00284     void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00285            double *alpha_, double Cp, double Cn, double eps,
00286            SolutionInfo* si, int shrinking);
00287 protected:
00288     int active_size;
00289     schar *y;
00290     double *G;      // gradient of objective function
00291     enum { LOWER_BOUND, UPPER_BOUND, FREE };
00292     char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
00293     double *alpha;
00294     const QMatrix *Q;
00295     const Qfloat *QD;
00296     double eps;
00297     double Cp,Cn;
00298     double *p;
00299     int *active_set;
00300     double *G_bar;      // gradient, if we treat free variables as 0
00301     int l;
00302     bool unshrinked;    // XXX
00303 
00304     double get_C(int i)
00305     {
00306         return (y[i] > 0)? Cp : Cn;
00307     }
00308     void update_alpha_status(int i)
00309     {
00310         if(alpha[i] >= get_C(i))
00311             alpha_status[i] = UPPER_BOUND;
00312         else if(alpha[i] <= 0)
00313             alpha_status[i] = LOWER_BOUND;
00314         else alpha_status[i] = FREE;
00315     }
00316     bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00317     bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00318     bool is_free(int i) { return alpha_status[i] == FREE; }
00319     void swap_index(int i, int j);
00320     void reconstruct_gradient();
00321     virtual int select_working_set(int &i, int &j, double &gap);
00322     virtual double calculate_rho();
00323     virtual void do_shrinking();
00324 private:
00325     bool be_shrunken(int i, double Gmax1, double Gmax2);    
00326 };
00327 
00328 void Solver::swap_index(int i, int j)
00329 {
00330     Q->swap_index(i,j);
00331     swap(y[i],y[j]);
00332     swap(G[i],G[j]);
00333     swap(alpha_status[i],alpha_status[j]);
00334     swap(alpha[i],alpha[j]);
00335     swap(p[i],p[j]);
00336     swap(active_set[i],active_set[j]);
00337     swap(G_bar[i],G_bar[j]);
00338 }
00339 
00340 void Solver::reconstruct_gradient()
00341 {
00342     // reconstruct inactive elements of G from G_bar and free variables
00343 
00344     if(active_size == l) return;
00345 
00346     int i;
00347     for(i=active_size;i<l;i++)
00348         G[i] = G_bar[i] + p[i];
00349     
00350     for(i=0;i<active_size;i++)
00351         if(is_free(i))
00352         {
00353             const Qfloat *Q_i = Q->get_Q(i,l);
00354             double alpha_i = alpha[i];
00355             for(int j=active_size;j<l;j++)
00356                 G[j] += alpha_i * Q_i[j];
00357         }
00358 }
00359 
00360 void Solver::Solve(int p_l, const QMatrix& p_Q, const double *p_p, const schar *p_y,
00361            double *p_alpha, double p_Cp, double p_Cn, double p_eps,
00362            SolutionInfo* p_si, int shrinking)
00363 {
00364     this->l = p_l;
00365     this->Q = &p_Q;
00366     QD=Q->get_QD();
00367     clone(p, p_p,l);
00368     clone(y, p_y,l);
00369     clone(alpha,p_alpha,l);
00370     this->Cp = p_Cp;
00371     this->Cn = p_Cn;
00372     this->eps = p_eps;
00373     unshrinked = false;
00374 
00375     // initialize alpha_status
00376     {
00377         alpha_status = new char[l];
00378         for(int i=0;i<l;i++)
00379             update_alpha_status(i);
00380     }
00381 
00382     // initialize active set (for shrinking)
00383     {
00384         active_set = new int[l];
00385         for(int i=0;i<l;i++)
00386             active_set[i] = i;
00387         active_size = l;
00388     }
00389 
00390     // initialize gradient
00391     {
00392         G = new double[l];
00393         G_bar = new double[l];
00394         int i;
00395         for(i=0;i<l;i++)
00396         {
00397             G[i] = p_p[i];
00398             G_bar[i] = 0;
00399         }
00400         for(i=0;i<l;i++)
00401             if(!is_lower_bound(i))
00402             {
00403                 const Qfloat *Q_i = Q->get_Q(i,l);
00404                 double alpha_i = alpha[i];
00405                 int j;
00406                 for(j=0;j<l;j++)
00407                     G[j] += alpha_i*Q_i[j];
00408                 if(is_upper_bound(i))
00409                     for(j=0;j<l;j++)
00410                         G_bar[j] += get_C(i) * Q_i[j];
00411             }
00412     }
00413 
00414     // optimization step
00415 
00416     int iter = 0;
00417     int counter = min(l,1000)+1;
00418 
00419     while(1)
00420     {
00421         // show progress and do shrinking
00422 
00423         if(--counter == 0)
00424         {
00425             counter = min(l,1000);
00426             if(shrinking) do_shrinking();
00427             //SG_SINFO(".");
00428         }
00429 
00430         int i,j;
00431         double gap;
00432         if(select_working_set(i,j, gap)!=0)
00433         {
00434             // reconstruct the whole gradient
00435             reconstruct_gradient();
00436             // reset active set size and check
00437             active_size = l;
00438             //SG_SINFO("*");
00439             if(select_working_set(i,j, gap)!=0)
00440                 break;
00441             else
00442                 counter = 1;    // do shrinking next iteration
00443         }
00444 
00445         SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
00446         
00447         ++iter;
00448 
00449         // update alpha[i] and alpha[j], handle bounds carefully
00450         
00451         const Qfloat *Q_i = Q->get_Q(i,active_size);
00452         const Qfloat *Q_j = Q->get_Q(j,active_size);
00453 
00454         double C_i = get_C(i);
00455         double C_j = get_C(j);
00456 
00457         double old_alpha_i = alpha[i];
00458         double old_alpha_j = alpha[j];
00459 
00460         if(y[i]!=y[j])
00461         {
00462             double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00463             if (quad_coef <= 0)
00464                 quad_coef = TAU;
00465             double delta = (-G[i]-G[j])/quad_coef;
00466             double diff = alpha[i] - alpha[j];
00467             alpha[i] += delta;
00468             alpha[j] += delta;
00469             
00470             if(diff > 0)
00471             {
00472                 if(alpha[j] < 0)
00473                 {
00474                     alpha[j] = 0;
00475                     alpha[i] = diff;
00476                 }
00477             }
00478             else
00479             {
00480                 if(alpha[i] < 0)
00481                 {
00482                     alpha[i] = 0;
00483                     alpha[j] = -diff;
00484                 }
00485             }
00486             if(diff > C_i - C_j)
00487             {
00488                 if(alpha[i] > C_i)
00489                 {
00490                     alpha[i] = C_i;
00491                     alpha[j] = C_i - diff;
00492                 }
00493             }
00494             else
00495             {
00496                 if(alpha[j] > C_j)
00497                 {
00498                     alpha[j] = C_j;
00499                     alpha[i] = C_j + diff;
00500                 }
00501             }
00502         }
00503         else
00504         {
00505             double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00506             if (quad_coef <= 0)
00507                 quad_coef = TAU;
00508             double delta = (G[i]-G[j])/quad_coef;
00509             double sum = alpha[i] + alpha[j];
00510             alpha[i] -= delta;
00511             alpha[j] += delta;
00512 
00513             if(sum > C_i)
00514             {
00515                 if(alpha[i] > C_i)
00516                 {
00517                     alpha[i] = C_i;
00518                     alpha[j] = sum - C_i;
00519                 }
00520             }
00521             else
00522             {
00523                 if(alpha[j] < 0)
00524                 {
00525                     alpha[j] = 0;
00526                     alpha[i] = sum;
00527                 }
00528             }
00529             if(sum > C_j)
00530             {
00531                 if(alpha[j] > C_j)
00532                 {
00533                     alpha[j] = C_j;
00534                     alpha[i] = sum - C_j;
00535                 }
00536             }
00537             else
00538             {
00539                 if(alpha[i] < 0)
00540                 {
00541                     alpha[i] = 0;
00542                     alpha[j] = sum;
00543                 }
00544             }
00545         }
00546 
00547         // update G
00548 
00549         double delta_alpha_i = alpha[i] - old_alpha_i;
00550         double delta_alpha_j = alpha[j] - old_alpha_j;
00551         
00552         for(int k=0;k<active_size;k++)
00553         {
00554             G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00555         }
00556 
00557         // update alpha_status and G_bar
00558 
00559         {
00560             bool ui = is_upper_bound(i);
00561             bool uj = is_upper_bound(j);
00562             update_alpha_status(i);
00563             update_alpha_status(j);
00564             int k;
00565             if(ui != is_upper_bound(i))
00566             {
00567                 Q_i = Q->get_Q(i,l);
00568                 if(ui)
00569                     for(k=0;k<l;k++)
00570                         G_bar[k] -= C_i * Q_i[k];
00571                 else
00572                     for(k=0;k<l;k++)
00573                         G_bar[k] += C_i * Q_i[k];
00574             }
00575 
00576             if(uj != is_upper_bound(j))
00577             {
00578                 Q_j = Q->get_Q(j,l);
00579                 if(uj)
00580                     for(k=0;k<l;k++)
00581                         G_bar[k] -= C_j * Q_j[k];
00582                 else
00583                     for(k=0;k<l;k++)
00584                         G_bar[k] += C_j * Q_j[k];
00585             }
00586         }
00587     }
00588 
00589     // calculate rho
00590 
00591     p_si->rho = calculate_rho();
00592 
00593     // calculate objective value
00594     {
00595         double v = 0;
00596         int i;
00597         for(i=0;i<l;i++)
00598             v += alpha[i] * (G[i] + p[i]);
00599 
00600         p_si->obj = v/2;
00601     }
00602 
00603     // put back the solution
00604     {
00605         for(int i=0;i<l;i++)
00606             p_alpha[active_set[i]] = alpha[i];
00607     }
00608 
00609     p_si->upper_bound_p = Cp;
00610     p_si->upper_bound_n = Cn;
00611 
00612     SG_SINFO("\noptimization finished, #iter = %d\n",iter);
00613 
00614     delete[] p;
00615     delete[] y;
00616     delete[] alpha;
00617     delete[] alpha_status;
00618     delete[] active_set;
00619     delete[] G;
00620     delete[] G_bar;
00621 }
00622 
00623 // return 1 if already optimal, return 0 otherwise
00624 int Solver::select_working_set(int &out_i, int &out_j, double &gap)
00625 {
00626     // return i,j such that
00627     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00628     // j: minimizes the decrease of obj value
00629     //    (if quadratic coefficeint <= 0, replace it with tau)
00630     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00631     
00632     double Gmax = -INF;
00633     double Gmax2 = -INF;
00634     int Gmax_idx = -1;
00635     int Gmin_idx = -1;
00636     double obj_diff_min = INF;
00637 
00638     for(int t=0;t<active_size;t++)
00639         if(y[t]==+1)    
00640         {
00641             if(!is_upper_bound(t))
00642                 if(-G[t] >= Gmax)
00643                 {
00644                     Gmax = -G[t];
00645                     Gmax_idx = t;
00646                 }
00647         }
00648         else
00649         {
00650             if(!is_lower_bound(t))
00651                 if(G[t] >= Gmax)
00652                 {
00653                     Gmax = G[t];
00654                     Gmax_idx = t;
00655                 }
00656         }
00657 
00658     int i = Gmax_idx;
00659     const Qfloat *Q_i = NULL;
00660     if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00661         Q_i = Q->get_Q(i,active_size);
00662 
00663     for(int j=0;j<active_size;j++)
00664     {
00665         if(y[j]==+1)
00666         {
00667             if (!is_lower_bound(j))
00668             {
00669                 double grad_diff=Gmax+G[j];
00670                 if (G[j] >= Gmax2)
00671                     Gmax2 = G[j];
00672                 if (grad_diff > 0)
00673                 {
00674                     double obj_diff; 
00675                     double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
00676                     if (quad_coef > 0)
00677                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00678                     else
00679                         obj_diff = -(grad_diff*grad_diff)/TAU;
00680 
00681                     if (obj_diff <= obj_diff_min)
00682                     {
00683                         Gmin_idx=j;
00684                         obj_diff_min = obj_diff;
00685                     }
00686                 }
00687             }
00688         }
00689         else
00690         {
00691             if (!is_upper_bound(j))
00692             {
00693                 double grad_diff= Gmax-G[j];
00694                 if (-G[j] >= Gmax2)
00695                     Gmax2 = -G[j];
00696                 if (grad_diff > 0)
00697                 {
00698                     double obj_diff; 
00699                     double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
00700                     if (quad_coef > 0)
00701                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00702                     else
00703                         obj_diff = -(grad_diff*grad_diff)/TAU;
00704 
00705                     if (obj_diff <= obj_diff_min)
00706                     {
00707                         Gmin_idx=j;
00708                         obj_diff_min = obj_diff;
00709                     }
00710                 }
00711             }
00712         }
00713     }
00714 
00715     gap=Gmax+Gmax2;
00716     if(gap < eps)
00717         return 1;
00718 
00719     out_i = Gmax_idx;
00720     out_j = Gmin_idx;
00721     return 0;
00722 }
00723 
00724 bool Solver::be_shrunken(int i, double Gmax1, double Gmax2)
00725 {
00726     if(is_upper_bound(i))
00727     {
00728         if(y[i]==+1)
00729             return(-G[i] > Gmax1);
00730         else
00731             return(-G[i] > Gmax2);
00732     }
00733     else if(is_lower_bound(i))
00734     {
00735         if(y[i]==+1)
00736             return(G[i] > Gmax2);
00737         else    
00738             return(G[i] > Gmax1);
00739     }
00740     else
00741         return(false);
00742 }
00743 
00744 void Solver::do_shrinking()
00745 {
00746     int i;
00747     double Gmax1 = -INF;        // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00748     double Gmax2 = -INF;        // max { y_i * grad(f)_i | i in I_low(\alpha) }
00749 
00750     // find maximal violating pair first
00751     for(i=0;i<active_size;i++)
00752     {
00753         if(y[i]==+1)    
00754         {
00755             if(!is_upper_bound(i))  
00756             {
00757                 if(-G[i] >= Gmax1)
00758                     Gmax1 = -G[i];
00759             }
00760             if(!is_lower_bound(i))  
00761             {
00762                 if(G[i] >= Gmax2)
00763                     Gmax2 = G[i];
00764             }
00765         }
00766         else    
00767         {
00768             if(!is_upper_bound(i))  
00769             {
00770                 if(-G[i] >= Gmax2)
00771                     Gmax2 = -G[i];
00772             }
00773             if(!is_lower_bound(i))  
00774             {
00775                 if(G[i] >= Gmax1)
00776                     Gmax1 = G[i];
00777             }
00778         }
00779     }
00780 
00781     // shrink
00782 
00783     for(i=0;i<active_size;i++)
00784         if (be_shrunken(i, Gmax1, Gmax2))
00785         {
00786             active_size--;
00787             while (active_size > i)
00788             {
00789                 if (!be_shrunken(active_size, Gmax1, Gmax2))
00790                 {
00791                     swap_index(i,active_size);
00792                     break;
00793                 }
00794                 active_size--;
00795             }
00796         }
00797 
00798     // unshrink, check all variables again before final iterations
00799 
00800     if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
00801     
00802     unshrinked = true;
00803     reconstruct_gradient();
00804 
00805     for(i=l-1;i>=active_size;i--)
00806         if (!be_shrunken(i, Gmax1, Gmax2))
00807         {
00808             while (active_size < i)
00809             {
00810                 if (be_shrunken(active_size, Gmax1, Gmax2))
00811                 {
00812                     swap_index(i,active_size);
00813                     break;
00814                 }
00815                 active_size++;
00816             }
00817             active_size++;
00818         }
00819 }
00820 
00821 double Solver::calculate_rho()
00822 {
00823     double r;
00824     int nr_free = 0;
00825     double ub = INF, lb = -INF, sum_free = 0;
00826     for(int i=0;i<active_size;i++)
00827     {
00828         double yG = y[i]*G[i];
00829 
00830         if(is_upper_bound(i))
00831         {
00832             if(y[i]==-1)
00833                 ub = min(ub,yG);
00834             else
00835                 lb = max(lb,yG);
00836         }
00837         else if(is_lower_bound(i))
00838         {
00839             if(y[i]==+1)
00840                 ub = min(ub,yG);
00841             else
00842                 lb = max(lb,yG);
00843         }
00844         else
00845         {
00846             ++nr_free;
00847             sum_free += yG;
00848         }
00849     }
00850 
00851     if(nr_free>0)
00852         r = sum_free/nr_free;
00853     else
00854         r = (ub+lb)/2;
00855 
00856     return r;
00857 }
00858 
00859 //
00860 // Solver for nu-svm classification and regression
00861 //
00862 // additional constraint: e^T \alpha = constant
00863 //
00864 class Solver_NU : public Solver
00865 {
00866 public:
00867     Solver_NU() {}
00868     void Solve(int p_l, const QMatrix& p_Q, const double *p_p, const schar *p_y,
00869            double* p_alpha, double p_Cp, double p_Cn, double p_eps,
00870            SolutionInfo* p_si, int shrinking)
00871     {
00872         this->si = p_si;
00873         Solver::Solve(p_l,p_Q,p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking);
00874     }
00875 private:
00876     SolutionInfo *si;
00877     int select_working_set(int &i, int &j, double &gap);
00878     double calculate_rho();
00879     bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
00880     void do_shrinking();
00881 };
00882 
00883 // return 1 if already optimal, return 0 otherwise
00884 int Solver_NU::select_working_set(int &out_i, int &out_j, double &gap)
00885 {
00886     // return i,j such that y_i = y_j and
00887     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00888     // j: minimizes the decrease of obj value
00889     //    (if quadratic coefficeint <= 0, replace it with tau)
00890     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00891 
00892     double Gmaxp = -INF;
00893     double Gmaxp2 = -INF;
00894     int Gmaxp_idx = -1;
00895 
00896     double Gmaxn = -INF;
00897     double Gmaxn2 = -INF;
00898     int Gmaxn_idx = -1;
00899 
00900     int Gmin_idx = -1;
00901     double obj_diff_min = INF;
00902 
00903     for(int t=0;t<active_size;t++)
00904         if(y[t]==+1)
00905         {
00906             if(!is_upper_bound(t))
00907                 if(-G[t] >= Gmaxp)
00908                 {
00909                     Gmaxp = -G[t];
00910                     Gmaxp_idx = t;
00911                 }
00912         }
00913         else
00914         {
00915             if(!is_lower_bound(t))
00916                 if(G[t] >= Gmaxn)
00917                 {
00918                     Gmaxn = G[t];
00919                     Gmaxn_idx = t;
00920                 }
00921         }
00922 
00923     int ip = Gmaxp_idx;
00924     int in = Gmaxn_idx;
00925     const Qfloat *Q_ip = NULL;
00926     const Qfloat *Q_in = NULL;
00927     if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
00928         Q_ip = Q->get_Q(ip,active_size);
00929     if(in != -1)
00930         Q_in = Q->get_Q(in,active_size);
00931 
00932     for(int j=0;j<active_size;j++)
00933     {
00934         if(y[j]==+1)
00935         {
00936             if (!is_lower_bound(j)) 
00937             {
00938                 double grad_diff=Gmaxp+G[j];
00939                 if (G[j] >= Gmaxp2)
00940                     Gmaxp2 = G[j];
00941                 if (grad_diff > 0)
00942                 {
00943                     double obj_diff; 
00944                     double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
00945                     if (quad_coef > 0)
00946                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00947                     else
00948                         obj_diff = -(grad_diff*grad_diff)/TAU;
00949 
00950                     if (obj_diff <= obj_diff_min)
00951                     {
00952                         Gmin_idx=j;
00953                         obj_diff_min = obj_diff;
00954                     }
00955                 }
00956             }
00957         }
00958         else
00959         {
00960             if (!is_upper_bound(j))
00961             {
00962                 double grad_diff=Gmaxn-G[j];
00963                 if (-G[j] >= Gmaxn2)
00964                     Gmaxn2 = -G[j];
00965                 if (grad_diff > 0)
00966                 {
00967                     double obj_diff; 
00968                     double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
00969                     if (quad_coef > 0)
00970                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00971                     else
00972                         obj_diff = -(grad_diff*grad_diff)/TAU;
00973 
00974                     if (obj_diff <= obj_diff_min)
00975                     {
00976                         Gmin_idx=j;
00977                         obj_diff_min = obj_diff;
00978                     }
00979                 }
00980             }
00981         }
00982     }
00983 
00984     gap=max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
00985     if(gap < eps)
00986         return 1;
00987 
00988     if (y[Gmin_idx] == +1)
00989         out_i = Gmaxp_idx;
00990     else
00991         out_i = Gmaxn_idx;
00992     out_j = Gmin_idx;
00993 
00994     return 0;
00995 }
00996 
00997 bool Solver_NU::be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
00998 {
00999     if(is_upper_bound(i))
01000     {
01001         if(y[i]==+1)
01002             return(-G[i] > Gmax1);
01003         else    
01004             return(-G[i] > Gmax4);
01005     }
01006     else if(is_lower_bound(i))
01007     {
01008         if(y[i]==+1)
01009             return(G[i] > Gmax2);
01010         else    
01011             return(G[i] > Gmax3);
01012     }
01013     else
01014         return(false);
01015 }
01016 
01017 void Solver_NU::do_shrinking()
01018 {
01019     double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01020     double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01021     double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01022     double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01023 
01024     // find maximal violating pair first
01025     int i;
01026     for(i=0;i<active_size;i++)
01027     {
01028         if(!is_upper_bound(i))
01029         {
01030             if(y[i]==+1)
01031             {
01032                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01033             }
01034             else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01035         }
01036         if(!is_lower_bound(i))
01037         {
01038             if(y[i]==+1)
01039             {   
01040                 if(G[i] > Gmax2) Gmax2 = G[i];
01041             }
01042             else    if(G[i] > Gmax3) Gmax3 = G[i];
01043         }
01044     }
01045 
01046     // shrinking
01047 
01048     for(i=0;i<active_size;i++)
01049         if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01050         {
01051             active_size--;
01052             while (active_size > i)
01053             {
01054                 if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01055                 {
01056                     swap_index(i,active_size);
01057                     break;
01058                 }
01059                 active_size--;
01060             }
01061         }
01062 
01063     // unshrink, check all variables again before final iterations
01064 
01065     if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
01066     
01067     unshrinked = true;
01068     reconstruct_gradient();
01069 
01070     for(i=l-1;i>=active_size;i--)
01071         if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
01072         {
01073             while (active_size < i)
01074             {
01075                 if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01076                 {
01077                     swap_index(i,active_size);
01078                     break;
01079                 }
01080                 active_size++;
01081             }
01082             active_size++;
01083         }
01084 }
01085 
01086 double Solver_NU::calculate_rho()
01087 {
01088     int nr_free1 = 0,nr_free2 = 0;
01089     double ub1 = INF, ub2 = INF;
01090     double lb1 = -INF, lb2 = -INF;
01091     double sum_free1 = 0, sum_free2 = 0;
01092 
01093     for(int i=0;i<active_size;i++)
01094     {
01095         if(y[i]==+1)
01096         {
01097             if(is_upper_bound(i))
01098                 lb1 = max(lb1,G[i]);
01099             else if(is_lower_bound(i))
01100                 ub1 = min(ub1,G[i]);
01101             else
01102             {
01103                 ++nr_free1;
01104                 sum_free1 += G[i];
01105             }
01106         }
01107         else
01108         {
01109             if(is_upper_bound(i))
01110                 lb2 = max(lb2,G[i]);
01111             else if(is_lower_bound(i))
01112                 ub2 = min(ub2,G[i]);
01113             else
01114             {
01115                 ++nr_free2;
01116                 sum_free2 += G[i];
01117             }
01118         }
01119     }
01120 
01121     double r1,r2;
01122     if(nr_free1 > 0)
01123         r1 = sum_free1/nr_free1;
01124     else
01125         r1 = (ub1+lb1)/2;
01126     
01127     if(nr_free2 > 0)
01128         r2 = sum_free2/nr_free2;
01129     else
01130         r2 = (ub2+lb2)/2;
01131     
01132     si->r = (r1+r2)/2;
01133     return (r1-r2)/2;
01134 }
01135 
01136 //
01137 // Q matrices for various formulations
01138 //
01139 class SVC_Q: public Kernel
01140 { 
01141 public:
01142     SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01143     :Kernel(prob.l, prob.x, param)
01144     {
01145         clone(y,y_,prob.l);
01146         cache = new Cache(prob.l,(long int)(param.cache_size*(1l<<20)));
01147         QD = new Qfloat[prob.l];
01148         for(int i=0;i<prob.l;i++)
01149             QD[i]= (Qfloat)kernel_function(i,i);
01150     }
01151     
01152     Qfloat *get_Q(int i, int len) const
01153     {
01154         Qfloat *data;
01155         int start;
01156         if((start = cache->get_data(i,&data,len)) < len)
01157         {
01158             for(int j=start;j<len;j++)
01159                 data[j] = (Qfloat) y[i]*y[j]*kernel_function(i,j);
01160         }
01161         return data;
01162     }
01163 
01164     Qfloat *get_QD() const
01165     {
01166         return QD;
01167     }
01168 
01169     void swap_index(int i, int j) const
01170     {
01171         cache->swap_index(i,j);
01172         Kernel::swap_index(i,j);
01173         swap(y[i],y[j]);
01174         swap(QD[i],QD[j]);
01175     }
01176 
01177     ~SVC_Q()
01178     {
01179         delete[] y;
01180         delete cache;
01181         delete[] QD;
01182     }
01183 private:
01184     schar *y;
01185     Cache *cache;
01186     Qfloat *QD;
01187 };
01188 
01189 class ONE_CLASS_Q: public Kernel
01190 {
01191 public:
01192     ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01193     :Kernel(prob.l, prob.x, param)
01194     {
01195         cache = new Cache(prob.l,(long int)(param.cache_size*(1l<<20)));
01196         QD = new Qfloat[prob.l];
01197         for(int i=0;i<prob.l;i++)
01198             QD[i]= (Qfloat)kernel_function(i,i);
01199     }
01200     
01201     Qfloat *get_Q(int i, int len) const
01202     {
01203         Qfloat *data;
01204         int start;
01205         if((start = cache->get_data(i,&data,len)) < len)
01206         {
01207             for(int j=start;j<len;j++)
01208                 data[j] = (Qfloat) kernel_function(i,j);
01209         }
01210         return data;
01211     }
01212 
01213     Qfloat *get_QD() const
01214     {
01215         return QD;
01216     }
01217 
01218     void swap_index(int i, int j) const
01219     {
01220         cache->swap_index(i,j);
01221         Kernel::swap_index(i,j);
01222         swap(QD[i],QD[j]);
01223     }
01224 
01225     ~ONE_CLASS_Q()
01226     {
01227         delete cache;
01228         delete[] QD;
01229     }
01230 private:
01231     Cache *cache;
01232     Qfloat *QD;
01233 };
01234 
01235 class SVR_Q: public Kernel
01236 { 
01237 public:
01238     SVR_Q(const svm_problem& prob, const svm_parameter& param)
01239     :Kernel(prob.l, prob.x, param)
01240     {
01241         l = prob.l;
01242         cache = new Cache(l,(long int)(param.cache_size*(1l<<20)));
01243         QD = new Qfloat[2*l];
01244         sign = new schar[2*l];
01245         index = new int[2*l];
01246         for(int k=0;k<l;k++)
01247         {
01248             sign[k] = 1;
01249             sign[k+l] = -1;
01250             index[k] = k;
01251             index[k+l] = k;
01252             QD[k]= (Qfloat)kernel_function(k,k);
01253             QD[k+l]=QD[k];
01254         }
01255         buffer[0] = new Qfloat[2*l];
01256         buffer[1] = new Qfloat[2*l];
01257         next_buffer = 0;
01258     }
01259 
01260     void swap_index(int i, int j) const
01261     {
01262         swap(sign[i],sign[j]);
01263         swap(index[i],index[j]);
01264         swap(QD[i],QD[j]);
01265     }
01266     
01267     Qfloat *get_Q(int i, int len) const
01268     {
01269         Qfloat *data;
01270         int real_i = index[i];
01271         if(cache->get_data(real_i,&data,l) < l)
01272         {
01273             for(int j=0;j<l;j++)
01274                 data[j] = (Qfloat)kernel_function(real_i,j);
01275         }
01276 
01277         // reorder and copy
01278         Qfloat *buf = buffer[next_buffer];
01279         next_buffer = 1 - next_buffer;
01280         schar si = sign[i];
01281         for(int j=0;j<len;j++)
01282             buf[j] = si * sign[j] * data[index[j]];
01283         return buf;
01284     }
01285 
01286     Qfloat *get_QD() const
01287     {
01288         return QD;
01289     }
01290 
01291     ~SVR_Q()
01292     {
01293         delete cache;
01294         delete[] sign;
01295         delete[] index;
01296         delete[] buffer[0];
01297         delete[] buffer[1];
01298         delete[] QD;
01299     }
01300 private:
01301     int l;
01302     Cache *cache;
01303     schar *sign;
01304     int *index;
01305     mutable int next_buffer;
01306     Qfloat *buffer[2];
01307     Qfloat *QD;
01308 };
01309 
01310 //
01311 // construct and solve various formulations
01312 //
01313 static void solve_c_svc(
01314     const svm_problem *prob, const svm_parameter* param,
01315     double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01316 {
01317     int l = prob->l;
01318     double *minus_ones = new double[l];
01319     schar *y = new schar[l];
01320 
01321     int i;
01322 
01323     for(i=0;i<l;i++)
01324     {
01325         alpha[i] = 0;
01326         minus_ones[i] = -1;
01327         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01328     }
01329 
01330     Solver s;
01331     s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01332         alpha, Cp, Cn, param->eps, si, param->shrinking);
01333 
01334     double sum_alpha=0;
01335     for(i=0;i<l;i++)
01336         sum_alpha += alpha[i];
01337 
01338     if (Cp==Cn)
01339         SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
01340 
01341     for(i=0;i<l;i++)
01342         alpha[i] *= y[i];
01343 
01344     delete[] minus_ones;
01345     delete[] y;
01346 }
01347 
01348 static void solve_nu_svc(
01349     const svm_problem *prob, const svm_parameter *param,
01350     double *alpha, Solver::SolutionInfo* si)
01351 {
01352     int i;
01353     int l = prob->l;
01354     double nu = param->nu;
01355 
01356     schar *y = new schar[l];
01357 
01358     for(i=0;i<l;i++)
01359         if(prob->y[i]>0)
01360             y[i] = +1;
01361         else
01362             y[i] = -1;
01363 
01364     double sum_pos = nu*l/2;
01365     double sum_neg = nu*l/2;
01366 
01367     for(i=0;i<l;i++)
01368         if(y[i] == +1)
01369         {
01370             alpha[i] = min(1.0,sum_pos);
01371             sum_pos -= alpha[i];
01372         }
01373         else
01374         {
01375             alpha[i] = min(1.0,sum_neg);
01376             sum_neg -= alpha[i];
01377         }
01378 
01379     double *zeros = new double[l];
01380 
01381     for(i=0;i<l;i++)
01382         zeros[i] = 0;
01383 
01384     Solver_NU s;
01385     s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01386         alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01387     double r = si->r;
01388 
01389     SG_SINFO("C = %f\n",1/r);
01390 
01391     for(i=0;i<l;i++)
01392         alpha[i] *= y[i]/r;
01393 
01394     si->rho /= r;
01395     si->obj /= (r*r);
01396     si->upper_bound_p = 1/r;
01397     si->upper_bound_n = 1/r;
01398 
01399     delete[] y;
01400     delete[] zeros;
01401 }
01402 
01403 static void solve_one_class(
01404     const svm_problem *prob, const svm_parameter *param,
01405     double *alpha, Solver::SolutionInfo* si)
01406 {
01407     int l = prob->l;
01408     double *zeros = new double[l];
01409     schar *ones = new schar[l];
01410     int i;
01411 
01412     int n = (int)(param->nu*prob->l);   // # of alpha's at upper bound
01413 
01414     for(i=0;i<n;i++)
01415         alpha[i] = 1;
01416     if(n<prob->l)
01417         alpha[n] = param->nu * prob->l - n;
01418     for(i=n+1;i<l;i++)
01419         alpha[i] = 0;
01420 
01421     for(i=0;i<l;i++)
01422     {
01423         zeros[i] = 0;
01424         ones[i] = 1;
01425     }
01426 
01427     Solver s;
01428     s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01429         alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01430 
01431     delete[] zeros;
01432     delete[] ones;
01433 }
01434 
01435 static void solve_epsilon_svr(
01436     const svm_problem *prob, const svm_parameter *param,
01437     double *alpha, Solver::SolutionInfo* si)
01438 {
01439     int l = prob->l;
01440     double *alpha2 = new double[2*l];
01441     double *linear_term = new double[2*l];
01442     schar *y = new schar[2*l];
01443     int i;
01444 
01445     for(i=0;i<l;i++)
01446     {
01447         alpha2[i] = 0;
01448         linear_term[i] = param->p - prob->y[i];
01449         y[i] = 1;
01450 
01451         alpha2[i+l] = 0;
01452         linear_term[i+l] = param->p + prob->y[i];
01453         y[i+l] = -1;
01454     }
01455 
01456     Solver s;
01457     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01458         alpha2, param->C, param->C, param->eps, si, param->shrinking);
01459 
01460     double sum_alpha = 0;
01461     for(i=0;i<l;i++)
01462     {
01463         alpha[i] = alpha2[i] - alpha2[i+l];
01464         sum_alpha += fabs(alpha[i]);
01465     }
01466     SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
01467 
01468     delete[] alpha2;
01469     delete[] linear_term;
01470     delete[] y;
01471 }
01472 
01473 static void solve_nu_svr(
01474     const svm_problem *prob, const svm_parameter *param,
01475     double *alpha, Solver::SolutionInfo* si)
01476 {
01477     int l = prob->l;
01478     double C = param->C;
01479     double *alpha2 = new double[2*l];
01480     double *linear_term = new double[2*l];
01481     schar *y = new schar[2*l];
01482     int i;
01483 
01484     double sum = C * param->nu * l / 2;
01485     for(i=0;i<l;i++)
01486     {
01487         alpha2[i] = alpha2[i+l] = min(sum,C);
01488         sum -= alpha2[i];
01489 
01490         linear_term[i] = - prob->y[i];
01491         y[i] = 1;
01492 
01493         linear_term[i+l] = prob->y[i];
01494         y[i+l] = -1;
01495     }
01496 
01497     Solver_NU s;
01498     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01499         alpha2, C, C, param->eps, si, param->shrinking);
01500 
01501     SG_SINFO("epsilon = %f\n",-si->r);
01502 
01503     for(i=0;i<l;i++)
01504         alpha[i] = alpha2[i] - alpha2[i+l];
01505 
01506     delete[] alpha2;
01507     delete[] linear_term;
01508     delete[] y;
01509 }
01510 
01511 //
01512 // decision_function
01513 //
01514 struct decision_function
01515 {
01516     double *alpha;
01517     double rho; 
01518     double objective;
01519 };
01520 
01521 decision_function svm_train_one(
01522     const svm_problem *prob, const svm_parameter *param,
01523     double Cp, double Cn)
01524 {
01525     double *alpha = Malloc(double,prob->l);
01526     Solver::SolutionInfo si;
01527     switch(param->svm_type)
01528     {
01529         case C_SVC:
01530             solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01531             break;
01532         case NU_SVC:
01533             solve_nu_svc(prob,param,alpha,&si);
01534             break;
01535         case ONE_CLASS:
01536             solve_one_class(prob,param,alpha,&si);
01537             break;
01538         case EPSILON_SVR:
01539             solve_epsilon_svr(prob,param,alpha,&si);
01540             break;
01541         case NU_SVR:
01542             solve_nu_svr(prob,param,alpha,&si);
01543             break;
01544     }
01545 
01546     SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
01547 
01548     // output SVs
01549     if (param->svm_type != ONE_CLASS)
01550     {
01551         int nSV = 0;
01552         int nBSV = 0;
01553         for(int i=0;i<prob->l;i++)
01554         {
01555             if(fabs(alpha[i]) > 0)
01556             {
01557                 ++nSV;
01558                 if(prob->y[i] > 0)
01559                 {
01560                     if(fabs(alpha[i]) >= si.upper_bound_p)
01561                         ++nBSV;
01562                 }
01563                 else
01564                 {
01565                     if(fabs(alpha[i]) >= si.upper_bound_n)
01566                         ++nBSV;
01567                 }
01568             }
01569         }
01570         SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
01571     }
01572 
01573     decision_function f;
01574     f.alpha = alpha;
01575     f.rho = si.rho;
01576     f.objective=si.obj;
01577     return f;
01578 }
01579 
01580 //
01581 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
01582 // perm, length l, must be allocated before calling this subroutine
01583 void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
01584 {
01585     int l = prob->l;
01586     int max_nr_class = 16;
01587     int nr_class = 0;
01588     int *label = Malloc(int,max_nr_class);
01589     int *count = Malloc(int,max_nr_class);
01590     int *data_label = Malloc(int,l);    
01591     int i;
01592 
01593     for(i=0;i<l;i++)
01594     {
01595         int this_label = (int)prob->y[i];
01596         int j;
01597         for(j=0;j<nr_class;j++)
01598         {
01599             if(this_label == label[j])
01600             {
01601                 ++count[j];
01602                 break;
01603             }
01604         }
01605         data_label[i] = j;
01606         if(j == nr_class)
01607         {
01608             if(nr_class == max_nr_class)
01609             {
01610                 max_nr_class *= 2;
01611                 label = (int *)realloc(label,max_nr_class*sizeof(int));
01612                 count = (int *)realloc(count,max_nr_class*sizeof(int));
01613             }
01614             label[nr_class] = this_label;
01615             count[nr_class] = 1;
01616             ++nr_class;
01617         }
01618     }
01619 
01620     int *start = Malloc(int,nr_class);
01621     start[0] = 0;
01622     for(i=1;i<nr_class;i++)
01623         start[i] = start[i-1]+count[i-1];
01624     for(i=0;i<l;i++)
01625     {
01626         perm[start[data_label[i]]] = i;
01627         ++start[data_label[i]];
01628     }
01629     start[0] = 0;
01630     for(i=1;i<nr_class;i++)
01631         start[i] = start[i-1]+count[i-1];
01632 
01633     *nr_class_ret = nr_class;
01634     *label_ret = label;
01635     *start_ret = start;
01636     *count_ret = count;
01637     free(data_label);
01638 }
01639 
01640 //
01641 // Interface functions
01642 //
01643 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
01644 {
01645     svm_model *model = Malloc(svm_model,1);
01646     model->param = *param;
01647     model->free_sv = 0; // XXX
01648 
01649     if(param->svm_type == ONE_CLASS ||
01650        param->svm_type == EPSILON_SVR ||
01651        param->svm_type == NU_SVR)
01652     {
01653         SG_SINFO("training one class svm or doing epsilon sv regression\n");
01654 
01655         // regression or one-class-svm
01656         model->nr_class = 2;
01657         model->label = NULL;
01658         model->nSV = NULL;
01659         model->sv_coef = Malloc(double *,1);
01660         decision_function f = svm_train_one(prob,param,0,0);
01661         model->rho = Malloc(double,1);
01662         model->rho[0] = f.rho;
01663         model->objective = f.objective;
01664 
01665         int nSV = 0;
01666         int i;
01667         for(i=0;i<prob->l;i++)
01668             if(fabs(f.alpha[i]) > 0) ++nSV;
01669         model->l = nSV;
01670         model->SV = Malloc(svm_node *,nSV);
01671         model->sv_coef[0] = Malloc(double,nSV);
01672         int j = 0;
01673         for(i=0;i<prob->l;i++)
01674             if(fabs(f.alpha[i]) > 0)
01675             {
01676                 model->SV[j] = prob->x[i];
01677                 model->sv_coef[0][j] = f.alpha[i];
01678                 ++j;
01679             }       
01680 
01681         free(f.alpha);
01682     }
01683     else
01684     {
01685         // classification
01686         int l = prob->l;
01687         int nr_class;
01688         int *label = NULL;
01689         int *start = NULL;
01690         int *count = NULL;
01691         int *perm = Malloc(int,l);
01692 
01693         // group training data of the same class
01694         svm_group_classes(prob,&nr_class,&label,&start,&count,perm);        
01695         svm_node **x = Malloc(svm_node *,l);
01696         int i;
01697         for(i=0;i<l;i++)
01698             x[i] = prob->x[perm[i]];
01699 
01700         // calculate weighted C
01701 
01702         double *weighted_C = Malloc(double, nr_class);
01703         for(i=0;i<nr_class;i++)
01704             weighted_C[i] = param->C;
01705         for(i=0;i<param->nr_weight;i++)
01706         {   
01707             int j;
01708             for(j=0;j<nr_class;j++)
01709                 if(param->weight_label[i] == label[j])
01710                     break;
01711             if(j == nr_class)
01712                 fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
01713             else
01714                 weighted_C[j] *= param->weight[i];
01715         }
01716 
01717         // train k*(k-1)/2 models
01718         
01719         bool *nonzero = Malloc(bool,l);
01720         for(i=0;i<l;i++)
01721             nonzero[i] = false;
01722         decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
01723 
01724         int p = 0;
01725         for(i=0;i<nr_class;i++)
01726             for(int j=i+1;j<nr_class;j++)
01727             {
01728                 svm_problem sub_prob;
01729                 int si = start[i], sj = start[j];
01730                 int ci = count[i], cj = count[j];
01731                 sub_prob.l = ci+cj;
01732                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
01733                 sub_prob.y = Malloc(double,sub_prob.l+1); //dirty hack to surpress valgrind err
01734 
01735                 int k;
01736                 for(k=0;k<ci;k++)
01737                 {
01738                     sub_prob.x[k] = x[si+k];
01739                     sub_prob.y[k] = +1;
01740                 }
01741                 for(k=0;k<cj;k++)
01742                 {
01743                     sub_prob.x[ci+k] = x[sj+k];
01744                     sub_prob.y[ci+k] = -1;
01745                 }
01746                 sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err
01747                 
01748                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
01749                 for(k=0;k<ci;k++)
01750                     if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
01751                         nonzero[si+k] = true;
01752                 for(k=0;k<cj;k++)
01753                     if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
01754                         nonzero[sj+k] = true;
01755                 free(sub_prob.x);
01756                 free(sub_prob.y);
01757                 ++p;
01758             }
01759 
01760         // build output
01761 
01762         model->objective = f[0].objective;
01763         model->nr_class = nr_class;
01764         
01765         model->label = Malloc(int,nr_class);
01766         for(i=0;i<nr_class;i++)
01767             model->label[i] = label[i];
01768         
01769         model->rho = Malloc(double,nr_class*(nr_class-1)/2);
01770         for(i=0;i<nr_class*(nr_class-1)/2;i++)
01771             model->rho[i] = f[i].rho;
01772 
01773         int total_sv = 0;
01774         int *nz_count = Malloc(int,nr_class);
01775         model->nSV = Malloc(int,nr_class);
01776         for(i=0;i<nr_class;i++)
01777         {
01778             int nSV = 0;
01779             for(int j=0;j<count[i];j++)
01780                 if(nonzero[start[i]+j])
01781                 {   
01782                     ++nSV;
01783                     ++total_sv;
01784                 }
01785             model->nSV[i] = nSV;
01786             nz_count[i] = nSV;
01787         }
01788         
01789         SG_SINFO("Total nSV = %d\n",total_sv);
01790 
01791         model->l = total_sv;
01792         model->SV = Malloc(svm_node *,total_sv);
01793         p = 0;
01794         for(i=0;i<l;i++)
01795             if(nonzero[i]) model->SV[p++] = x[i];
01796 
01797         int *nz_start = Malloc(int,nr_class);
01798         nz_start[0] = 0;
01799         for(i=1;i<nr_class;i++)
01800             nz_start[i] = nz_start[i-1]+nz_count[i-1];
01801 
01802         model->sv_coef = Malloc(double *,nr_class-1);
01803         for(i=0;i<nr_class-1;i++)
01804             model->sv_coef[i] = Malloc(double,total_sv);
01805 
01806         p = 0;
01807         for(i=0;i<nr_class;i++)
01808             for(int j=i+1;j<nr_class;j++)
01809             {
01810                 // classifier (i,j): coefficients with
01811                 // i are in sv_coef[j-1][nz_start[i]...],
01812                 // j are in sv_coef[i][nz_start[j]...]
01813 
01814                 int si = start[i];
01815                 int sj = start[j];
01816                 int ci = count[i];
01817                 int cj = count[j];
01818                 
01819                 int q = nz_start[i];
01820                 int k;
01821                 for(k=0;k<ci;k++)
01822                     if(nonzero[si+k])
01823                         model->sv_coef[j-1][q++] = f[p].alpha[k];
01824                 q = nz_start[j];
01825                 for(k=0;k<cj;k++)
01826                     if(nonzero[sj+k])
01827                         model->sv_coef[i][q++] = f[p].alpha[ci+k];
01828                 ++p;
01829             }
01830         
01831         free(label);
01832         free(count);
01833         free(perm);
01834         free(start);
01835         free(x);
01836         free(weighted_C);
01837         free(nonzero);
01838         for(i=0;i<nr_class*(nr_class-1)/2;i++)
01839             free(f[i].alpha);
01840         free(f);
01841         free(nz_count);
01842         free(nz_start);
01843     }
01844     return model;
01845 }
01846 
01847 const char *svm_type_table[] =
01848 {
01849     "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
01850 };
01851 
01852 const char *kernel_type_table[]=
01853 {
01854     "linear","polynomial","rbf","sigmoid","precomputed",NULL
01855 };
01856 
01857 void svm_destroy_model(svm_model* model)
01858 {
01859     if(model->free_sv && model->l > 0)
01860         free((void *)(model->SV[0]));
01861     for(int i=0;i<model->nr_class-1;i++)
01862         free(model->sv_coef[i]);
01863     free(model->SV);
01864     free(model->sv_coef);
01865     free(model->rho);
01866     free(model->label);
01867     free(model->nSV);
01868     free(model);
01869 }
01870 
01871 void svm_destroy_param(svm_parameter* param)
01872 {
01873     free(param->weight_label);
01874     free(param->weight);
01875 }
01876 
01877 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
01878 {
01879     // svm_type
01880 
01881     int svm_type = param->svm_type;
01882     if(svm_type != C_SVC &&
01883        svm_type != NU_SVC &&
01884        svm_type != ONE_CLASS &&
01885        svm_type != EPSILON_SVR &&
01886        svm_type != NU_SVR)
01887         return "unknown svm type";
01888     
01889     // kernel_type, degree
01890     
01891     int kernel_type = param->kernel_type;
01892     if(kernel_type != LINEAR &&
01893        kernel_type != POLY &&
01894        kernel_type != RBF &&
01895        kernel_type != SIGMOID &&
01896        kernel_type != PRECOMPUTED)
01897         return "unknown kernel type";
01898 
01899     if(param->degree < 0)
01900         return "degree of polynomial kernel < 0";
01901 
01902     // cache_size,eps,C,nu,p,shrinking
01903 
01904     if(param->cache_size <= 0)
01905         return "cache_size <= 0";
01906 
01907     if(param->eps <= 0)
01908         return "eps <= 0";
01909 
01910     if(svm_type == C_SVC ||
01911        svm_type == EPSILON_SVR ||
01912        svm_type == NU_SVR)
01913         if(param->C <= 0)
01914             return "C <= 0";
01915 
01916     if(svm_type == NU_SVC ||
01917        svm_type == ONE_CLASS ||
01918        svm_type == NU_SVR)
01919         if(param->nu <= 0 || param->nu > 1)
01920             return "nu <= 0 or nu > 1";
01921 
01922     if(svm_type == EPSILON_SVR)
01923         if(param->p < 0)
01924             return "p < 0";
01925 
01926     if(param->shrinking != 0 &&
01927        param->shrinking != 1)
01928         return "shrinking != 0 and shrinking != 1";
01929 
01930 
01931     // check whether nu-svc is feasible
01932     
01933     if(svm_type == NU_SVC)
01934     {
01935         int l = prob->l;
01936         int max_nr_class = 16;
01937         int nr_class = 0;
01938         int *label = Malloc(int,max_nr_class);
01939         int *count = Malloc(int,max_nr_class);
01940 
01941         int i;
01942         for(i=0;i<l;i++)
01943         {
01944             int this_label = (int)prob->y[i];
01945             int j;
01946             for(j=0;j<nr_class;j++)
01947                 if(this_label == label[j])
01948                 {
01949                     ++count[j];
01950                     break;
01951                 }
01952             if(j == nr_class)
01953             {
01954                 if(nr_class == max_nr_class)
01955                 {
01956                     max_nr_class *= 2;
01957                     label = (int *)realloc(label,max_nr_class*sizeof(int));
01958                     count = (int *)realloc(count,max_nr_class*sizeof(int));
01959                 }
01960                 label[nr_class] = this_label;
01961                 count[nr_class] = 1;
01962                 ++nr_class;
01963             }
01964         }
01965     
01966         for(i=0;i<nr_class;i++)
01967         {
01968             int n1 = count[i];
01969             for(int j=i+1;j<nr_class;j++)
01970             {
01971                 int n2 = count[j];
01972                 if(param->nu*(n1+n2)/2 > min(n1,n2))
01973                 {
01974                     free(label);
01975                     free(count);
01976                     return "specified nu is infeasible";
01977                 }
01978             }
01979         }
01980         free(label);
01981         free(count);
01982     }
01983 
01984     return NULL;
01985 }

SHOGUN Machine Learning Toolbox - Documentation