OligoKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2008 Christian Igel, Tobias Glasmachers
00008  * Copyright (C) 2008 Christian Igel, Tobias Glasmachers
00009  *
00010  * Shogun adjustments (w) 2008 Soeren Sonnenburg
00011  */
00012 #ifndef _OLIGOKERNEL_H_
00013 #define _OLIGOKERNEL_H_
00014 
00015 #include "kernel/StringKernel.h"
00016 
00017 #include <vector>
00018 #include <string>
00019 #include <utility>
00020 
00039 class COligoKernel : public CStringKernel<char>
00040 {
          // String kernel over char sequences that identifies itself as
          // K_OLIGO / "Oligo". Only declarations are visible in this header;
          // the constructor, destructor, init(), compute() and all static
          // helpers are defined elsewhere.
          // NOTE(review): the helper names suggest the "oligo kernel" for
          // biological sequences (k-mer positions compared through a Gaussian
          // of their distance) -- confirm against the implementation file.
00041     public:
              /** Construct an oligo kernel.
               *
               * @param cache_size cache size; presumably forwarded to the
               *                   CStringKernel base -- TODO confirm in the .cpp
               * @param k          presumably the k-mer length kept in member k
               *                   -- TODO confirm
               * @param width      presumably the kernel width kept in member
               *                   width -- TODO confirm
               */
00047         COligoKernel(int32_t cache_size, int32_t k, float64_t width);
00048 
              /** Destructor (defined out of line). */
00050         ~COligoKernel();
00051 
              /** Initialize the kernel for a pair of feature objects.
               *
               * @param l first feature object (presumably the left-hand side
               *          of the kernel -- confirm)
               * @param r second feature object (presumably the right-hand
               *          side of the kernel -- confirm)
               * @return success flag as reported by the implementation
               */
00058         virtual bool init(CFeatures* l, CFeatures* r);
00059 
              /** Load kernel initialization data from a file.
               *
               * Not supported by this kernel: the file argument is ignored.
               *
               * @return always false
               */
00064         virtual bool load_init(FILE*)
00065         {
00066             return false;
00067         }
00068 
              /** Save kernel initialization data to a file.
               *
               * Not supported by this kernel: the file argument is ignored.
               *
               * @return always false
               */
00073         virtual bool save_init(FILE*)
00074         {
00075             return false;
00076         }
00077 
              /** Report the kernel type.
               *
               * @return K_OLIGO
               */
00082         virtual EKernelType get_kernel_type() { return K_OLIGO; }
00083 
              /** Report the kernel name.
               *
               * @return the string literal "Oligo"
               */
00088         virtual const char* get_name() { return "Oligo"; }
00089 
00090 
              /** Compute a single kernel value.
               *
               * @param x first index
               * @param y second index
               * @return kernel value
               *
               * NOTE(review): x and y are presumably indices into the feature
               * objects passed to init() -- confirm against the .cpp.
               */
00091         virtual float64_t compute(int32_t x, int32_t y);
00092 
00093     protected:
              /** Encode one sequence as a list of (int32_t, float64_t) pairs.
               *
               * @param sequence           input sequence
               * @param k_mer_length       length of the k-mers to encode
               * @param allowed_characters characters considered valid
               * @param values             receives the encoding (passed by
               *                           non-const reference)
               *
               * NOTE(review): the meaning of each pair (looks like a position
               * and a k-mer code) and whether values is cleared first are not
               * visible here -- confirm.
               */
00107         static void encodeOligo(
00108             const std::string& sequence, uint32_t k_mer_length,
00109             const std::string& allowed_characters,
00110             std::vector< std::pair<int32_t, float64_t> >&   values);
00111 
              /** Encode a collection of sequences.
               *
               * @param sequences          input sequences
               * @param k_mer_length       length of the k-mers to encode
               * @param allowed_characters characters considered valid
               * @param encoded_sequences  receives one pair list per sequence
               *                           (passed by non-const reference)
               *
               * NOTE(review): presumably applies encodeOligo() to every
               * element of sequences -- confirm.
               */
00119         static void getSequences(
00120             const std::vector<std::string>& sequences,
00121             uint32_t k_mer_length, const std::string& allowed_characters,
00122             std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences);
00123 
              /** Fill a lookup table of precomputed values.
               *
               * @param sigma           width parameter
               * @param sequence_length sequence length the table is built for
               * @param cache           receives the table (passed by non-const
               *                        reference)
               *
               * NOTE(review): presumably holds exponential-function values
               * indexed by position distance, as the name suggests -- confirm
               * the exact formula and table size in the .cpp.
               */
00134         static void getExpFunctionCache(
00135             float64_t sigma, uint32_t sequence_length,
00136             std::vector<float64_t>&  cache);
00137 
              /** Kernel value of two encoded sequences using a lookup table.
               *
               * @param x            first encoded sequence
               * @param y            second encoded sequence
               * @param exp_cache    lookup table (presumably produced by
               *                     getExpFunctionCache() -- confirm)
               * @param max_distance defaults to -1
               * @return kernel value
               *
               * NOTE(review): -1 looks like a "no distance limit" sentinel,
               * and x / y may be required to be sorted (see cmpOligos_) --
               * confirm both against the implementation.
               */
00153         static float64_t kernelOligoFast(
00154             const std::vector< std::pair<int32_t, float64_t> >& x,
00155             const std::vector< std::pair<int32_t, float64_t> >& y,
00156             const std::vector<float64_t>& exp_cache,
00157             int32_t max_distance = -1);
00158 
              /** Kernel value of two encoded sequences without a lookup table.
               *
               * @param x            first encoded sequence
               * @param y            second encoded sequence
               * @param sigma_square width parameter (presumably sigma squared,
               *                     going by the name -- confirm)
               * @return kernel value
               */
00166         static float64_t kernelOligo(
00167             const std::vector< std::pair<int32_t, float64_t> >& x,
00168             const std::vector< std::pair<int32_t, float64_t> >& y,
00169             float64_t sigma_square);
00170 
00171     private: 
              /** Binary predicate over two encoded entries (taken by value).
               *
               * NOTE(review): presumably the ordering used to sort encoded
               * sequences; which pair field it compares is not visible here
               * -- confirm.
               */
00172         static bool cmpOligos_(std::pair<int32_t, float64_t> a,
00173             std::pair<int32_t, float64_t> b );
00174 
00175     protected:
              /** presumably the k-mer length given to the constructor -- confirm */
00177         int32_t k;
              /** presumably the kernel width given to the constructor -- confirm */
00179         float64_t width;
00180 };
00181 #endif // _OLIGOKERNEL_H_

SHOGUN Machine Learning Toolbox - Documentation