Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

wvoggspeex.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * Provides a WvEncoder abstraction for Ogg Speex audio streams 00006 * suitable for encoding voice at low bitrates. 00007 * 00008 * Only monaural audio is supported for now. 00009 */ 00010 #ifndef __WVOGGSPEEX_H 00011 #define __WVOGGSPEEX_H 00012 00013 #include "wvstringlist.h" 00014 #include "wvtypedencoder.h" 00015 #include "wvspeex.h" 00016 #include <ogg/ogg.h> 00017 00018 /** 00019 * Encodes PCM audio using the Ogg Speex stream format. 00020 * 00021 * Input buffer must contain a sequence of signed 'float' type 00022 * values in machine order representing normalized PCM 00023 * audio data. 00024 * 00025 * Outbut buffer will contain part of an Ogg Speex bitstream. 00026 * 00027 * @see WvSpeexEncoder 00028 */ 00029 class WvOggSpeexEncoder : 00030 public WvTypedEncoder<float, unsigned char> 00031 { 00032 WvSpeexEncoder *speexenc; 00033 ogg_int64_t packetno; 00034 WvString _vendor; 00035 WvStringList _comments; 00036 ogg_stream_state *oggstream; 00037 bool wrote_headers; 00038 00039 WvInPlaceBuf framebuf; 00040 00041 public: 00042 static const long RANDOM_SERIALNO = 0; 00043 00044 /** 00045 * Creates an Ogg Speex Encoder. 00046 * 00047 * The special constant RANDOM_SERIALNO may be specified as the 00048 * serial number to let the encoder choose one at random. The 00049 * implementation uses the rand() function and assumes that 00050 * the PRNG was previously seeded with srand(). 00051 * 00052 * "bitrate" is the bitrate specification 00053 * "samplingrate" is the number of samples per second, 00054 * preferably one of 8000, 16000, or 32000 00055 * "channels" is number of channels (must be 1 for now), 00056 * defaults to 1 00057 * "mode" is the Speex codec mode to use or 00058 * WvSpeex::DEFAULT_MODE to select one automatically 00059 * based on the sampling rate, this is the default 00060 * "complexity" is a measure of the amount of CPU 00061 * resources that should be allocated to the encoder, 00062 * ranges from 0 to 10 or WvSpeex::DEFAULT_COMPLEXITY 00063 * the encoder default, this is the default 00064 * "serialno" is the Ogg bitstream serial number 00065 */ 00066 WvOggSpeexEncoder(const WvSpeex::BitrateSpec &bitratespec, 00067 int samplingrate, int channels = 1, 00068 WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE, 00069 int complexity = WvSpeex::DEFAULT_COMPLEXITY, 00070 long serialno = RANDOM_SERIALNO); 00071 00072 virtual ~WvOggSpeexEncoder(); 00073 00074 /** 00075 * Adds a comment to the Ogg Speex stream. 00076 * 00077 * Do not call after the first invocation of encode(). 00078 * 00079 * "comment" is the comment 00080 */ 00081 void add_comment(WvStringParm comment); 00082 00083 /** 00084 * Adds a comment to the Ogg Speex stream. 00085 * 00086 * Do not call after the first invocation of encode(). 00087 * 00088 */ 00089 void add_comment(WVSTRING_FORMAT_DECL) 00090 { add_comment(WvString(WVSTRING_FORMAT_CALL)); } 00091 00092 /** 00093 * Adds a tag to the Ogg Speex stream. 00094 * 00095 * Ogg Speex tags are special comment strings of the form 00096 * "=" and are typically used to store artist, 00097 * date, and other simple string encoded metadata. 00098 * 00099 * Do not call after the first invocation of encode(). 00100 * 00101 * "tag" is the tag name 00102 * "value" is the value 00103 */ 00104 void add_tag(WvStringParm tag, WvStringParm value); 00105 00106 /** 00107 * Returns the sampling rate. 00108 * Returns: the sampling rate 00109 */ 00110 int samplingrate() const 00111 { return speexenc->samplingrate(); } 00112 00113 /** 00114 * Returns the number of channels. 00115 * Returns: the number of channels 00116 */ 00117 int channels() const 00118 { return speexenc->channels(); } 00119 00120 /** 00121 * Returns the number of samples per frame. 00122 * Returns: the frame size 00123 */ 00124 int samplesperframe() const 00125 { return speexenc->samplesperframe(); } 00126 00127 /** 00128 * Returns the current encoding mode. 00129 * Returns: the encoding mode 00130 */ 00131 WvSpeex::CodecMode mode() const 00132 { return speexenc->mode(); } 00133 00134 /** 00135 * Returns true if variable bitrate support has been enabled. 00136 * Returns: true if it is enabled 00137 */ 00138 bool vbr() const 00139 { return speexenc->vbr(); } 00140 00141 /** 00142 * Returns the nominal bitrate. 00143 * Returns: the bitrate, or -1 if not specified or not meaningful 00144 */ 00145 int nominalbitrate() const 00146 { return speexenc->nominalbitrate(); } 00147 00148 protected: 00149 virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf, 00150 bool flush); 00151 virtual bool _typedfinish(OBuffer &outbuf); 00152 virtual bool _isok() const; 00153 virtual WvString _geterror() const; 00154 00155 private: 00156 bool write_headers(OBuffer &outbuf); 00157 bool write_eof(OBuffer &outbuf); 00158 bool write_stream(OBuffer &outbuf, bool flush = false); 00159 }; 00160 00161 00162 /** 00163 * Decodes PCM audio using the Ogg Speex stream format. 00164 * 00165 * Inbut buffer must contain part of an Ogg Speex bitstream. 00166 * 00167 * Output buffer will contain a sequence of signed 'float' type 00168 * values in machine order representing normalized PCM 00169 * audio data. 00170 * 00171 * If flush == false, then encode() will return true immediately 00172 * after isheaderok() becomes true without outputting any audio 00173 * data. This allows the client to examine the header and to 00174 * tailor the actual decoding process based on that information. 00175 * 00176 * @see WvSpeexDecoder 00177 */ 00178 class WvOggSpeexDecoder : 00179 public WvTypedEncoder<unsigned char, float> 00180 { 00181 WvSpeexDecoder *speexdec; 00182 bool forcepostfilter; 00183 WvString _vendor; 00184 WvStringList _comments; 00185 bool _vbr; // extracted from header 00186 int _nominalbitrate; // extracted from header 00187 00188 public: 00189 /** 00190 * Creates an Ogg Speex Decoder. 00191 * 00192 * For now, if the input bitstream is stereo, outputs the left 00193 * channel only. This behaviour may change later on. 00194 * 00195 */ 00196 WvOggSpeexDecoder(); 00197 virtual ~WvOggSpeexDecoder(); 00198 00199 /** 00200 * Returns true when the entire stream header has been processed 00201 * and the comments and vendor fields are valid. 00202 * 00203 * If false and isok(), try decoding more data. 00204 * 00205 * Returns: true when the header has been decoded 00206 */ 00207 bool isheaderok() const; 00208 00209 /** 00210 * Returns the Ogg Speex vendor comment string. 00211 * 00212 * Returns: the vendor comment 00213 */ 00214 WvString vendor() const 00215 { return _vendor; } 00216 00217 /** 00218 * Returns the Ogg Speex list of user comments. 00219 * 00220 * The list is owned by the encoder, do not change. 00221 * 00222 * Returns: the list of comments 00223 */ 00224 WvStringList &comments() 00225 { return _comments; } 00226 00227 /** 00228 * Returns the number of channels in the stream. 00229 * Does not return useful information unless isheaderok() == true. 00230 * Returns: the number of channels, non-negative 00231 */ 00232 int channels() const; 00233 00234 /** 00235 * Returns the sampling rate of the stream. 00236 * Does not return useful information unless isheaderok() == true. 00237 * Returns: the sampling rate 00238 */ 00239 int samplingrate() const; 00240 00241 /** 00242 * Returns the number of samples per frame. 00243 * Does not return useful information unless isheaderok() == true. 00244 * Returns: the frame size 00245 */ 00246 int samplesperframe() const; 00247 00248 /** 00249 * Returns the current encoding mode. 00250 * Does not return useful information unless isheaderok() == true. 00251 * Returns: the encoding mode 00252 */ 00253 WvSpeex::CodecMode mode() const; 00254 00255 /** 00256 * Returns true if variable bitrate support has been enabled. 00257 * Does not return useful information unless isheaderok() == true. 00258 * Returns: true if it is enabled 00259 */ 00260 bool vbr() const; 00261 00262 /** 00263 * Returns the nominal bitrate. 00264 * Does not return useful information unless isheaderok() == true. 00265 * Returns: the bitrate, or -1 if not specified or not meaningful 00266 */ 00267 int nominalbitrate() const; 00268 00269 /** 00270 * Determines if the perceptual enhancement post-filter is enabled. 00271 * Returns: true if it is enabled 00272 */ 00273 bool postfilter() const; 00274 00275 /** 00276 * Enables or disables the perceptual enhancement post-filter. 00277 * "enable" is true or false 00278 */ 00279 void setpostfilter(bool enable); 00280 00281 protected: 00282 virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf, 00283 bool flush); 00284 virtual bool _typedfinish(OBuffer &outbuf); 00285 virtual bool _isok() const; 00286 virtual WvString _geterror() const; 00287 00288 private: 00289 ogg_sync_state *oggsync; 00290 ogg_stream_state *oggstream; 00291 ogg_page *oggpage; 00292 bool need_serialno; 00293 int need_headers; 00294 00295 bool process_page(ogg_page *oggpage, OBuffer &outbuf); 00296 bool process_packet(ogg_packet *oggpacket, OBuffer &outbuf); 00297 bool prepare_stream(long serialno); 00298 00299 bool process_speex_header(ogg_packet *header); 00300 bool process_comment_header(ogg_packet *header); 00301 }; 00302 00303 #endif // __WVOGGSPEEX_H

Generated on Tue Oct 5 01:09:20 2004 for WvStreams by doxygen 1.3.7