00001 /* -*- Mode: C++ -*- 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * Provides a WvEncoder abstraction for the Speex audio packet format. 00006 * suitable for encoding voice at low bitrates. 00007 * 00008 * Only monaural audio is supported for now. 00009 */ 00010 #ifndef __WVSPEEX_H 00011 #define __WVSPEEX_H 00012 00013 #include "wvaudioencoder.h" 00014 00015 struct SpeexMode; 00016 struct SpeexBits; 00017 00018 namespace WvSpeex 00019 { 00020 /** The default encoder complexity level. */ 00021 static const int DEFAULT_COMPLEXITY = -1; 00022 00023 /** 00024 * Describes an encoding algorithm used by the Speex codec. 00025 * Might also take on values not listed in the enum at this 00026 * time due to future codec enhancements. 00027 */ 00028 enum CodecMode 00029 { 00030 DEFAULT_MODE = -1, /*!< Chosen based on the sampling rate */ 00031 NARROWBAND_MODE = 0, /*!< Narrowband ~8khz, 20ms frames */ 00032 WIDEBAND_MODE = 1, /*!< Wideband ~16khz, ?ms frames */ 00033 ULTRAWIDEBAND_MODE = 2 /*!< Ultrawideband ~32khz, ?ms frames */ 00034 }; 00035 00036 /** 00037 * Bitrate specification. 00038 * 00039 * Identifies a particular bitrate control mechanism. 00040 * Use one of the subclasses to initialize a suitable BitrateSpec. 00041 * 00042 */ 00043 class BitrateSpec 00044 { 00045 public: 00046 // TODO: check whether VBR_BITRATE is a valid mode 00047 enum Mode { VBR_QUALITY, CBR_QUALITY, CBR_BITRATE }; 00048 Mode mode; 00049 float quality_index; 00050 int nominal_bitrate; 00051 00052 protected: 00053 BitrateSpec(Mode mode) : mode(mode) { } 00054 00055 public: 00056 // allow creation of uninitialized objects for later assignment 00057 BitrateSpec() { } 00058 }; 00059 00060 /** 00061 * Specifies a variable bitrate based on a quality index ranging 00062 * from 0.0 (low quality) to 1.0 (high quality). 00063 */ 00064 class VBRQuality : public BitrateSpec 00065 { 00066 public: 00067 /** 00068 * Creates a bitrate specification. 00069 * "quality" is the quality index 00070 */ 00071 VBRQuality(float quality) : BitrateSpec(VBR_QUALITY) 00072 { 00073 quality_index = quality; 00074 } 00075 }; 00076 00077 /** 00078 * Specifies a constant bitrate specified in bits per second. 00079 * 00080 * The encoder may adjust the bitrate according to internal 00081 * constraints, but guarantees that it will not select a 00082 * bitrate larger than that specified here. 00083 */ 00084 class CBRBitrate : public BitrateSpec 00085 { 00086 public: 00087 /** 00088 * Creates a bitrate specification. 00089 * "nominal" is the nominal bitrate 00090 */ 00091 CBRBitrate(int nominal) : BitrateSpec(CBR_BITRATE) 00092 { 00093 nominal_bitrate = nominal; 00094 } 00095 }; 00096 00097 /** 00098 * Specifies a constant bitrate based on a quality index ranging 00099 * from 0.0 (low quality) to 1.0 (high quality). 00100 */ 00101 class CBRQuality : public BitrateSpec 00102 { 00103 public: 00104 /** 00105 * Creates a bitrate specification. 00106 * "bitrate" is the fixed bitrate 00107 */ 00108 CBRQuality(float quality) : BitrateSpec(CBR_QUALITY) 00109 { 00110 quality_index = quality; 00111 } 00112 }; 00113 }; // namespace 00114 00115 00116 00117 /** 00118 * Encodes PCM audio using the Speex audio packet format. 00119 * 00120 * Input buffer must contain a sequence of signed 'float' type 00121 * values in machine order representing unnormalized PCM 00122 * audio data. 00123 * 00124 * Outbut buffer will contain a sequence of Speex packets. Each 00125 * invocation of encode() with flush == false will generate 00126 * precisely one Speex packet suitable for use with unreliable 00127 * datagram transmission protocols that guarantee serial packet 00128 * order on reception. Each packet contains one frame of exactly 00129 * 20ms of encoded audio in narrowband mode (sampling rate 00130 * <= 12.5Khz). 00131 * 00132 * Warning: Never invoke encode() with flush == true unless 00133 * the input buffer contains exactly zero or one frame of audio. 00134 * Speex packets do not contain any delimiters therefore it is not 00135 * possible to locate the boundary between adjacent packets unless 00136 * they are encapsulated as individual datagrams in some fashion. 00137 * With flush == true, multiple adjacent generated packets will run 00138 * together to form one large undecodable lump. 00139 * 00140 * For archival purposes or for streaming, consider using 00141 * WvOggSpeexEncoder. 00142 * 00143 * For encoding music or other non-speech audio, consider using 00144 * WvOggVorbisEncoder. 00145 * 00146 */ 00147 class WvSpeexEncoder : public WvAudioEncoder 00148 { 00149 void *spxstate; 00150 SpeexBits *spxbits; 00151 SpeexMode *spxmode; 00152 unsigned int _channels; 00153 size_t _samplesperframe; 00154 00155 public: 00156 00157 /** 00158 * Creates a Speex Encoder. 00159 * 00160 * "bitrate" is the bitrate specification 00161 * "samplingrate" is the number of samples per second, 00162 * preferably one of 8000, 16000, or 32000 00163 * "channels" is number of channels (must be 1 for now), 00164 * defaults to 1 00165 * "mode" is the Speex codec mode to use or 00166 * WvSpeex::DEFAULT_MODE to select one automatically 00167 * based on the sampling rate, this is the default 00168 * "complexity" is a measure of the amount of CPU 00169 * resources that should be allocated to the encoder, 00170 * ranges from 0 to 10 or WvSpeex::DEFAULT_COMPLEXITY 00171 * the encoder default, this is the default 00172 */ 00173 WvSpeexEncoder(const WvSpeex::BitrateSpec &bitratespec, 00174 int samplingrate, unsigned int channels = 1, 00175 WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE, 00176 int complexity = WvSpeex::DEFAULT_COMPLEXITY); 00177 00178 virtual ~WvSpeexEncoder(); 00179 00180 /** 00181 * Returns the sampling rate. 00182 * Returns: the sampling rate 00183 */ 00184 int samplingrate() const; 00185 00186 /** 00187 * Returns the number of channels. 00188 * Returns: the number of channels 00189 */ 00190 virtual unsigned int channels() const 00191 { return _channels; } 00192 00193 /** 00194 * Returns the number of samples per frame. 00195 * Returns: the frame size 00196 */ 00197 virtual size_t samplesperframe() const 00198 { return _samplesperframe; } 00199 00200 /** 00201 * Returns the current encoding mode. 00202 * Returns: the encoding mode 00203 */ 00204 WvSpeex::CodecMode mode() const; 00205 00206 /** 00207 * Returns true if variable bitrate support has been enabled. 00208 * Returns: true if it is enabled 00209 */ 00210 bool vbr() const; 00211 00212 /** 00213 * Returns the nominal bitrate. 00214 * Returns: the bitrate, or -1 if not specified or not meaningful 00215 */ 00216 int nominalbitrate() const; 00217 00218 protected: 00219 virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf, 00220 bool flush); 00221 virtual bool _typedfinish(OBuffer &outbuf); 00222 00223 private: 00224 bool flushspxbits(OBuffer &outbuf); 00225 }; 00226 00227 00228 00229 /** 00230 * Decodes PCM audio using the Speex audio packet format. 00231 * 00232 * Inbut buffer must contain a sequence of Speex packets. 00233 * 00234 * Output buffer will contain a sequence of signed 'float' type 00235 * values in machine order representing unnormalized PCM 00236 * audio data. 00237 * 00238 * Missing audio due to lost or damaged packets may be filled in 00239 * by making predictions (guesses) based on residual energy 00240 * information from previous ones. The number of lost or damaged 00241 * packets must be known in order to calculate how much new audio 00242 * must be synthesized. This technique works well to conceal 00243 * occasional dropouts but not long strings of lost packets. 00244 * Still, Speech is still surprizingly recognizable with average 00245 * packet losses of up to 25% to 50%! 00246 * 00247 * Warning: Never invoke encode() unless the input buffer 00248 * contains exactly zero or one Speex packets. Speex packets 00249 * do not contain any delimiters therefore it is not possible to 00250 * locate the boundary between adjacent packets unless they are 00251 * encapsulated as individual datagrams in some fashion. 00252 * Multiple adjacent packets cannot be decoded at once. 00253 * 00254 * For archival purposes or for streaming, consider using 00255 * WvOggSpeexDecoder. 00256 * 00257 * For encoding music or other non-speech audio, consider using 00258 * WvOggVorbisDecoder. 00259 */ 00260 class WvSpeexDecoder : public WvAudioDecoder 00261 { 00262 int _samplingrate; 00263 unsigned int _channels; 00264 00265 void *spxstate; 00266 SpeexBits *spxbits; 00267 SpeexMode *spxmode; 00268 size_t _samplesperframe; 00269 00270 public: 00271 /** 00272 * Creates a Speex Decoder. 00273 * 00274 * For now, if the input bitstream is stereo, outputs the left 00275 * channel only. This behaviour may change later on. 00276 * 00277 * "samplingrate" is the number of samples per second, 00278 * preferably one of 8000, 16000, or 32000 00279 * "channels" is number of channels (must be 1 for now), 00280 * defaults to 1 00281 * "mode" is the Speex codec mode to use or 00282 * WvSpeex::DEFAULT_MODE to select one automatically 00283 * based on the sampling rate, this is the default 00284 */ 00285 WvSpeexDecoder(int samplingrate, unsigned int channels = 1, 00286 WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE); 00287 00288 virtual ~WvSpeexDecoder(); 00289 00290 /** 00291 * Synthesizes one audio frame to compensate for a missing packet. 00292 * "outbuf" is the output buffer 00293 * Returns: true on success 00294 * @see encode 00295 */ 00296 virtual bool missing(OBuffer &outbuf); 00297 00298 /** 00299 * Returns the number of channels in the stream. 00300 * Returns: the number of channels, non-negative 00301 */ 00302 virtual unsigned int channels() const 00303 { return _channels; } 00304 00305 /** 00306 * Returns the sampling rate of the stream. 00307 * Returns: the sampling rate 00308 */ 00309 int samplingrate() const 00310 { return _samplingrate; } 00311 00312 /** 00313 * Returns the number of samples per frame. 00314 * Returns: the frame size 00315 */ 00316 virtual size_t samplesperframe() const 00317 { return _samplesperframe; } 00318 00319 /** 00320 * Returns the current encoding mode. 00321 * Returns: the encoding mode 00322 */ 00323 WvSpeex::CodecMode mode() const; 00324 00325 /** 00326 * Determines if the perceptual enhancement post-filter is enabled. 00327 * Returns: true if it is enabled 00328 */ 00329 bool postfilter() const; 00330 00331 /** 00332 * Enables or disables the perceptual enhancement post-filter. 00333 * "enable" is true or false 00334 */ 00335 void setpostfilter(bool enable); 00336 00337 protected: 00338 virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf, 00339 bool flush); 00340 virtual bool _typedfinish(OBuffer &outbuf); 00341 }; 00342 00343 #endif // __WVSPEEX_H