Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

wvoggspeex.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*-
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  *
00005  * Provides a WvEncoder abstraction for Ogg Speex audio streams
00006  * suitable for encoding voice at low bitrates.
00007  *
00008  * Only monaural audio is supported for now.
00009  */
00010 #ifndef __WVOGGSPEEX_H
00011 #define __WVOGGSPEEX_H
00012 
00013 #include "wvstringlist.h"
00014 #include "wvtypedencoder.h"
00015 #include "wvspeex.h"
00016 #include <ogg/ogg.h>
00017 
00018 /**
00019  * Encodes PCM audio using the Ogg Speex stream format.
00020  * 
00021  * Input buffer must contain a sequence of signed 'float' type
00022  * values in machine order representing normalized PCM
00023  * audio data.
00024  * 
00025  * Output buffer will contain part of an Ogg Speex bitstream.
00026  * 
00027  * @see WvSpeexEncoder
00028  */
00029 class WvOggSpeexEncoder :
00030     public WvTypedEncoder<float, unsigned char>
00031 {
00032     WvSpeexEncoder *speexenc; // the inline accessors below delegate to this
00033     ogg_int64_t packetno;
00034     WvString _vendor;
00035     WvStringList _comments;
00036     ogg_stream_state *oggstream;
00037     bool wrote_headers;
00038     
00039     WvInPlaceBuf framebuf;
00040 
00041 public:
00042     static const long RANDOM_SERIALNO = 0;
00043 
00044     /**
00045      * Creates an Ogg Speex Encoder.
00046      * 
00047      * The special constant RANDOM_SERIALNO may be specified as the
00048      * serial number to let the encoder choose one at random.  The
00049      * implementation uses the rand() function and assumes that
00050      * the PRNG was previously seeded with srand().
00051      * 
00052      * "bitratespec" is the bitrate specification
00053      * "samplingrate" is the number of samples per second,
00054      *        preferably one of 8000, 16000, or 32000
00055      * "channels" is the number of channels (must be 1 for now),
00056      *        defaults to 1
00057      * "mode" is the Speex codec mode to use or
00058      *        WvSpeex::DEFAULT_MODE to select one automatically
00059      *        based on the sampling rate, this is the default
00060      * "complexity" is a measure of the amount of CPU
00061      *        resources that should be allocated to the encoder,
00062      *        ranges from 0 to 10, or WvSpeex::DEFAULT_COMPLEXITY for
00063      *        the encoder default, this is the default
00064      * "serialno" is the Ogg bitstream serial number (default: RANDOM_SERIALNO)
00065      */
00066     WvOggSpeexEncoder(const WvSpeex::BitrateSpec &bitratespec,
00067         int samplingrate, int channels = 1,
00068         WvSpeex::CodecMode mode = WvSpeex::DEFAULT_MODE,
00069         int complexity = WvSpeex::DEFAULT_COMPLEXITY,
00070         long serialno = RANDOM_SERIALNO);
00071 
00072     virtual ~WvOggSpeexEncoder();
00073 
00074     /**
00075      * Adds a comment to the Ogg Speex stream.
00076      * 
00077      * Do not call after the first invocation of encode().
00078      * 
00079      * "comment" is the comment
00080      */
00081     void add_comment(WvStringParm comment);
00082     
00083     /**
00084      * Adds a comment to the Ogg Speex stream; the arguments are used to
00085      * construct a WvString that is passed to add_comment(WvStringParm).
00086      * 
00087      * Do not call after the first invocation of encode().
00088      */
00089     void add_comment(WVSTRING_FORMAT_DECL)
00090         { add_comment(WvString(WVSTRING_FORMAT_CALL)); }
00091     
00092     /**
00093      * Adds a tag to the Ogg Speex stream.
00094      * 
00095      * Ogg Speex tags are special comment strings of the form
00096      * "tag=value" and are typically used to store artist,
00097      * date, and other simple string encoded metadata.
00098      * 
00099      * Do not call after the first invocation of encode().
00100      * 
00101      * "tag" is the tag name
00102      * "value" is the value
00103      */
00104     void add_tag(WvStringParm tag, WvStringParm value);
00105 
00106     /**
00107      * Returns the sampling rate.
00108      * Returns: the sampling rate
00109      */
00110     int samplingrate() const
00111         { return speexenc->samplingrate(); }
00112 
00113     /**
00114      * Returns the number of channels.
00115      * Returns: the number of channels
00116      */
00117     int channels() const
00118         { return speexenc->channels(); }
00119 
00120     /**
00121      * Returns the number of samples per frame.
00122      * Returns: the frame size
00123      */
00124     int samplesperframe() const
00125         { return speexenc->samplesperframe(); }
00126 
00127     /**
00128      * Returns the current encoding mode.
00129      * Returns: the encoding mode
00130      */
00131     WvSpeex::CodecMode mode() const
00132         { return speexenc->mode(); }
00133 
00134     /**
00135      * Returns true if variable bitrate support has been enabled.
00136      * Returns: true if it is enabled
00137      */
00138     bool vbr() const
00139         { return speexenc->vbr(); }
00140 
00141     /**
00142      * Returns the nominal bitrate.
00143      * Returns: the bitrate, or -1 if not specified or not meaningful
00144      */
00145     int nominalbitrate() const
00146         { return speexenc->nominalbitrate(); }
00147 
00148 protected:
00149     virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf,
00150         bool flush);
00151     virtual bool _typedfinish(OBuffer &outbuf);
00152     virtual bool _isok() const;
00153     virtual WvString _geterror() const;
00154 
00155 private:
00156     bool write_headers(OBuffer &outbuf);
00157     bool write_eof(OBuffer &outbuf);
00158     bool write_stream(OBuffer &outbuf, bool flush = false);
00159 };
00160 
00161 
00162 /**
00163  * Decodes an Ogg Speex stream into PCM audio.
00164  * 
00165  * Input buffer must contain part of an Ogg Speex bitstream.
00166  * 
00167  * Output buffer will contain a sequence of signed 'float' type
00168  * values in machine order representing normalized PCM
00169  * audio data.
00170  * 
00171  * If flush == false, then encode() will return true immediately
00172  * after isheaderok() becomes true without outputting any audio
00173  * data.  This allows the client to examine the header and to
00174  * tailor the actual decoding process based on that information.
00175  * 
00176  * @see WvSpeexDecoder
00177  */
00178 class WvOggSpeexDecoder :
00179     public WvTypedEncoder<unsigned char, float>
00180 {
00181     WvSpeexDecoder *speexdec;
00182     bool forcepostfilter;
00183     WvString _vendor;
00184     WvStringList _comments;
00185     bool _vbr; // extracted from header
00186     int _nominalbitrate; // extracted from header
00187 
00188 public:
00189     /**
00190      * Creates an Ogg Speex Decoder.
00191      * 
00192      * For now, if the input bitstream is stereo, outputs the left
00193      * channel only.  This behaviour may change later on.
00194      * 
00195      */
00196     WvOggSpeexDecoder();
00197     virtual ~WvOggSpeexDecoder();
00198 
00199     /**
00200      * Returns true when the entire stream header has been processed
00201      * and the comments and vendor fields are valid.
00202      * 
00203      * If false and isok(), try decoding more data.
00204      * 
00205      * Returns: true when the header has been decoded
00206      */
00207     bool isheaderok() const;
00208 
00209     /**
00210      * Returns the Ogg Speex vendor comment string.
00211      *
00212      * Returns: the vendor comment
00213      */
00214     WvString vendor() const
00215         { return _vendor; }
00216 
00217     /**
00218      * Returns the Ogg Speex list of user comments.
00219      * 
00220      * The list is owned by this decoder, do not change it.
00221      * 
00222      * Returns: the list of comments
00223      */
00224     WvStringList &comments()
00225         { return _comments; }
00226 
00227     /**
00228      * Returns the number of channels in the stream.
00229      * Does not return useful information unless isheaderok() == true.
00230      * Returns: the number of channels, non-negative
00231      */
00232     int channels() const;
00233 
00234     /**
00235      * Returns the sampling rate of the stream.
00236      * Does not return useful information unless isheaderok() == true.
00237      * Returns: the sampling rate
00238      */
00239     int samplingrate() const;
00240         
00241     /**
00242      * Returns the number of samples per frame.
00243      * Does not return useful information unless isheaderok() == true.
00244      * Returns: the frame size
00245      */
00246     int samplesperframe() const;
00247 
00248     /**
00249      * Returns the current encoding mode.
00250      * Does not return useful information unless isheaderok() == true.
00251      * Returns: the encoding mode
00252      */
00253     WvSpeex::CodecMode mode() const;
00254     
00255     /**
00256      * Returns true if variable bitrate support has been enabled.
00257      * Does not return useful information unless isheaderok() == true.
00258      * Returns: true if it is enabled
00259      */
00260     bool vbr() const;
00261 
00262     /**
00263      * Returns the nominal bitrate.
00264      * Does not return useful information unless isheaderok() == true.
00265      * Returns: the bitrate, or -1 if not specified or not meaningful
00266      */
00267     int nominalbitrate() const;
00268     
00269     /**
00270      * Determines if the perceptual enhancement post-filter is enabled.
00271      * Returns: true if it is enabled
00272      */
00273     bool postfilter() const;
00274 
00275     /**
00276      * Enables or disables the perceptual enhancement post-filter.
00277      * "enable" is true to enable the post-filter, false to disable it
00278      */
00279     void setpostfilter(bool enable);
00280 
00281 protected:
00282     virtual bool _typedencode(IBuffer &inbuf, OBuffer &outbuf,
00283         bool flush);
00284     virtual bool _typedfinish(OBuffer &outbuf);
00285     virtual bool _isok() const;
00286     virtual WvString _geterror() const;
00287 
00288 private:
00289     ogg_sync_state *oggsync;
00290     ogg_stream_state *oggstream;
00291     ogg_page *oggpage;
00292     bool need_serialno;
00293     int need_headers;
00294 
00295     bool process_page(ogg_page *oggpage, OBuffer &outbuf);
00296     bool process_packet(ogg_packet *oggpacket, OBuffer &outbuf);
00297     bool prepare_stream(long serialno);
00298 
00299     bool process_speex_header(ogg_packet *header);
00300     bool process_comment_header(ogg_packet *header);
00301 };
00302 
00303 #endif // __WVOGGSPEEX_H

Generated on Wed Dec 15 15:08:11 2004 for WvStreams by  doxygen 1.3.9.1