Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

wvoggspeex.cc

Go to the documentation of this file.
00001 /* 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * Provides a WvEncoder abstraction for Ogg Speex audio streams 00006 * suitable for encoding voice at low bitrates. 00007 * 00008 * Only monaural audio is supported for now. 00009 */ 00010 #include "wvoggspeex.h" 00011 #include <ogg/ogg.h> 00012 #include <speex.h> 00013 #include <speex_header.h> 00014 #include <unistd.h> 00015 00016 #define OGG_SPEEX_DECODER_BUF_SIZE 16384 // at most 16k at once 00017 00018 /** Extracts a little endian integer from a buffer. */ 00019 static unsigned long int getint_le(WvBuf &inbuf) 00020 { 00021 // FIXME: a little sloppy 00022 return inbuf.getch() | (inbuf.getch() << 8) | 00023 (inbuf.getch() << 16) | (inbuf.getch() << 24); 00024 } 00025 00026 00027 /** Appends a little endian integer to a buffer. */ 00028 static void putint_le(WvBuf &outbuf, unsigned long int value) 00029 { 00030 // FIXME: a little sloppy 00031 outbuf.putch(value & 255); 00032 outbuf.putch((value >> 8) & 255); 00033 outbuf.putch((value >> 16) & 255); 00034 outbuf.putch(value >> 24); 00035 } 00036 00037 00038 /***** WvOggSpeexEncoder *****/ 00039 00040 WvOggSpeexEncoder::WvOggSpeexEncoder( 00041 const WvSpeex::BitrateSpec &bitratespec, int samplingrate, 00042 int channels, WvSpeex::CodecMode mode, int complexity, 00043 long serialno) : 00044 speexenc(NULL), packetno(0), 00045 _vendor("Encoded with Speex"), 00046 oggstream(NULL), wrote_headers(false), 00047 framebuf(MAX_BYTES_PER_FRAME) 00048 { 00049 // pick a serial number 00050 if (serialno == RANDOM_SERIALNO) 00051 { 00052 serialno = rand(); 00053 } 00054 00055 // init ogg bitstream layer 00056 int retval; 00057 oggstream = new ogg_stream_state; 00058 if ((retval = ogg_stream_init(oggstream, serialno)) != 0) 00059 { 00060 seterror("error %s during ogg_stream_init", retval); 00061 return; 00062 } 00063 00064 // init speex encoder 00065 speexenc = new WvSpeexEncoder(bitratespec, samplingrate, channels, 00066 mode, complexity); 00067 } 00068 00069 00070 WvOggSpeexEncoder::~WvOggSpeexEncoder() 00071 { 00072 // destroy speex encoder 00073 delete speexenc; 00074 00075 // destroy ogg bitstream layer 00076 if (oggstream) 00077 { 00078 ogg_stream_clear(oggstream); 00079 delete oggstream; 00080 } 00081 } 00082 00083 00084 bool WvOggSpeexEncoder::_isok() const 00085 { 00086 return speexenc ? speexenc->isok() : true; 00087 } 00088 00089 00090 WvString WvOggSpeexEncoder::_geterror() const 00091 { 00092 return speexenc ? speexenc->geterror() : WvString(WvString::null); 00093 } 00094 00095 00096 void WvOggSpeexEncoder::add_comment(WvStringParm comment) 00097 { 00098 _comments.append(new WvString(comment), true); 00099 } 00100 00101 00102 void WvOggSpeexEncoder::add_tag(WvStringParm tag, WvStringParm value) 00103 { 00104 _comments.append(new WvString("%s=%s", tag, value), true); 00105 } 00106 00107 00108 bool WvOggSpeexEncoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf, 00109 bool flush) 00110 { 00111 // write header pages if needed 00112 if (! wrote_headers) 00113 { 00114 if (! write_headers(outbuf)) 00115 return false; 00116 wrote_headers = true; 00117 } 00118 00119 // write compressed audio pages 00120 for (;;) 00121 { 00122 // read in more data 00123 size_t samples = inbuf.used(); 00124 if (samples == 0) 00125 { 00126 // no more data 00127 if (flush) 00128 if (! write_stream(outbuf, true)) 00129 return false; 00130 return true; 00131 } 00132 00133 framebuf.zap(); 00134 if (! speexenc->encode(inbuf, framebuf)) 00135 return false; 00136 size_t bytes = framebuf.used(); 00137 if (bytes == 0) 00138 return false; // not enough data 00139 00140 // write out a packet 00141 ogg_packet oggpacket; 00142 oggpacket.packet = framebuf.ptr(); 00143 oggpacket.bytes = bytes; 00144 oggpacket.b_o_s = 0; 00145 oggpacket.e_o_s = 0; 00146 oggpacket.granulepos = 0; 00147 oggpacket.packetno = packetno++; 00148 ogg_stream_packetin(oggstream, &oggpacket); // always succeeds 00149 if (! write_stream(outbuf, false)) 00150 return false; 00151 } 00152 } 00153 00154 00155 bool WvOggSpeexEncoder::_typedfinish(OBuffer &outbuf) 00156 { 00157 // write header pages if needed 00158 if (! wrote_headers) 00159 { 00160 if (! write_headers(outbuf)) 00161 return false; 00162 wrote_headers = true; 00163 } 00164 return write_eof(outbuf); 00165 } 00166 00167 00168 bool WvOggSpeexEncoder::write_headers(OBuffer &outbuf) 00169 { 00170 // generate stream header 00171 ogg_packet header; 00172 SpeexHeader spxheader; 00173 SpeexMode *spxmode = speex_mode_list[mode()]; 00174 speex_init_header(&spxheader, samplingrate(), channels(), spxmode); 00175 spxheader.vbr = vbr(); 00176 spxheader.bitrate = nominalbitrate(); 00177 spxheader.frames_per_packet = 1; 00178 00179 int size; 00180 header.packet = (unsigned char*)speex_header_to_packet( 00181 &spxheader, &size); 00182 header.bytes = size; 00183 header.b_o_s = 1; 00184 header.e_o_s = 0; 00185 header.granulepos = 0; 00186 header.packetno = packetno++; 00187 ogg_stream_packetin(oggstream, &header); 00188 00189 // generate comment header 00190 WvDynBuf cbuf; 00191 putint_le(cbuf, _vendor.len()); 00192 cbuf.putstr(_vendor); 00193 putint_le(cbuf, _comments.count()); 00194 WvStringList::Iter it(_comments); 00195 for (it.rewind(); it.next(); ) 00196 { 00197 putint_le(cbuf, it->len()); 00198 cbuf.putstr(*it); 00199 } 00200 header.bytes = cbuf.used(); 00201 header.packet = const_cast<unsigned char *>(cbuf.get(header.bytes)); 00202 header.b_o_s = 0; 00203 header.e_o_s = 0; 00204 header.granulepos = 0; 00205 header.packetno = packetno++; 00206 ogg_stream_packetin(oggstream, &header); 00207 00208 // flush to ensure next data packet is in its own page 00209 return write_stream(outbuf, true /*flush*/); 00210 } 00211 00212 00213 bool WvOggSpeexEncoder::write_eof(OBuffer &outbuf) 00214 { 00215 ogg_packet oggpacket; 00216 oggpacket.packet = (unsigned char*)""; 00217 oggpacket.bytes = 0; 00218 oggpacket.b_o_s = 0; 00219 oggpacket.e_o_s = 1; 00220 oggpacket.granulepos = 0; 00221 oggpacket.packetno = packetno++; 00222 ogg_stream_packetin(oggstream, &oggpacket); 00223 return write_stream(outbuf, true /*flush*/); 00224 } 00225 00226 00227 bool WvOggSpeexEncoder::write_stream(OBuffer &outbuf, bool flush) 00228 { 00229 ogg_page oggpage; 00230 for (;;) 00231 { 00232 if (flush) 00233 { 00234 int retval = ogg_stream_flush(oggstream, & oggpage); 00235 if (retval == 0) 00236 break; // no remaining data 00237 else if (retval < 0) 00238 { 00239 seterror("error %s during ogg_stream_flush", retval); 00240 return false; 00241 } 00242 } 00243 else 00244 { 00245 int retval = ogg_stream_pageout(oggstream, & oggpage); 00246 if (retval == 0) 00247 break; // not enough data 00248 else if (retval < 0) 00249 { 00250 seterror("error %s during ogg_stream_pageout", retval); 00251 return false; 00252 } 00253 } 00254 outbuf.put(oggpage.header, oggpage.header_len); 00255 outbuf.put(oggpage.body, oggpage.body_len); 00256 } 00257 return true; 00258 } 00259 00260 00261 00262 /***** WvOggSpeexDecoder *****/ 00263 00264 WvOggSpeexDecoder::WvOggSpeexDecoder() : 00265 speexdec(NULL), forcepostfilter(false), 00266 _vbr(false), _nominalbitrate(-1), 00267 oggsync(NULL), oggstream(NULL), 00268 need_serialno(true), need_headers(2) 00269 { 00270 int retval; 00271 00272 // init ogg sync layer 00273 oggsync = new ogg_sync_state; 00274 if ((retval = ogg_sync_init(oggsync)) != 0) 00275 { 00276 seterror("error %s during ogg_sync_init", retval); 00277 return; 00278 } 00279 oggpage = new ogg_page; 00280 } 00281 00282 00283 WvOggSpeexDecoder::~WvOggSpeexDecoder() 00284 { 00285 // destroy speex decoder 00286 delete speexdec; 00287 00288 // destroy ogg bitstream layer 00289 if (oggstream) 00290 { 00291 ogg_stream_clear(oggstream); 00292 delete oggstream; 00293 } 00294 00295 // destroy ogg sync layer 00296 delete oggpage; 00297 ogg_sync_clear(oggsync); 00298 delete oggsync; 00299 } 00300 00301 00302 bool WvOggSpeexDecoder::_isok() const 00303 { 00304 return speexdec ? speexdec->isok() : true; 00305 } 00306 00307 00308 WvString WvOggSpeexDecoder::_geterror() const 00309 { 00310 return speexdec ? speexdec->geterror() : WvString(WvString::null); 00311 } 00312 00313 00314 bool WvOggSpeexDecoder::isheaderok() const 00315 { 00316 return need_headers == 0; 00317 } 00318 00319 00320 bool WvOggSpeexDecoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf, 00321 bool flush) 00322 { 00323 bool checkheaderok = ! isheaderok() && ! flush; 00324 for (;;) 00325 { 00326 // extract packets from the bitstream 00327 if (oggstream) 00328 { 00329 ogg_packet oggpacket; 00330 while (ogg_stream_packetout(oggstream, & oggpacket) > 0) 00331 { 00332 if (! process_packet(& oggpacket, outbuf)) 00333 return false; 00334 } 00335 00336 // detect end of stream 00337 if (oggstream->e_o_s) 00338 { 00339 setfinished(); 00340 return true; 00341 } 00342 } 00343 00344 // get more pages 00345 while (ogg_sync_pageseek(oggsync, oggpage) <= 0) 00346 { 00347 // read in more data 00348 size_t oggbufsize = inbuf.used(); 00349 if (oggbufsize == 0) 00350 { 00351 // no more data 00352 if (flush && oggsync->fill != 0) 00353 return false; 00354 return true; 00355 } 00356 if (oggbufsize > OGG_SPEEX_DECODER_BUF_SIZE) 00357 oggbufsize = OGG_SPEEX_DECODER_BUF_SIZE; 00358 00359 char *oggbuf = ogg_sync_buffer(oggsync, oggbufsize); 00360 if (oggbuf == NULL) 00361 { 00362 seterror("error allocating ogg sync buffer"); 00363 return false; 00364 } 00365 inbuf.move(oggbuf, oggbufsize); 00366 ogg_sync_wrote(oggsync, oggbufsize); 00367 } 00368 // we got a page! 00369 if (! process_page(oggpage, outbuf)) 00370 return false; 00371 00372 // return immediately after we see the header if not flushing 00373 // guarantee no data has been decoded yet since Ogg Speex 00374 // spec says that the audio data must begin on a fresh page 00375 // following the headers 00376 if (checkheaderok && isheaderok()) 00377 return true; 00378 } 00379 } 00380 00381 00382 bool WvOggSpeexDecoder::_typedfinish(OBuffer &outbuf) 00383 { 00384 if (! isheaderok()) 00385 { 00386 seterror("failed to detect an Ogg Speex stream"); 00387 return false; 00388 } 00389 return true; 00390 } 00391 00392 00393 bool WvOggSpeexDecoder::process_page(ogg_page *oggpage, 00394 OBuffer &outbuf) 00395 { 00396 if (need_serialno) 00397 { 00398 // attach to the first bitstream we find 00399 long serialno = ogg_page_serialno(oggpage); 00400 if (! prepare_stream(serialno)) 00401 return false; 00402 need_serialno = false; 00403 } 00404 // submit the page to the bitstream 00405 if (ogg_stream_pagein(oggstream, oggpage) != 0) 00406 { 00407 // this page was bad, or did not match the stream's 00408 // serial number exactly, skip it 00409 return true; 00410 } 00411 return true; 00412 } 00413 00414 00415 bool WvOggSpeexDecoder::process_packet(ogg_packet *oggpacket, 00416 OBuffer &outbuf) 00417 { 00418 if (need_headers > 0) 00419 { 00420 // output headers 00421 bool success = need_headers == 2 ? 00422 process_speex_header(oggpacket) : 00423 process_comment_header(oggpacket); 00424 if (! success) 00425 return false; 00426 need_headers -= 1; 00427 return true; 00428 } 00429 00430 // decode audio 00431 WvConstInPlaceBuf buf(oggpacket->packet, oggpacket->bytes); 00432 return speexdec->flush(buf, outbuf); 00433 } 00434 00435 00436 bool WvOggSpeexDecoder::process_speex_header(ogg_packet *header) 00437 { 00438 if (! header->b_o_s) 00439 { 00440 seterror("missing speex header at beginning of stream"); 00441 return false; 00442 } 00443 SpeexHeader *spxheader = speex_packet_to_header( 00444 (char*)header->packet, header->bytes); 00445 if (! spxheader) 00446 { 00447 seterror("invalid speex header"); 00448 return false; 00449 } 00450 if (spxheader->mode < 0 || spxheader->mode >= SPEEX_NB_MODES) 00451 { 00452 seterror("header contains an unrecognized or invalid codec mode"); 00453 return false; 00454 } 00455 _vbr = spxheader->vbr; 00456 _nominalbitrate = spxheader->bitrate; 00457 00458 // create the decoder 00459 speexdec = new WvSpeexDecoder(spxheader->rate, spxheader->nb_channels, 00460 WvSpeex::CodecMode(spxheader->mode)); 00461 return true; 00462 } 00463 00464 00465 bool WvOggSpeexDecoder::process_comment_header(ogg_packet *header) 00466 { 00467 if (! header->b_o_s && header->bytes >= 8) 00468 { 00469 WvConstInPlaceBuf cbuf(header->packet, header->bytes); 00470 unsigned long int length = getint_le(cbuf); 00471 if (length <= cbuf.used() - 4) 00472 { 00473 _vendor = WvString(reinterpret_cast<const char*>( 00474 cbuf.get(length))).unique(); 00475 unsigned long int count = getint_le(cbuf); 00476 while (count * 4 < cbuf.used()) 00477 { 00478 length = getint_le(cbuf); 00479 if (length > cbuf.used()) 00480 break; 00481 WvString comment(reinterpret_cast<const char*>( 00482 cbuf.get(length))); 00483 _comments.append(new WvString(comment.unique()), true); 00484 count -= 1; 00485 } 00486 if (count == 0) 00487 return true; 00488 } 00489 } 00490 seterror("invalid comment header"); 00491 return false; 00492 } 00493 00494 00495 bool WvOggSpeexDecoder::prepare_stream(long serialno) 00496 { 00497 // init ogg bitstream layer 00498 oggstream = new ogg_stream_state; 00499 int retval; 00500 if ((retval = ogg_stream_init(oggstream, serialno)) != 0) 00501 { 00502 seterror("error %s during ogg_stream_init", retval); 00503 return false; 00504 } 00505 return true; 00506 } 00507 00508 00509 int WvOggSpeexDecoder::channels() const 00510 { 00511 return speexdec ? speexdec->channels() : 0; 00512 } 00513 00514 00515 int WvOggSpeexDecoder::samplingrate() const 00516 { 00517 return speexdec ? speexdec->samplingrate() : 0; 00518 } 00519 00520 00521 int WvOggSpeexDecoder::samplesperframe() const 00522 { 00523 return speexdec ? speexdec->samplesperframe() : 0; 00524 } 00525 00526 00527 WvSpeex::CodecMode WvOggSpeexDecoder::mode() const 00528 { 00529 return speexdec ? speexdec->mode() : WvSpeex::NARROWBAND_MODE; 00530 } 00531 00532 00533 bool WvOggSpeexDecoder::vbr() const 00534 { 00535 return _vbr; 00536 } 00537 00538 00539 int WvOggSpeexDecoder::nominalbitrate() const 00540 { 00541 return _nominalbitrate; 00542 } 00543 00544 00545 bool WvOggSpeexDecoder::postfilter() const 00546 { 00547 return speexdec ? speexdec->postfilter() : forcepostfilter; 00548 } 00549 00550 00551 void WvOggSpeexDecoder::setpostfilter(bool enable) 00552 { 00553 forcepostfilter = enable; 00554 if (speexdec) 00555 speexdec->setpostfilter(enable); 00556 }

Generated on Tue Oct 5 01:09:20 2004 for WvStreams by doxygen 1.3.7