00001
00002
00003
00004
00005
00006
00007
00008
00009
#include "wvoggspeex.h"
#include <ogg/ogg.h>
#include <speex.h>
#include <speex_header.h>
#include <stdlib.h>
#include <unistd.h>
00015
00016 #define OGG_SPEEX_DECODER_BUF_SIZE 16384 // at most 16k at once
00017
00018
00019 static unsigned long int getint_le(WvBuf &inbuf)
00020 {
00021
00022 return inbuf.getch() | (inbuf.getch() << 8) |
00023 (inbuf.getch() << 16) | (inbuf.getch() << 24);
00024 }
00025
00026
00027
00028 static void putint_le(WvBuf &outbuf, unsigned long int value)
00029 {
00030
00031 outbuf.putch(value & 255);
00032 outbuf.putch((value >> 8) & 255);
00033 outbuf.putch((value >> 16) & 255);
00034 outbuf.putch(value >> 24);
00035 }
00036
00037
00038
00039
00040 WvOggSpeexEncoder::WvOggSpeexEncoder(
00041 const WvSpeex::BitrateSpec &bitratespec, int samplingrate,
00042 int channels, WvSpeex::CodecMode mode, int complexity,
00043 long serialno) :
00044 speexenc(NULL), packetno(0),
00045 _vendor("Encoded with Speex"),
00046 oggstream(NULL), wrote_headers(false),
00047 framebuf(MAX_BYTES_PER_FRAME)
00048 {
00049
00050 if (serialno == RANDOM_SERIALNO)
00051 {
00052 serialno = rand();
00053 }
00054
00055
00056 int retval;
00057 oggstream = new ogg_stream_state;
00058 if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00059 {
00060 seterror("error %s during ogg_stream_init", retval);
00061 return;
00062 }
00063
00064
00065 speexenc = new WvSpeexEncoder(bitratespec, samplingrate, channels,
00066 mode, complexity);
00067 }
00068
00069
00070 WvOggSpeexEncoder::~WvOggSpeexEncoder()
00071 {
00072
00073 delete speexenc;
00074
00075
00076 if (oggstream)
00077 {
00078 ogg_stream_clear(oggstream);
00079 delete oggstream;
00080 }
00081 }
00082
00083
00084 bool WvOggSpeexEncoder::_isok() const
00085 {
00086 return speexenc ? speexenc->isok() : true;
00087 }
00088
00089
00090 WvString WvOggSpeexEncoder::_geterror() const
00091 {
00092 return speexenc ? speexenc->geterror() : WvString(WvString::null);
00093 }
00094
00095
00096 void WvOggSpeexEncoder::add_comment(WvStringParm comment)
00097 {
00098 _comments.append(new WvString(comment), true);
00099 }
00100
00101
00102 void WvOggSpeexEncoder::add_tag(WvStringParm tag, WvStringParm value)
00103 {
00104 _comments.append(new WvString("%s=%s", tag, value), true);
00105 }
00106
00107
00108 bool WvOggSpeexEncoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00109 bool flush)
00110 {
00111
00112 if (! wrote_headers)
00113 {
00114 if (! write_headers(outbuf))
00115 return false;
00116 wrote_headers = true;
00117 }
00118
00119
00120 for (;;)
00121 {
00122
00123 size_t samples = inbuf.used();
00124 if (samples == 0)
00125 {
00126
00127 if (flush)
00128 if (! write_stream(outbuf, true))
00129 return false;
00130 return true;
00131 }
00132
00133 framebuf.zap();
00134 if (! speexenc->encode(inbuf, framebuf))
00135 return false;
00136 size_t bytes = framebuf.used();
00137 if (bytes == 0)
00138 return false;
00139
00140
00141 ogg_packet oggpacket;
00142 oggpacket.packet = framebuf.ptr();
00143 oggpacket.bytes = bytes;
00144 oggpacket.b_o_s = 0;
00145 oggpacket.e_o_s = 0;
00146 oggpacket.granulepos = 0;
00147 oggpacket.packetno = packetno++;
00148 ogg_stream_packetin(oggstream, &oggpacket);
00149 if (! write_stream(outbuf, false))
00150 return false;
00151 }
00152 }
00153
00154
00155 bool WvOggSpeexEncoder::_typedfinish(OBuffer &outbuf)
00156 {
00157
00158 if (! wrote_headers)
00159 {
00160 if (! write_headers(outbuf))
00161 return false;
00162 wrote_headers = true;
00163 }
00164 return write_eof(outbuf);
00165 }
00166
00167
00168 bool WvOggSpeexEncoder::write_headers(OBuffer &outbuf)
00169 {
00170
00171 ogg_packet header;
00172 SpeexHeader spxheader;
00173 SpeexMode *spxmode = speex_mode_list[mode()];
00174 speex_init_header(&spxheader, samplingrate(), channels(), spxmode);
00175 spxheader.vbr = vbr();
00176 spxheader.bitrate = nominalbitrate();
00177 spxheader.frames_per_packet = 1;
00178
00179 int size;
00180 header.packet = (unsigned char*)speex_header_to_packet(
00181 &spxheader, &size);
00182 header.bytes = size;
00183 header.b_o_s = 1;
00184 header.e_o_s = 0;
00185 header.granulepos = 0;
00186 header.packetno = packetno++;
00187 ogg_stream_packetin(oggstream, &header);
00188
00189
00190 WvDynBuf cbuf;
00191 putint_le(cbuf, _vendor.len());
00192 cbuf.putstr(_vendor);
00193 putint_le(cbuf, _comments.count());
00194 WvStringList::Iter it(_comments);
00195 for (it.rewind(); it.next(); )
00196 {
00197 putint_le(cbuf, it->len());
00198 cbuf.putstr(*it);
00199 }
00200 header.bytes = cbuf.used();
00201 header.packet = const_cast<unsigned char *>(cbuf.get(header.bytes));
00202 header.b_o_s = 0;
00203 header.e_o_s = 0;
00204 header.granulepos = 0;
00205 header.packetno = packetno++;
00206 ogg_stream_packetin(oggstream, &header);
00207
00208
00209 return write_stream(outbuf, true );
00210 }
00211
00212
00213 bool WvOggSpeexEncoder::write_eof(OBuffer &outbuf)
00214 {
00215 ogg_packet oggpacket;
00216 oggpacket.packet = (unsigned char*)"";
00217 oggpacket.bytes = 0;
00218 oggpacket.b_o_s = 0;
00219 oggpacket.e_o_s = 1;
00220 oggpacket.granulepos = 0;
00221 oggpacket.packetno = packetno++;
00222 ogg_stream_packetin(oggstream, &oggpacket);
00223 return write_stream(outbuf, true );
00224 }
00225
00226
00227 bool WvOggSpeexEncoder::write_stream(OBuffer &outbuf, bool flush)
00228 {
00229 ogg_page oggpage;
00230 for (;;)
00231 {
00232 if (flush)
00233 {
00234 int retval = ogg_stream_flush(oggstream, & oggpage);
00235 if (retval == 0)
00236 break;
00237 else if (retval < 0)
00238 {
00239 seterror("error %s during ogg_stream_flush", retval);
00240 return false;
00241 }
00242 }
00243 else
00244 {
00245 int retval = ogg_stream_pageout(oggstream, & oggpage);
00246 if (retval == 0)
00247 break;
00248 else if (retval < 0)
00249 {
00250 seterror("error %s during ogg_stream_pageout", retval);
00251 return false;
00252 }
00253 }
00254 outbuf.put(oggpage.header, oggpage.header_len);
00255 outbuf.put(oggpage.body, oggpage.body_len);
00256 }
00257 return true;
00258 }
00259
00260
00261
00262
00263
00264 WvOggSpeexDecoder::WvOggSpeexDecoder() :
00265 speexdec(NULL), forcepostfilter(false),
00266 _vbr(false), _nominalbitrate(-1),
00267 oggsync(NULL), oggstream(NULL),
00268 need_serialno(true), need_headers(2)
00269 {
00270 int retval;
00271
00272
00273 oggsync = new ogg_sync_state;
00274 if ((retval = ogg_sync_init(oggsync)) != 0)
00275 {
00276 seterror("error %s during ogg_sync_init", retval);
00277 return;
00278 }
00279 oggpage = new ogg_page;
00280 }
00281
00282
00283 WvOggSpeexDecoder::~WvOggSpeexDecoder()
00284 {
00285
00286 delete speexdec;
00287
00288
00289 if (oggstream)
00290 {
00291 ogg_stream_clear(oggstream);
00292 delete oggstream;
00293 }
00294
00295
00296 delete oggpage;
00297 ogg_sync_clear(oggsync);
00298 delete oggsync;
00299 }
00300
00301
00302 bool WvOggSpeexDecoder::_isok() const
00303 {
00304 return speexdec ? speexdec->isok() : true;
00305 }
00306
00307
00308 WvString WvOggSpeexDecoder::_geterror() const
00309 {
00310 return speexdec ? speexdec->geterror() : WvString(WvString::null);
00311 }
00312
00313
00314 bool WvOggSpeexDecoder::isheaderok() const
00315 {
00316 return need_headers == 0;
00317 }
00318
00319
00320 bool WvOggSpeexDecoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00321 bool flush)
00322 {
00323 bool checkheaderok = ! isheaderok() && ! flush;
00324 for (;;)
00325 {
00326
00327 if (oggstream)
00328 {
00329 ogg_packet oggpacket;
00330 while (ogg_stream_packetout(oggstream, & oggpacket) > 0)
00331 {
00332 if (! process_packet(& oggpacket, outbuf))
00333 return false;
00334 }
00335
00336
00337 if (oggstream->e_o_s)
00338 {
00339 setfinished();
00340 return true;
00341 }
00342 }
00343
00344
00345 while (ogg_sync_pageseek(oggsync, oggpage) <= 0)
00346 {
00347
00348 size_t oggbufsize = inbuf.used();
00349 if (oggbufsize == 0)
00350 {
00351
00352 if (flush && oggsync->fill != 0)
00353 return false;
00354 return true;
00355 }
00356 if (oggbufsize > OGG_SPEEX_DECODER_BUF_SIZE)
00357 oggbufsize = OGG_SPEEX_DECODER_BUF_SIZE;
00358
00359 char *oggbuf = ogg_sync_buffer(oggsync, oggbufsize);
00360 if (oggbuf == NULL)
00361 {
00362 seterror("error allocating ogg sync buffer");
00363 return false;
00364 }
00365 inbuf.move(oggbuf, oggbufsize);
00366 ogg_sync_wrote(oggsync, oggbufsize);
00367 }
00368
00369 if (! process_page(oggpage, outbuf))
00370 return false;
00371
00372
00373
00374
00375
00376 if (checkheaderok && isheaderok())
00377 return true;
00378 }
00379 }
00380
00381
00382 bool WvOggSpeexDecoder::_typedfinish(OBuffer &outbuf)
00383 {
00384 if (! isheaderok())
00385 {
00386 seterror("failed to detect an Ogg Speex stream");
00387 return false;
00388 }
00389 return true;
00390 }
00391
00392
00393 bool WvOggSpeexDecoder::process_page(ogg_page *oggpage,
00394 OBuffer &outbuf)
00395 {
00396 if (need_serialno)
00397 {
00398
00399 long serialno = ogg_page_serialno(oggpage);
00400 if (! prepare_stream(serialno))
00401 return false;
00402 need_serialno = false;
00403 }
00404
00405 if (ogg_stream_pagein(oggstream, oggpage) != 0)
00406 {
00407
00408
00409 return true;
00410 }
00411 return true;
00412 }
00413
00414
00415 bool WvOggSpeexDecoder::process_packet(ogg_packet *oggpacket,
00416 OBuffer &outbuf)
00417 {
00418 if (need_headers > 0)
00419 {
00420
00421 bool success = need_headers == 2 ?
00422 process_speex_header(oggpacket) :
00423 process_comment_header(oggpacket);
00424 if (! success)
00425 return false;
00426 need_headers -= 1;
00427 return true;
00428 }
00429
00430
00431 WvConstInPlaceBuf buf(oggpacket->packet, oggpacket->bytes);
00432 return speexdec->flush(buf, outbuf);
00433 }
00434
00435
00436 bool WvOggSpeexDecoder::process_speex_header(ogg_packet *header)
00437 {
00438 if (! header->b_o_s)
00439 {
00440 seterror("missing speex header at beginning of stream");
00441 return false;
00442 }
00443 SpeexHeader *spxheader = speex_packet_to_header(
00444 (char*)header->packet, header->bytes);
00445 if (! spxheader)
00446 {
00447 seterror("invalid speex header");
00448 return false;
00449 }
00450 if (spxheader->mode < 0 || spxheader->mode >= SPEEX_NB_MODES)
00451 {
00452 seterror("header contains an unrecognized or invalid codec mode");
00453 return false;
00454 }
00455 _vbr = spxheader->vbr;
00456 _nominalbitrate = spxheader->bitrate;
00457
00458
00459 speexdec = new WvSpeexDecoder(spxheader->rate, spxheader->nb_channels,
00460 WvSpeex::CodecMode(spxheader->mode));
00461 return true;
00462 }
00463
00464
00465 bool WvOggSpeexDecoder::process_comment_header(ogg_packet *header)
00466 {
00467 if (! header->b_o_s && header->bytes >= 8)
00468 {
00469 WvConstInPlaceBuf cbuf(header->packet, header->bytes);
00470 unsigned long int length = getint_le(cbuf);
00471 if (length <= cbuf.used() - 4)
00472 {
00473 _vendor = WvString(reinterpret_cast<const char*>(
00474 cbuf.get(length))).unique();
00475 unsigned long int count = getint_le(cbuf);
00476 while (count * 4 < cbuf.used())
00477 {
00478 length = getint_le(cbuf);
00479 if (length > cbuf.used())
00480 break;
00481 WvString comment(reinterpret_cast<const char*>(
00482 cbuf.get(length)));
00483 _comments.append(new WvString(comment.unique()), true);
00484 count -= 1;
00485 }
00486 if (count == 0)
00487 return true;
00488 }
00489 }
00490 seterror("invalid comment header");
00491 return false;
00492 }
00493
00494
00495 bool WvOggSpeexDecoder::prepare_stream(long serialno)
00496 {
00497
00498 oggstream = new ogg_stream_state;
00499 int retval;
00500 if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00501 {
00502 seterror("error %s during ogg_stream_init", retval);
00503 return false;
00504 }
00505 return true;
00506 }
00507
00508
00509 int WvOggSpeexDecoder::channels() const
00510 {
00511 return speexdec ? speexdec->channels() : 0;
00512 }
00513
00514
00515 int WvOggSpeexDecoder::samplingrate() const
00516 {
00517 return speexdec ? speexdec->samplingrate() : 0;
00518 }
00519
00520
00521 int WvOggSpeexDecoder::samplesperframe() const
00522 {
00523 return speexdec ? speexdec->samplesperframe() : 0;
00524 }
00525
00526
00527 WvSpeex::CodecMode WvOggSpeexDecoder::mode() const
00528 {
00529 return speexdec ? speexdec->mode() : WvSpeex::NARROWBAND_MODE;
00530 }
00531
00532
00533 bool WvOggSpeexDecoder::vbr() const
00534 {
00535 return _vbr;
00536 }
00537
00538
00539 int WvOggSpeexDecoder::nominalbitrate() const
00540 {
00541 return _nominalbitrate;
00542 }
00543
00544
00545 bool WvOggSpeexDecoder::postfilter() const
00546 {
00547 return speexdec ? speexdec->postfilter() : forcepostfilter;
00548 }
00549
00550
00551 void WvOggSpeexDecoder::setpostfilter(bool enable)
00552 {
00553 forcepostfilter = enable;
00554 if (speexdec)
00555 speexdec->setpostfilter(enable);
00556 }