00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
#include "wvoggspeex.h"
#include <ogg/ogg.h>
#include <speex.h>
#include <speex_header.h>
#include <stdlib.h>
#include <unistd.h>
#include <string>
00015
// Upper bound, in bytes, on how much input the decoder hands to the ogg sync
// layer in a single step (at most 16k at once).
#define OGG_SPEEX_DECODER_BUF_SIZE 16384
00017
00018
00019 static unsigned long int getint_le(
WvBuf &inbuf)
00020 {
00021
00022
return inbuf.
getch() | (inbuf.
getch() << 8) |
00023 (inbuf.
getch() << 16) | (inbuf.
getch() << 24);
00024 }
00025
00026
00027
00028 static void putint_le(
WvBuf &outbuf,
unsigned long int value)
00029 {
00030
00031 outbuf.
putch(value & 255);
00032 outbuf.
putch((value >> 8) & 255);
00033 outbuf.
putch((value >> 16) & 255);
00034 outbuf.
putch(value >> 24);
00035 }
00036
00037
00038
00039
00040 WvOggSpeexEncoder::WvOggSpeexEncoder(
00041
const WvSpeex::BitrateSpec &bitratespec,
int samplingrate,
00042
int channels, WvSpeex::CodecMode mode,
int complexity,
00043
long serialno) :
00044 speexenc(NULL), packetno(0),
00045 _vendor("Encoded with Speex"),
00046 oggstream(NULL), wrote_headers(false),
00047 framebuf(MAX_BYTES_PER_FRAME)
00048 {
00049
00050
if (serialno ==
RANDOM_SERIALNO)
00051 {
00052 serialno = rand();
00053 }
00054
00055
00056
int retval;
00057 oggstream =
new ogg_stream_state;
00058
if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00059 {
00060 seterror(
"error %s during ogg_stream_init", retval);
00061
return;
00062 }
00063
00064
00065 speexenc =
new WvSpeexEncoder(bitratespec, samplingrate, channels,
00066 mode, complexity);
00067 }
00068
00069
00070 WvOggSpeexEncoder::~WvOggSpeexEncoder()
00071 {
00072
00073
delete speexenc;
00074
00075
00076
if (oggstream)
00077 {
00078 ogg_stream_clear(oggstream);
00079
delete oggstream;
00080 }
00081 }
00082
00083
00084 bool WvOggSpeexEncoder::_isok()
const
00085
{
00086
return speexenc ? speexenc->
isok() :
true;
00087 }
00088
00089
00090 WvString WvOggSpeexEncoder::_geterror()
const
00091
{
00092
return speexenc ? speexenc->
geterror() :
WvString(WvString::null);
00093 }
00094
00095
00096 void WvOggSpeexEncoder::add_comment(
WvStringParm comment)
00097 {
00098 _comments.append(
new WvString(comment),
true);
00099 }
00100
00101
00102 void WvOggSpeexEncoder::add_tag(
WvStringParm tag,
WvStringParm value)
00103 {
00104 _comments.append(
new WvString(
"%s=%s", tag, value),
true);
00105 }
00106
00107
00108 bool WvOggSpeexEncoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00109
bool flush)
00110 {
00111
00112
if (! wrote_headers)
00113 {
00114
if (! write_headers(outbuf))
00115
return false;
00116 wrote_headers =
true;
00117 }
00118
00119
00120
for (;;)
00121 {
00122
00123 size_t samples = inbuf.used();
00124
if (samples == 0)
00125 {
00126
00127
if (flush)
00128
if (! write_stream(outbuf,
true))
00129
return false;
00130
return true;
00131 }
00132
00133 framebuf.
zap();
00134
if (! speexenc->
encode(inbuf, framebuf))
00135
return false;
00136 size_t bytes = framebuf.
used();
00137
if (bytes == 0)
00138
return false;
00139
00140
00141 ogg_packet oggpacket;
00142 oggpacket.packet = framebuf.
ptr();
00143 oggpacket.bytes = bytes;
00144 oggpacket.b_o_s = 0;
00145 oggpacket.e_o_s = 0;
00146 oggpacket.granulepos = 0;
00147 oggpacket.packetno = packetno++;
00148 ogg_stream_packetin(oggstream, &oggpacket);
00149
if (! write_stream(outbuf,
false))
00150
return false;
00151 }
00152 }
00153
00154
00155 bool WvOggSpeexEncoder::_typedfinish(OBuffer &outbuf)
00156 {
00157
00158
if (! wrote_headers)
00159 {
00160
if (! write_headers(outbuf))
00161
return false;
00162 wrote_headers =
true;
00163 }
00164
return write_eof(outbuf);
00165 }
00166
00167
00168
/**
 * Writes the two header packets of the stream and flushes them to outbuf.
 *
 * Packet 1 is the Speex header built from the encoder's mode, sampling
 * rate, channel count, VBR flag and nominal bitrate, with one frame per
 * packet.  Packet 2 holds the vendor string followed by the comment list;
 * every string is preceded by its length as a 32-bit little-endian
 * integer, and the list by its element count in the same form.
 *
 * Returns false if the Speex header packet cannot be created or if
 * flushing the ogg stream fails.
 */
bool WvOggSpeexEncoder::write_headers(OBuffer &outbuf)
{
    // generate the speex header
    ogg_packet header;
    SpeexHeader spxheader;
    SpeexMode *spxmode = speex_mode_list[mode()];
    speex_init_header(&spxheader, samplingrate(), channels(), spxmode);
    spxheader.vbr = vbr();
    spxheader.bitrate = nominalbitrate();
    spxheader.frames_per_packet = 1;

    int size = 0;
    char *rawheader = speex_header_to_packet(&spxheader, &size);
    if (rawheader == NULL)
    {
        seterror("error creating speex header packet");
        return false;
    }
    header.packet = reinterpret_cast<unsigned char *>(rawheader);
    header.bytes = size;
    header.b_o_s = 1;
    header.e_o_s = 0;
    header.granulepos = 0;
    header.packetno = packetno++;
    ogg_stream_packetin(oggstream, &header);

    // speex_header_to_packet() hands back a heap buffer owned by the
    // caller, and libogg copies packet data on submission, so the buffer
    // is released here instead of being leaked.
    free(rawheader);

    // generate the comment header
    WvDynBuf cbuf;
    putint_le(cbuf, _vendor.len());
    cbuf.putstr(_vendor);
    putint_le(cbuf, _comments.count());
    WvStringList::Iter it(_comments);
    for (it.rewind(); it.next(); )
    {
        putint_le(cbuf, it->len());
        cbuf.putstr(*it);
    }
    header.bytes = cbuf.used();
    header.packet = const_cast<unsigned char *>(cbuf.get(header.bytes));
    header.b_o_s = 0;
    header.e_o_s = 0;
    header.granulepos = 0;
    header.packetno = packetno++;
    ogg_stream_packetin(oggstream, &header);

    // flush so that the headers end up on pages of their own
    return write_stream(outbuf, true /*force*/);
}
00211
00212
00213
/**
 * Submits an empty packet flagged as end-of-stream and flushes the ogg
 * stream to outbuf.
 */
bool WvOggSpeexEncoder::write_eof(OBuffer &outbuf)
{
    ogg_packet last;
    last.packet = (unsigned char *)"";  // zero-length payload
    last.bytes = 0;
    last.b_o_s = 0;
    last.e_o_s = 1;                     // marks the end of the stream
    last.granulepos = 0;
    last.packetno = packetno++;
    ogg_stream_packetin(oggstream, &last);

    return write_stream(outbuf, true /*force*/);
}
00225
00226
00227
/**
 * Copies pending ogg pages from the stream state into outbuf.
 *
 * With flush set, pages are pulled with ogg_stream_flush(); otherwise with
 * ogg_stream_pageout().  Pulling continues until the call returns 0.
 * A negative return value records an error and yields false.
 */
bool WvOggSpeexEncoder::write_stream(OBuffer &outbuf, bool flush)
{
    ogg_page page;
    for (;;)
    {
        const int retval = flush
            ? ogg_stream_flush(oggstream, &page)
            : ogg_stream_pageout(oggstream, &page);

        if (retval == 0)
            return true; // nothing further to write

        if (retval < 0)
        {
            if (flush)
                seterror("error %s during ogg_stream_flush", retval);
            else
                seterror("error %s during ogg_stream_pageout", retval);
            return false;
        }

        // a page is ready: header first, then body
        outbuf.put(page.header, page.header_len);
        outbuf.put(page.body, page.body_len);
    }
}
00259
00260
00261
00262
00263
00264 WvOggSpeexDecoder::WvOggSpeexDecoder() :
00265 speexdec(NULL), forcepostfilter(false),
00266 _vbr(false), _nominalbitrate(-1),
00267 oggsync(NULL), oggstream(NULL),
00268 need_serialno(true), need_headers(2)
00269 {
00270
int retval;
00271
00272
00273 oggsync =
new ogg_sync_state;
00274
if ((retval = ogg_sync_init(oggsync)) != 0)
00275 {
00276 seterror(
"error %s during ogg_sync_init", retval);
00277
return;
00278 }
00279 oggpage =
new ogg_page;
00280 }
00281
00282
00283 WvOggSpeexDecoder::~WvOggSpeexDecoder()
00284 {
00285
00286
delete speexdec;
00287
00288
00289
if (oggstream)
00290 {
00291 ogg_stream_clear(oggstream);
00292
delete oggstream;
00293 }
00294
00295
00296
delete oggpage;
00297 ogg_sync_clear(oggsync);
00298
delete oggsync;
00299 }
00300
00301
00302 bool WvOggSpeexDecoder::_isok()
const
00303
{
00304
return speexdec ? speexdec->
isok() :
true;
00305 }
00306
00307
00308 WvString WvOggSpeexDecoder::_geterror()
const
00309
{
00310
return speexdec ? speexdec->
geterror() :
WvString(WvString::null);
00311 }
00312
00313
00314 bool WvOggSpeexDecoder::isheaderok()
const
00315
{
00316
return need_headers == 0;
00317 }
00318
00319
00320 bool WvOggSpeexDecoder::_typedencode(IBuffer &inbuf, OBuffer &outbuf,
00321
bool flush)
00322 {
00323
bool checkheaderok = !
isheaderok() && ! flush;
00324
for (;;)
00325 {
00326
00327
if (oggstream)
00328 {
00329 ogg_packet oggpacket;
00330
while (ogg_stream_packetout(oggstream, & oggpacket) > 0)
00331 {
00332
if (! process_packet(& oggpacket, outbuf))
00333
return false;
00334 }
00335
00336
00337
if (oggstream->e_o_s)
00338 {
00339
setfinished();
00340
return true;
00341 }
00342 }
00343
00344
00345
while (ogg_sync_pageseek(oggsync, oggpage) <= 0)
00346 {
00347
00348 size_t oggbufsize = inbuf.used();
00349
if (oggbufsize == 0)
00350 {
00351
00352
if (flush && oggsync->fill != 0)
00353
return false;
00354
return true;
00355 }
00356
if (oggbufsize >
OGG_SPEEX_DECODER_BUF_SIZE)
00357 oggbufsize =
OGG_SPEEX_DECODER_BUF_SIZE;
00358
00359
char *oggbuf = ogg_sync_buffer(oggsync, oggbufsize);
00360
if (oggbuf == NULL)
00361 {
00362 seterror(
"error allocating ogg sync buffer");
00363
return false;
00364 }
00365 inbuf.move(oggbuf, oggbufsize);
00366 ogg_sync_wrote(oggsync, oggbufsize);
00367 }
00368
00369
if (! process_page(oggpage, outbuf))
00370
return false;
00371
00372
00373
00374
00375
00376
if (checkheaderok &&
isheaderok())
00377
return true;
00378 }
00379 }
00380
00381
00382 bool WvOggSpeexDecoder::_typedfinish(OBuffer &outbuf)
00383 {
00384
if (!
isheaderok())
00385 {
00386 seterror(
"failed to detect an Ogg Speex stream");
00387
return false;
00388 }
00389
return true;
00390 }
00391
00392
00393
bool WvOggSpeexDecoder::process_page(ogg_page *oggpage,
00394 OBuffer &outbuf)
00395 {
00396
if (need_serialno)
00397 {
00398
00399
long serialno = ogg_page_serialno(oggpage);
00400
if (! prepare_stream(serialno))
00401
return false;
00402 need_serialno =
false;
00403 }
00404
00405
if (ogg_stream_pagein(oggstream, oggpage) != 0)
00406 {
00407
00408
00409
return true;
00410 }
00411
return true;
00412 }
00413
00414
00415
bool WvOggSpeexDecoder::process_packet(ogg_packet *oggpacket,
00416 OBuffer &outbuf)
00417 {
00418
if (need_headers > 0)
00419 {
00420
00421
bool success = need_headers == 2 ?
00422 process_speex_header(oggpacket) :
00423 process_comment_header(oggpacket);
00424
if (! success)
00425
return false;
00426 need_headers -= 1;
00427
return true;
00428 }
00429
00430
00431
WvConstInPlaceBuf buf(oggpacket->packet, oggpacket->bytes);
00432
return speexdec->
flush(buf, outbuf);
00433 }
00434
00435
00436
/**
 * Parses the Speex header packet and creates the Speex decoder from it.
 *
 * The packet must carry the beginning-of-stream flag, must parse as a
 * Speex header, and must name a codec mode in [0, SPEEX_NB_MODES).  On
 * success the VBR flag and nominal bitrate are stored and a WvSpeexDecoder
 * is created from the header's rate, channel count and mode.  On failure
 * an error is recorded and false is returned.
 */
bool WvOggSpeexDecoder::process_speex_header(ogg_packet *header)
{
    if (!header->b_o_s)
    {
        seterror("missing speex header at beginning of stream");
        return false;
    }

    SpeexHeader *spxheader = speex_packet_to_header(
        (char *)header->packet, header->bytes);
    if (!spxheader)
    {
        seterror("invalid speex header");
        return false;
    }

    // speex_packet_to_header() returns a heap-allocated header owned by
    // the caller, so it is released on every path from here on.
    if (spxheader->mode < 0 || spxheader->mode >= SPEEX_NB_MODES)
    {
        free(spxheader);
        seterror("header contains an unrecognized or invalid codec mode");
        return false;
    }
    _vbr = spxheader->vbr;
    _nominalbitrate = spxheader->bitrate;

    // create the decoder
    speexdec = new WvSpeexDecoder(spxheader->rate, spxheader->nb_channels,
        WvSpeex::CodecMode(spxheader->mode));
    free(spxheader);
    return true;
}
00463
00464
00465
// Builds a NUL-terminated copy of exactly len bytes starting at data.
static std::string oggspeex_copy_bytes(const char *data, size_t len)
{
    if (data == NULL || len == 0)
        return std::string();
    return std::string(data, len);
}


/**
 * Parses the comment header packet.
 *
 * Expected layout: a 32-bit little-endian length followed by the vendor
 * string, a 32-bit little-endian comment count, then for each comment a
 * 32-bit little-endian length followed by the comment text.  The vendor
 * string is stored in _vendor and each comment is appended to _comments.
 *
 * The packet must not carry the beginning-of-stream flag and must be at
 * least 8 bytes long.  Returns true when all announced comments were
 * read; otherwise records an error and returns false (comments read up to
 * that point stay in _comments).
 */
bool WvOggSpeexDecoder::process_comment_header(ogg_packet *header)
{
    if (!header->b_o_s && header->bytes >= 8)
    {
        WvConstInPlaceBuf cbuf(header->packet, header->bytes);
        unsigned long int length = getint_le(cbuf);

        // the vendor string must leave room for the 4-byte comment count
        if (length <= cbuf.used() - 4)
        {
            // The strings in the packet are length-prefixed, not
            // NUL-terminated, so exactly `length` bytes are copied rather
            // than letting the string constructor scan for a terminator
            // inside the following fields.
            const std::string vendor = oggspeex_copy_bytes(
                reinterpret_cast<const char *>(cbuf.get(length)), length);
            _vendor = WvString(vendor.c_str()).unique();

            unsigned long int count = getint_le(cbuf);

            // Every remaining comment needs at least its 4-byte length
            // field.  The division avoids overflow in count * 4, "<="
            // admits a trailing empty comment, and "count > 0" stops the
            // loop before count can wrap around.
            while (count > 0 && count <= cbuf.used() / 4)
            {
                length = getint_le(cbuf);
                if (length > cbuf.used())
                    break;
                const std::string text = oggspeex_copy_bytes(
                    reinterpret_cast<const char *>(cbuf.get(length)),
                    length);
                WvString comment(text.c_str());
                _comments.append(new WvString(comment.unique()), true);
                count -= 1;
            }
            if (count == 0)
                return true;
        }
    }
    seterror("invalid comment header");
    return false;
}
00493
00494
00495
bool WvOggSpeexDecoder::prepare_stream(
long serialno)
00496 {
00497
00498 oggstream =
new ogg_stream_state;
00499
int retval;
00500
if ((retval = ogg_stream_init(oggstream, serialno)) != 0)
00501 {
00502
seterror(
"error %s during ogg_stream_init", retval);
00503
return false;
00504 }
00505
return true;
00506 }
00507
00508
00509 int WvOggSpeexDecoder::channels()
const
00510
{
00511
return speexdec ? speexdec->
channels() : 0;
00512 }
00513
00514
00515 int WvOggSpeexDecoder::samplingrate()
const
00516
{
00517
return speexdec ? speexdec->
samplingrate() : 0;
00518 }
00519
00520
00521 int WvOggSpeexDecoder::samplesperframe()
const
00522
{
00523
return speexdec ? speexdec->
samplesperframe() : 0;
00524 }
00525
00526
00527 WvSpeex::CodecMode
WvOggSpeexDecoder::mode()
const
00528
{
00529
return speexdec ? speexdec->
mode() : WvSpeex::NARROWBAND_MODE;
00530 }
00531
00532
00533 bool WvOggSpeexDecoder::vbr()
const
00534
{
00535
return _vbr;
00536 }
00537
00538
00539 int WvOggSpeexDecoder::nominalbitrate()
const
00540
{
00541
return _nominalbitrate;
00542 }
00543
00544
00545 bool WvOggSpeexDecoder::postfilter()
const
00546
{
00547
return speexdec ? speexdec->
postfilter() : forcepostfilter;
00548 }
00549
00550
00551 void WvOggSpeexDecoder::setpostfilter(
bool enable)
00552 {
00553 forcepostfilter = enable;
00554
if (speexdec)
00555 speexdec->
setpostfilter(enable);
00556 }