Wed Aug 15 01:24:20 2007

Asterisk developer's documentation


codec_speex.c

Go to the documentation of this file.
00001 /*
00002  * Asterisk -- An open source telephony toolkit.
00003  *
00004  * Copyright (C) 1999 - 2005, Digium, Inc.
00005  *
00006  * Mark Spencer <markster@digium.com>
00007  *
00008  *
00009  * See http://www.asterisk.org for more information about
00010  * the Asterisk project. Please do not directly contact
00011  * any of the maintainers of this project for assistance;
00012  * the project provides a web site, mailing lists and IRC
00013  * channels for your use.
00014  *
00015  * This program is free software, distributed under the terms of
00016  * the GNU General Public License Version 2. See the LICENSE file
00017  * at the top of the source tree.
00018  */
00019 
00020 /*! \file
00021  *
00022  * \brief Translate between signed linear and Speex (Open Codec)
00023  *
00024  * http://www.speex.org
00025  * \note This work was motivated by Jeremy McNamara 
00026  * hacked to be configurable by anthm and bkw 9/28/2004
00027  * \ingroup codecs
00028  */
00029 
00030 /*** MODULEINFO
00031    <depend>speex</depend>
00032  ***/
00033 
00034 #include "asterisk.h"
00035 
00036 ASTERISK_FILE_VERSION(__FILE__, "$Revision: 65877 $")
00037 
00038 #include <fcntl.h>
00039 #include <stdlib.h>
00040 #include <unistd.h>
00041 #include <netinet/in.h>
00042 #include <string.h>
00043 #include <stdio.h>
00044 #include <speex/speex.h>
00045 
00046 /* We require a post 1.1.8 version of Speex to enable preprocessing
00047    and better type handling */   
00048 #ifdef _SPEEX_TYPES_H
00049 #include <speex/speex_preprocess.h>
00050 #endif
00051 
00052 #include "asterisk/lock.h"
00053 #include "asterisk/translate.h"
00054 #include "asterisk/module.h"
00055 #include "asterisk/config.h"
00056 #include "asterisk/options.h"
00057 #include "asterisk/logger.h"
00058 #include "asterisk/channel.h"
00059 #include "asterisk/utils.h"
00060 
00061 /* Sample frame data */
00062 #include "slin_speex_ex.h"
00063 #include "speex_slin_ex.h"
00064 
/*
 * Codec tunables.
 *
 * The initializers are the built-in defaults; parse_config() overwrites them
 * from the [speex] section of codecs.conf.
 */

/* Encoder / decoder settings */
static int quality = 3;        /* fixed-rate quality, applied when neither ABR nor VBR is on */
static int complexity = 2;     /* encoder complexity */
static int enhancement = 0;    /* non-zero: enable perceptual enhancement in the decoder */
static int vad = 0;            /* non-zero: encoder voice activity detection (fixed-rate mode only) */
static int vbr = 0;            /* non-zero: variable bit rate */
static float vbr_quality = 4;  /* quality used together with vbr */
static int abr = 0;            /* average bit rate target; 0 disables ABR */
static int dtx = 0;            /* set to 1 to enable silence detection */

/* Preprocessor settings (only used when built with speex_preprocess.h) */
static int preproc = 0;        /* master switch for the preprocessor */
static int pp_vad = 0;
static int pp_agc = 0;
static float pp_agc_level = 8000;  /* value handed to SPEEX_PREPROCESS_SET_AGC_LEVEL */
static int pp_denoise = 0;
static int pp_dereverb = 0;
static float pp_dereverb_decay = 0.4;
static float pp_dereverb_level = 0.3;
00083 
/* Frame type tags (not referenced by the code visible in this file) */
#define TYPE_HIGH     0x0
#define TYPE_LOW      0x1
#define TYPE_SILENCE  0x2
#define TYPE_MASK     0x3

/* Capacity, in 16-bit samples, of the translator work/output buffers */
#define BUFFER_SAMPLES  8000
/* Samples reported for the sample Speex frame (one 20 ms frame) */
#define SPEEX_SAMPLES   160
00091 
00092 struct speex_coder_pvt {
00093    void *speex;
00094    SpeexBits bits;
00095    int framesize;
00096    int silent_state;
00097 #ifdef _SPEEX_TYPES_H
00098    SpeexPreprocessState *pp;
00099    spx_int16_t buf[BUFFER_SAMPLES];
00100 #else
00101    int16_t buf[BUFFER_SAMPLES];  /* input, waiting to be compressed */
00102 #endif
00103 };
00104 
00105 
00106 static int lintospeex_new(struct ast_trans_pvt *pvt)
00107 {
00108    struct speex_coder_pvt *tmp = pvt->pvt;
00109 
00110    if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
00111       return -1;
00112 
00113    speex_bits_init(&tmp->bits);
00114    speex_bits_reset(&tmp->bits);
00115    speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00116    speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
00117 #ifdef _SPEEX_TYPES_H
00118    if (preproc) {
00119       tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
00120       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
00121       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
00122       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
00123       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
00124       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
00125       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
00126       speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
00127    }
00128 #endif
00129    if (!abr && !vbr) {
00130       speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
00131       if (vad)
00132          speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
00133    }
00134    if (vbr) {
00135       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
00136       speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
00137    }
00138    if (abr)
00139       speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
00140    if (dtx)
00141       speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); 
00142    tmp->silent_state = 0;
00143 
00144    return 0;
00145 }
00146 
00147 static int speextolin_new(struct ast_trans_pvt *pvt)
00148 {
00149    struct speex_coder_pvt *tmp = pvt->pvt;
00150    
00151    if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
00152       return -1;
00153 
00154    speex_bits_init(&tmp->bits);
00155    speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
00156    if (enhancement)
00157       speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
00158 
00159    return 0;
00160 }
00161 
00162 static struct ast_frame *lintospeex_sample(void)
00163 {
00164    static struct ast_frame f;
00165    f.frametype = AST_FRAME_VOICE;
00166    f.subclass = AST_FORMAT_SLINEAR;
00167    f.datalen = sizeof(slin_speex_ex);
00168    /* Assume 8000 Hz */
00169    f.samples = sizeof(slin_speex_ex)/2;
00170    f.mallocd = 0;
00171    f.offset = 0;
00172    f.src = __PRETTY_FUNCTION__;
00173    f.data = slin_speex_ex;
00174    return &f;
00175 }
00176 
00177 static struct ast_frame *speextolin_sample(void)
00178 {
00179    static struct ast_frame f;
00180    f.frametype = AST_FRAME_VOICE;
00181    f.subclass = AST_FORMAT_SPEEX;
00182    f.datalen = sizeof(speex_slin_ex);
00183    /* All frames are 20 ms long */
00184    f.samples = SPEEX_SAMPLES;
00185    f.mallocd = 0;
00186    f.offset = 0;
00187    f.src = __PRETTY_FUNCTION__;
00188    f.data = speex_slin_ex;
00189    return &f;
00190 }
00191 
00192 /*! \brief convert and store into outbuf */
00193 static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00194 {
00195    struct speex_coder_pvt *tmp = pvt->pvt;
00196 
00197    /* Assuming there's space left, decode into the current buffer at
00198       the tail location.  Read in as many frames as there are */
00199    int x;
00200    int res;
00201    int16_t *dst = (int16_t *)pvt->outbuf;
00202    /* XXX fout is a temporary buffer, may have different types */
00203 #ifdef _SPEEX_TYPES_H
00204    spx_int16_t fout[1024];
00205 #else
00206    float fout[1024];
00207 #endif
00208 
00209    if (f->datalen == 0) {  /* Native PLC interpolation */
00210       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00211          ast_log(LOG_WARNING, "Out of buffer space\n");
00212          return -1;
00213       }
00214 #ifdef _SPEEX_TYPES_H
00215       speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
00216 #else
00217       speex_decode(tmp->speex, NULL, fout);
00218       for (x=0;x<tmp->framesize;x++) {
00219          dst[pvt->samples + x] = (int16_t)fout[x];
00220       }
00221 #endif
00222       pvt->samples += tmp->framesize;
00223       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00224       return 0;
00225    }
00226 
00227    /* Read in bits */
00228    speex_bits_read_from(&tmp->bits, f->data, f->datalen);
00229    for (;;) {
00230 #ifdef _SPEEX_TYPES_H
00231       res = speex_decode_int(tmp->speex, &tmp->bits, fout);
00232 #else
00233       res = speex_decode(tmp->speex, &tmp->bits, fout);
00234 #endif
00235       if (res < 0)
00236          break;
00237       if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
00238          ast_log(LOG_WARNING, "Out of buffer space\n");
00239          return -1;
00240       }
00241       for (x = 0 ; x < tmp->framesize; x++)
00242          dst[pvt->samples + x] = (int16_t)fout[x];
00243       pvt->samples += tmp->framesize;
00244       pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
00245    }
00246    return 0;
00247 }
00248 
00249 /*! \brief store input frame in work buffer */
00250 static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
00251 {
00252    struct speex_coder_pvt *tmp = pvt->pvt;
00253 
00254    /* XXX We should look at how old the rest of our stream is, and if it
00255       is too old, then we should overwrite it entirely, otherwise we can
00256       get artifacts of earlier talk that do not belong */
00257    memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
00258    pvt->samples += f->samples;
00259    return 0;
00260 }
00261 
00262 /*! \brief convert work buffer and produce output frame */
00263 static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
00264 {
00265    struct speex_coder_pvt *tmp = pvt->pvt;
00266    int is_speech=1;
00267    int datalen = 0;  /* output bytes */
00268    int samples = 0;  /* output samples */
00269 
00270    /* We can't work on anything less than a frame in size */
00271    if (pvt->samples < tmp->framesize)
00272       return NULL;
00273    speex_bits_reset(&tmp->bits);
00274    while (pvt->samples >= tmp->framesize) {
00275 #ifdef _SPEEX_TYPES_H
00276       /* Preprocess audio */
00277       if (preproc)
00278          is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
00279       /* Encode a frame of data */
00280       if (is_speech) {
00281          /* If DTX enabled speex_encode returns 0 during silence */
00282          is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
00283       } else {
00284          /* 5 zeros interpreted by Speex as silence (submode 0) */
00285          speex_bits_pack(&tmp->bits, 0, 5);
00286       }
00287 #else
00288       {
00289          float fbuf[1024];
00290          int x;
00291          /* Convert to floating point */
00292          for (x = 0; x < tmp->framesize; x++)
00293             fbuf[x] = tmp->buf[samples + x];
00294          /* Encode a frame of data */
00295          is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
00296       }
00297 #endif
00298       samples += tmp->framesize;
00299       pvt->samples -= tmp->framesize;
00300    }
00301 
00302    /* Move the data at the end of the buffer to the front */
00303    if (pvt->samples)
00304       memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
00305 
00306    /* Use AST_FRAME_CNG to signify the start of any silence period */
00307    if (is_speech) {
00308       tmp->silent_state = 0;
00309    } else {
00310       if (tmp->silent_state) {
00311          return NULL;
00312       } else {
00313          tmp->silent_state = 1;
00314          speex_bits_reset(&tmp->bits);
00315          memset(&pvt->f, 0, sizeof(pvt->f));
00316          pvt->f.frametype = AST_FRAME_CNG;
00317          pvt->f.samples = samples;
00318          /* XXX what now ? format etc... */
00319       }
00320    }
00321 
00322    /* Terminate bit stream */
00323    speex_bits_pack(&tmp->bits, 15, 5);
00324    datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
00325    return ast_trans_frameout(pvt, datalen, samples);
00326 }
00327 
00328 static void speextolin_destroy(struct ast_trans_pvt *arg)
00329 {
00330    struct speex_coder_pvt *pvt = arg->pvt;
00331 
00332    speex_decoder_destroy(pvt->speex);
00333    speex_bits_destroy(&pvt->bits);
00334 }
00335 
00336 static void lintospeex_destroy(struct ast_trans_pvt *arg)
00337 {
00338    struct speex_coder_pvt *pvt = arg->pvt;
00339 #ifdef _SPEEX_TYPES_H
00340    if (preproc)
00341       speex_preprocess_state_destroy(pvt->pp);
00342 #endif
00343    speex_encoder_destroy(pvt->speex);
00344    speex_bits_destroy(&pvt->bits);
00345 }
00346 
00347 static struct ast_translator speextolin = {
00348    .name = "speextolin", 
00349    .srcfmt = AST_FORMAT_SPEEX,
00350    .dstfmt =  AST_FORMAT_SLINEAR,
00351    .newpvt = speextolin_new,
00352    .framein = speextolin_framein,
00353    .destroy = speextolin_destroy,
00354    .sample = speextolin_sample,
00355    .desc_size = sizeof(struct speex_coder_pvt),
00356    .buffer_samples = BUFFER_SAMPLES,
00357    .buf_size = BUFFER_SAMPLES * 2,
00358    .native_plc = 1,
00359 };
00360 
00361 static struct ast_translator lintospeex = {
00362    .name = "lintospeex", 
00363    .srcfmt = AST_FORMAT_SLINEAR,
00364    .dstfmt = AST_FORMAT_SPEEX,
00365    .newpvt = lintospeex_new,
00366    .framein = lintospeex_framein,
00367    .frameout = lintospeex_frameout,
00368    .destroy = lintospeex_destroy,
00369    .sample = lintospeex_sample,
00370    .desc_size = sizeof(struct speex_coder_pvt),
00371    .buffer_samples = BUFFER_SAMPLES,
00372    .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
00373 };
00374 
00375 static void parse_config(void) 
00376 {
00377    struct ast_config *cfg = ast_config_load("codecs.conf");
00378    struct ast_variable *var;
00379    int res;
00380    float res_f;
00381 
00382    if (cfg == NULL)
00383       return;
00384 
00385    for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
00386       if (!strcasecmp(var->name, "quality")) {
00387          res = abs(atoi(var->value));
00388          if (res > -1 && res < 11) {
00389             if (option_verbose > 2)
00390                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Quality to %d\n",res);
00391             quality = res;
00392          } else 
00393             ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
00394       } else if (!strcasecmp(var->name, "complexity")) {
00395          res = abs(atoi(var->value));
00396          if (res > -1 && res < 11) {
00397             if (option_verbose > 2)
00398                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting Complexity to %d\n",res);
00399             complexity = res;
00400          } else 
00401             ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
00402       } else if (!strcasecmp(var->name, "vbr_quality")) {
00403          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
00404             if (option_verbose > 2)
00405                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
00406             vbr_quality = res_f;
00407          } else
00408             ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
00409       } else if (!strcasecmp(var->name, "abr_quality")) {
00410          ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
00411       } else if (!strcasecmp(var->name, "enhancement")) {
00412          enhancement = ast_true(var->value) ? 1 : 0;
00413          if (option_verbose > 2)
00414             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
00415       } else if (!strcasecmp(var->name, "vbr")) {
00416          vbr = ast_true(var->value) ? 1 : 0;
00417          if (option_verbose > 2)
00418             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
00419       } else if (!strcasecmp(var->name, "abr")) {
00420          res = abs(atoi(var->value));
00421          if (res >= 0) {
00422             if (option_verbose > 2) {
00423                if (res > 0)
00424                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
00425                else
00426                   ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Disabling ABR\n");
00427             }
00428             abr = res;
00429          } else 
00430             ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
00431       } else if (!strcasecmp(var->name, "vad")) {
00432          vad = ast_true(var->value) ? 1 : 0;
00433          if (option_verbose > 2)
00434             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
00435       } else if (!strcasecmp(var->name, "dtx")) {
00436          dtx = ast_true(var->value) ? 1 : 0;
00437          if (option_verbose > 2)
00438             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
00439       } else if (!strcasecmp(var->name, "preprocess")) {
00440          preproc = ast_true(var->value) ? 1 : 0;
00441          if (option_verbose > 2)
00442             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
00443       } else if (!strcasecmp(var->name, "pp_vad")) {
00444          pp_vad = ast_true(var->value) ? 1 : 0;
00445          if (option_verbose > 2)
00446             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
00447       } else if (!strcasecmp(var->name, "pp_agc")) {
00448          pp_agc = ast_true(var->value) ? 1 : 0;
00449          if (option_verbose > 2)
00450             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
00451       } else if (!strcasecmp(var->name, "pp_agc_level")) {
00452          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00453             if (option_verbose > 2)
00454                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
00455             pp_agc_level = res_f;
00456          } else
00457             ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
00458       } else if (!strcasecmp(var->name, "pp_denoise")) {
00459          pp_denoise = ast_true(var->value) ? 1 : 0;
00460          if (option_verbose > 2)
00461             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
00462       } else if (!strcasecmp(var->name, "pp_dereverb")) {
00463          pp_dereverb = ast_true(var->value) ? 1 : 0;
00464          if (option_verbose > 2)
00465             ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
00466       } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
00467          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00468             if (option_verbose > 2)
00469                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
00470             pp_dereverb_decay = res_f;
00471          } else
00472             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
00473       } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
00474          if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
00475             if (option_verbose > 2)
00476                ast_verbose(VERBOSE_PREFIX_3 "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
00477             pp_dereverb_level = res_f;
00478          } else
00479             ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
00480       }
00481    }
00482    ast_config_destroy(cfg);
00483 }
00484 
/*!
 * \brief Module reload hook: re-read the Speex settings from codecs.conf.
 *
 * \return always 0
 */
static int reload(void)
{
	parse_config();
	return 0;
}
00491 
00492 static int unload_module(void)
00493 {
00494    int res;
00495 
00496    res = ast_unregister_translator(&lintospeex);
00497    res |= ast_unregister_translator(&speextolin);
00498 
00499    return res;
00500 }
00501 
00502 static int load_module(void)
00503 {
00504    int res;
00505 
00506    parse_config();
00507    res=ast_register_translator(&speextolin);
00508    if (!res) 
00509       res=ast_register_translator(&lintospeex);
00510    else
00511       ast_unregister_translator(&speextolin);
00512 
00513    return res;
00514 }
00515 
00516 AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
00517       .load = load_module,
00518       .unload = unload_module,
00519       .reload = reload,
00520           );

Generated on Wed Aug 15 01:24:20 2007 for Asterisk - the Open Source PBX by  doxygen 1.5.3