Libav
|
00001 /* 00002 * DSP Group TrueSpeech compatible decoder 00003 * Copyright (c) 2005 Konstantin Shishkov 00004 * 00005 * This file is part of FFmpeg. 00006 * 00007 * FFmpeg is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * FFmpeg is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with FFmpeg; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "libavutil/intreadwrite.h" 00023 #include "avcodec.h" 00024 00025 #include "truespeech_data.h" 00034 typedef struct { 00035 /* input data */ 00036 int16_t vector[8]; //< input vector: 5/5/4/4/4/3/3/3 00037 int offset1[2]; //< 8-bit value, used in one copying offset 00038 int offset2[4]; //< 7-bit value, encodes offsets for copying and for two-point filter 00039 int pulseoff[4]; //< 4-bit offset of pulse values block 00040 int pulsepos[4]; //< 27-bit variable, encodes 7 pulse positions 00041 int pulseval[4]; //< 7x2-bit pulse values 00042 int flag; //< 1-bit flag, shows how to choose filters 00043 /* temporary data */ 00044 int filtbuf[146]; // some big vector used for storing filters 00045 int prevfilt[8]; // filter from previous frame 00046 int16_t tmp1[8]; // coefficients for adding to out 00047 int16_t tmp2[8]; // coefficients for adding to out 00048 int16_t tmp3[8]; // coefficients for adding to out 00049 int16_t cvector[8]; // correlated input vector 00050 int filtval; // gain value for one function 00051 int16_t newvec[60]; // tmp vector 00052 int16_t filters[32]; // filters for every subframe 00053 } TSContext; 00054 00055 static av_cold int truespeech_decode_init(AVCodecContext * avctx) 00056 { 00057 // TSContext *c = avctx->priv_data; 00058 00059 avctx->sample_fmt = SAMPLE_FMT_S16; 00060 return 0; 00061 } 00062 00063 static void truespeech_read_frame(TSContext *dec, const uint8_t *input) 00064 { 00065 uint32_t t; 00066 00067 /* first dword */ 00068 t = AV_RL32(input); 00069 input += 4; 00070 00071 dec->flag = t & 1; 00072 00073 dec->vector[0] = ts_codebook[0][(t >> 1) & 0x1F]; 00074 dec->vector[1] = ts_codebook[1][(t >> 6) & 0x1F]; 00075 dec->vector[2] = ts_codebook[2][(t >> 11) & 0xF]; 00076 dec->vector[3] = ts_codebook[3][(t >> 15) & 0xF]; 00077 dec->vector[4] = ts_codebook[4][(t >> 19) & 0xF]; 00078 dec->vector[5] = ts_codebook[5][(t >> 23) & 0x7]; 00079 dec->vector[6] = ts_codebook[6][(t >> 26) & 0x7]; 00080 dec->vector[7] = ts_codebook[7][(t >> 29) & 0x7]; 00081 00082 /* second dword */ 00083 t = AV_RL32(input); 00084 input += 4; 00085 00086 dec->offset2[0] = (t >> 0) & 0x7F; 00087 dec->offset2[1] = (t >> 7) & 0x7F; 00088 dec->offset2[2] = (t >> 14) & 0x7F; 00089 dec->offset2[3] = (t >> 21) & 0x7F; 00090 00091 dec->offset1[0] = ((t >> 28) & 0xF) << 4; 00092 00093 /* third dword */ 00094 t = AV_RL32(input); 00095 input += 4; 00096 00097 dec->pulseval[0] = (t >> 0) & 0x3FFF; 00098 dec->pulseval[1] = (t >> 14) & 0x3FFF; 00099 00100 dec->offset1[1] = (t >> 28) & 0x0F; 00101 00102 /* fourth dword */ 00103 t = AV_RL32(input); 00104 input += 4; 00105 00106 dec->pulseval[2] = (t >> 0) & 0x3FFF; 00107 dec->pulseval[3] = (t >> 14) & 0x3FFF; 00108 00109 dec->offset1[1] |= ((t >> 28) & 0x0F) << 4; 00110 00111 /* fifth dword */ 00112 t = AV_RL32(input); 00113 input += 4; 00114 00115 dec->pulsepos[0] = (t >> 4) & 0x7FFFFFF; 00116 00117 dec->pulseoff[0] = (t >> 0) & 0xF; 00118 00119 dec->offset1[0] |= (t >> 31) & 1; 00120 00121 /* sixth dword */ 00122 t = AV_RL32(input); 00123 input += 4; 00124 00125 dec->pulsepos[1] = (t >> 4) & 0x7FFFFFF; 00126 00127 dec->pulseoff[1] = (t >> 0) & 0xF; 00128 00129 dec->offset1[0] |= ((t >> 31) & 1) << 1; 00130 00131 /* seventh dword */ 00132 t = AV_RL32(input); 00133 input += 4; 00134 00135 dec->pulsepos[2] = (t >> 4) & 0x7FFFFFF; 00136 00137 dec->pulseoff[2] = (t >> 0) & 0xF; 00138 00139 dec->offset1[0] |= ((t >> 31) & 1) << 2; 00140 00141 /* eighth dword */ 00142 t = AV_RL32(input); 00143 input += 4; 00144 00145 dec->pulsepos[3] = (t >> 4) & 0x7FFFFFF; 00146 00147 dec->pulseoff[3] = (t >> 0) & 0xF; 00148 00149 dec->offset1[0] |= ((t >> 31) & 1) << 3; 00150 00151 } 00152 00153 static void truespeech_correlate_filter(TSContext *dec) 00154 { 00155 int16_t tmp[8]; 00156 int i, j; 00157 00158 for(i = 0; i < 8; i++){ 00159 if(i > 0){ 00160 memcpy(tmp, dec->cvector, i * 2); 00161 for(j = 0; j < i; j++) 00162 dec->cvector[j] = ((tmp[i - j - 1] * dec->vector[i]) + 00163 (dec->cvector[j] << 15) + 0x4000) >> 15; 00164 } 00165 dec->cvector[i] = (8 - dec->vector[i]) >> 3; 00166 } 00167 for(i = 0; i < 8; i++) 00168 dec->cvector[i] = (dec->cvector[i] * ts_230[i]) >> 15; 00169 00170 dec->filtval = dec->vector[0]; 00171 } 00172 00173 static void truespeech_filters_merge(TSContext *dec) 00174 { 00175 int i; 00176 00177 if(!dec->flag){ 00178 for(i = 0; i < 8; i++){ 00179 dec->filters[i + 0] = dec->prevfilt[i]; 00180 dec->filters[i + 8] = dec->prevfilt[i]; 00181 } 00182 }else{ 00183 for(i = 0; i < 8; i++){ 00184 dec->filters[i + 0]=(dec->cvector[i] * 21846 + dec->prevfilt[i] * 10923 + 16384) >> 15; 00185 dec->filters[i + 8]=(dec->cvector[i] * 10923 + dec->prevfilt[i] * 21846 + 16384) >> 15; 00186 } 00187 } 00188 for(i = 0; i < 8; i++){ 00189 dec->filters[i + 16] = dec->cvector[i]; 00190 dec->filters[i + 24] = dec->cvector[i]; 00191 } 00192 } 00193 00194 static void truespeech_apply_twopoint_filter(TSContext *dec, int quart) 00195 { 00196 int16_t tmp[146 + 60], *ptr0, *ptr1; 00197 const int16_t *filter; 00198 int i, t, off; 00199 00200 t = dec->offset2[quart]; 00201 if(t == 127){ 00202 memset(dec->newvec, 0, 60 * 2); 00203 return; 00204 } 00205 for(i = 0; i < 146; i++) 00206 tmp[i] = dec->filtbuf[i]; 00207 off = (t / 25) + dec->offset1[quart >> 1] + 18; 00208 ptr0 = tmp + 145 - off; 00209 ptr1 = tmp + 146; 00210 filter = (const int16_t*)ts_240 + (t % 25) * 2; 00211 for(i = 0; i < 60; i++){ 00212 t = (ptr0[0] * filter[0] + ptr0[1] * filter[1] + 0x2000) >> 14; 00213 ptr0++; 00214 dec->newvec[i] = t; 00215 ptr1[i] = t; 00216 } 00217 } 00218 00219 static void truespeech_place_pulses(TSContext *dec, int16_t *out, int quart) 00220 { 00221 int16_t tmp[7]; 00222 int i, j, t; 00223 const int16_t *ptr1; 00224 int16_t *ptr2; 00225 int coef; 00226 00227 memset(out, 0, 60 * 2); 00228 for(i = 0; i < 7; i++) { 00229 t = dec->pulseval[quart] & 3; 00230 dec->pulseval[quart] >>= 2; 00231 tmp[6 - i] = ts_562[dec->pulseoff[quart] * 4 + t]; 00232 } 00233 00234 coef = dec->pulsepos[quart] >> 15; 00235 ptr1 = (const int16_t*)ts_140 + 30; 00236 ptr2 = tmp; 00237 for(i = 0, j = 3; (i < 30) && (j > 0); i++){ 00238 t = *ptr1++; 00239 if(coef >= t) 00240 coef -= t; 00241 else{ 00242 out[i] = *ptr2++; 00243 ptr1 += 30; 00244 j--; 00245 } 00246 } 00247 coef = dec->pulsepos[quart] & 0x7FFF; 00248 ptr1 = (const int16_t*)ts_140; 00249 for(i = 30, j = 4; (i < 60) && (j > 0); i++){ 00250 t = *ptr1++; 00251 if(coef >= t) 00252 coef -= t; 00253 else{ 00254 out[i] = *ptr2++; 00255 ptr1 += 30; 00256 j--; 00257 } 00258 } 00259 00260 } 00261 00262 static void truespeech_update_filters(TSContext *dec, int16_t *out, int quart) 00263 { 00264 int i; 00265 00266 for(i = 0; i < 86; i++) 00267 dec->filtbuf[i] = dec->filtbuf[i + 60]; 00268 for(i = 0; i < 60; i++){ 00269 dec->filtbuf[i + 86] = out[i] + dec->newvec[i] - (dec->newvec[i] >> 3); 00270 out[i] += dec->newvec[i]; 00271 } 00272 } 00273 00274 static void truespeech_synth(TSContext *dec, int16_t *out, int quart) 00275 { 00276 int i,k; 00277 int t[8]; 00278 int16_t *ptr0, *ptr1; 00279 00280 ptr0 = dec->tmp1; 00281 ptr1 = dec->filters + quart * 8; 00282 for(i = 0; i < 60; i++){ 00283 int sum = 0; 00284 for(k = 0; k < 8; k++) 00285 sum += ptr0[k] * ptr1[k]; 00286 sum = (sum + (out[i] << 12) + 0x800) >> 12; 00287 out[i] = av_clip(sum, -0x7FFE, 0x7FFE); 00288 for(k = 7; k > 0; k--) 00289 ptr0[k] = ptr0[k - 1]; 00290 ptr0[0] = out[i]; 00291 } 00292 00293 for(i = 0; i < 8; i++) 00294 t[i] = (ts_5E2[i] * ptr1[i]) >> 15; 00295 00296 ptr0 = dec->tmp2; 00297 for(i = 0; i < 60; i++){ 00298 int sum = 0; 00299 for(k = 0; k < 8; k++) 00300 sum += ptr0[k] * t[k]; 00301 for(k = 7; k > 0; k--) 00302 ptr0[k] = ptr0[k - 1]; 00303 ptr0[0] = out[i]; 00304 out[i] = ((out[i] << 12) - sum) >> 12; 00305 } 00306 00307 for(i = 0; i < 8; i++) 00308 t[i] = (ts_5F2[i] * ptr1[i]) >> 15; 00309 00310 ptr0 = dec->tmp3; 00311 for(i = 0; i < 60; i++){ 00312 int sum = out[i] << 12; 00313 for(k = 0; k < 8; k++) 00314 sum += ptr0[k] * t[k]; 00315 for(k = 7; k > 0; k--) 00316 ptr0[k] = ptr0[k - 1]; 00317 ptr0[0] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE); 00318 00319 sum = ((ptr0[1] * (dec->filtval - (dec->filtval >> 2))) >> 4) + sum; 00320 sum = sum - (sum >> 3); 00321 out[i] = av_clip((sum + 0x800) >> 12, -0x7FFE, 0x7FFE); 00322 } 00323 } 00324 00325 static void truespeech_save_prevvec(TSContext *c) 00326 { 00327 int i; 00328 00329 for(i = 0; i < 8; i++) 00330 c->prevfilt[i] = c->cvector[i]; 00331 } 00332 00333 static int truespeech_decode_frame(AVCodecContext *avctx, 00334 void *data, int *data_size, 00335 AVPacket *avpkt) 00336 { 00337 const uint8_t *buf = avpkt->data; 00338 int buf_size = avpkt->size; 00339 TSContext *c = avctx->priv_data; 00340 00341 int i, j; 00342 short *samples = data; 00343 int consumed = 0; 00344 int16_t out_buf[240]; 00345 int iterations; 00346 00347 if (!buf_size) 00348 return 0; 00349 00350 iterations = FFMIN(buf_size / 32, *data_size / 480); 00351 for(j = 0; j < iterations; j++) { 00352 truespeech_read_frame(c, buf + consumed); 00353 consumed += 32; 00354 00355 truespeech_correlate_filter(c); 00356 truespeech_filters_merge(c); 00357 00358 memset(out_buf, 0, 240 * 2); 00359 for(i = 0; i < 4; i++) { 00360 truespeech_apply_twopoint_filter(c, i); 00361 truespeech_place_pulses(c, out_buf + i * 60, i); 00362 truespeech_update_filters(c, out_buf + i * 60, i); 00363 truespeech_synth(c, out_buf + i * 60, i); 00364 } 00365 00366 truespeech_save_prevvec(c); 00367 00368 /* finally output decoded frame */ 00369 for(i = 0; i < 240; i++) 00370 *samples++ = out_buf[i]; 00371 00372 } 00373 00374 *data_size = consumed * 15; 00375 00376 return consumed; 00377 } 00378 00379 AVCodec truespeech_decoder = { 00380 "truespeech", 00381 AVMEDIA_TYPE_AUDIO, 00382 CODEC_ID_TRUESPEECH, 00383 sizeof(TSContext), 00384 truespeech_decode_init, 00385 NULL, 00386 NULL, 00387 truespeech_decode_frame, 00388 .long_name = NULL_IF_CONFIG_SMALL("DSP Group TrueSpeech"), 00389 };