Libav
|
00001 /* 00002 * Nellymoser encoder 00003 * This code is developed as part of Google Summer of Code 2008 Program. 00004 * 00005 * Copyright (c) 2008 Bartlomiej Wolowiec 00006 * 00007 * This file is part of FFmpeg. 00008 * 00009 * FFmpeg is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * FFmpeg is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with FFmpeg; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 00038 #include "nellymoser.h" 00039 #include "avcodec.h" 00040 #include "dsputil.h" 00041 #include "fft.h" 00042 00043 #define BITSTREAM_WRITER_LE 00044 #include "put_bits.h" 00045 00046 #define POW_TABLE_SIZE (1<<11) 00047 #define POW_TABLE_OFFSET 3 00048 #define OPT_SIZE ((1<<15) + 3000) 00049 00050 typedef struct NellyMoserEncodeContext { 00051 AVCodecContext *avctx; 00052 int last_frame; 00053 int bufsel; 00054 int have_saved; 00055 DSPContext dsp; 00056 FFTContext mdct_ctx; 00057 DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; 00058 DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; 00059 DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; 00060 float (*opt )[NELLY_BANDS]; 00061 uint8_t (*path)[NELLY_BANDS]; 00062 } NellyMoserEncodeContext; 00063 00064 static float pow_table[POW_TABLE_SIZE]; 00065 00066 static const uint8_t sf_lut[96] = { 00067 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 00068 5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 00069 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 00070 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 00071 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 00072 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 00073 }; 00074 00075 static const uint8_t sf_delta_lut[78] = { 00076 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 00077 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 00078 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 00079 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 00080 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 00081 }; 00082 00083 static const uint8_t quant_lut[230] = { 00084 0, 00085 00086 0, 1, 2, 00087 00088 0, 1, 2, 3, 4, 5, 6, 00089 00090 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 00091 12, 13, 13, 13, 14, 00092 00093 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 00094 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 00095 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 00096 30, 00097 00098 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 00099 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 00100 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 00101 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 00102 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 00103 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 00104 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 00105 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 00106 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 00107 61, 61, 61, 61, 62, 00108 }; 00109 00110 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 00111 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 00112 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 00113 00114 static void apply_mdct(NellyMoserEncodeContext *s) 00115 { 00116 memcpy(s->in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float)); 00117 s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN); 00118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, 00119 NELLY_BUF_LEN); 00120 ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); 00121 00122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN); 00123 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, 00124 NELLY_BUF_LEN); 00125 ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); 00126 } 00127 00128 static av_cold int encode_init(AVCodecContext *avctx) 00129 { 00130 NellyMoserEncodeContext *s = avctx->priv_data; 00131 int i; 00132 00133 if (avctx->channels != 1) { 00134 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n"); 00135 return -1; 00136 } 00137 00138 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && 00139 avctx->sample_rate != 11025 && 00140 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && 00141 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 00142 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n"); 00143 return -1; 00144 } 00145 00146 avctx->frame_size = NELLY_SAMPLES; 00147 s->avctx = avctx; 00148 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0); 00149 dsputil_init(&s->dsp, avctx); 00150 00151 /* Generate overlap window */ 00152 ff_sine_window_init(ff_sine_128, 128); 00153 for (i = 0; i < POW_TABLE_SIZE; i++) 00154 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); 00155 00156 if (s->avctx->trellis) { 00157 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 00158 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t)); 00159 } 00160 00161 return 0; 00162 } 00163 00164 static av_cold int encode_end(AVCodecContext *avctx) 00165 { 00166 NellyMoserEncodeContext *s = avctx->priv_data; 00167 00168 ff_mdct_end(&s->mdct_ctx); 00169 00170 if (s->avctx->trellis) { 00171 av_free(s->opt); 00172 av_free(s->path); 00173 } 00174 00175 return 0; 00176 } 00177 00178 #define find_best(val, table, LUT, LUT_add, LUT_size) \ 00179 best_idx = \ 00180 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ 00181 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ 00182 best_idx++; 00183 00184 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 00185 { 00186 int band, best_idx, power_idx = 0; 00187 float power_candidate; 00188 00189 //base exponent 00190 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); 00191 idx_table[0] = best_idx; 00192 power_idx = ff_nelly_init_table[best_idx]; 00193 00194 for (band = 1; band < NELLY_BANDS; band++) { 00195 power_candidate = cand[band] - power_idx; 00196 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 00197 idx_table[band] = best_idx; 00198 power_idx += ff_nelly_delta_table[best_idx]; 00199 } 00200 } 00201 00202 static inline float distance(float x, float y, int band) 00203 { 00204 //return pow(fabs(x-y), 2.0); 00205 float tmp = x - y; 00206 return tmp * tmp; 00207 } 00208 00209 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 00210 { 00211 int i, j, band, best_idx; 00212 float power_candidate, best_val; 00213 00214 float (*opt )[NELLY_BANDS] = s->opt ; 00215 uint8_t(*path)[NELLY_BANDS] = s->path; 00216 00217 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 00218 opt[0][i] = INFINITY; 00219 } 00220 00221 for (i = 0; i < 64; i++) { 00222 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 00223 path[0][ff_nelly_init_table[i]] = i; 00224 } 00225 00226 for (band = 1; band < NELLY_BANDS; band++) { 00227 int q, c = 0; 00228 float tmp; 00229 int idx_min, idx_max, idx; 00230 power_candidate = cand[band]; 00231 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 00232 idx_min = FFMAX(0, cand[band] - q); 00233 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q); 00234 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { 00235 if ( isinf(opt[band - 1][i]) ) 00236 continue; 00237 for (j = 0; j < 32; j++) { 00238 idx = i + ff_nelly_delta_table[j]; 00239 if (idx > idx_max) 00240 break; 00241 if (idx >= idx_min) { 00242 tmp = opt[band - 1][i] + distance(idx, power_candidate, band); 00243 if (opt[band][idx] > tmp) { 00244 opt[band][idx] = tmp; 00245 path[band][idx] = j; 00246 c = 1; 00247 } 00248 } 00249 } 00250 } 00251 } 00252 assert(c); //FIXME 00253 } 00254 00255 best_val = INFINITY; 00256 best_idx = -1; 00257 band = NELLY_BANDS - 1; 00258 for (i = 0; i < OPT_SIZE; i++) { 00259 if (best_val > opt[band][i]) { 00260 best_val = opt[band][i]; 00261 best_idx = i; 00262 } 00263 } 00264 for (band = NELLY_BANDS - 1; band >= 0; band--) { 00265 idx_table[band] = path[band][best_idx]; 00266 if (band) { 00267 best_idx -= ff_nelly_delta_table[path[band][best_idx]]; 00268 } 00269 } 00270 } 00271 00278 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 00279 { 00280 PutBitContext pb; 00281 int i, j, band, block, best_idx, power_idx = 0; 00282 float power_val, coeff, coeff_sum; 00283 float pows[NELLY_FILL_LEN]; 00284 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS]; 00285 float cand[NELLY_BANDS]; 00286 00287 apply_mdct(s); 00288 00289 init_put_bits(&pb, output, output_size * 8); 00290 00291 i = 0; 00292 for (band = 0; band < NELLY_BANDS; band++) { 00293 coeff_sum = 0; 00294 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 00295 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] 00296 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; 00297 } 00298 cand[band] = 00299 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 00300 } 00301 00302 if (s->avctx->trellis) { 00303 get_exponent_dynamic(s, cand, idx_table); 00304 } else { 00305 get_exponent_greedy(s, cand, idx_table); 00306 } 00307 00308 i = 0; 00309 for (band = 0; band < NELLY_BANDS; band++) { 00310 if (band) { 00311 power_idx += ff_nelly_delta_table[idx_table[band]]; 00312 put_bits(&pb, 5, idx_table[band]); 00313 } else { 00314 power_idx = ff_nelly_init_table[idx_table[0]]; 00315 put_bits(&pb, 6, idx_table[0]); 00316 } 00317 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 00318 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 00319 s->mdct_out[i] *= power_val; 00320 s->mdct_out[i + NELLY_BUF_LEN] *= power_val; 00321 pows[i] = power_idx; 00322 } 00323 } 00324 00325 ff_nelly_get_sample_bits(pows, bits); 00326 00327 for (block = 0; block < 2; block++) { 00328 for (i = 0; i < NELLY_FILL_LEN; i++) { 00329 if (bits[i] > 0) { 00330 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; 00331 coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; 00332 best_idx = 00333 quant_lut[av_clip ( 00334 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 00335 quant_lut_offset[bits[i]], 00336 quant_lut_offset[bits[i]+1] - 1 00337 )]; 00338 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) 00339 best_idx++; 00340 00341 put_bits(&pb, bits[i], best_idx); 00342 } 00343 } 00344 if (!block) 00345 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0); 00346 } 00347 00348 flush_put_bits(&pb); 00349 } 00350 00351 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 00352 { 00353 NellyMoserEncodeContext *s = avctx->priv_data; 00354 int16_t *samples = data; 00355 int i; 00356 00357 if (s->last_frame) 00358 return 0; 00359 00360 if (data) { 00361 for (i = 0; i < avctx->frame_size; i++) { 00362 s->buf[s->bufsel][i] = samples[i]; 00363 } 00364 for (; i < NELLY_SAMPLES; i++) { 00365 s->buf[s->bufsel][i] = 0; 00366 } 00367 s->bufsel = 1 - s->bufsel; 00368 if (!s->have_saved) { 00369 s->have_saved = 1; 00370 return 0; 00371 } 00372 } else { 00373 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); 00374 s->bufsel = 1 - s->bufsel; 00375 s->last_frame = 1; 00376 } 00377 00378 if (s->have_saved) { 00379 encode_block(s, frame, buf_size); 00380 return NELLY_BLOCK_LEN; 00381 } 00382 return 0; 00383 } 00384 00385 AVCodec nellymoser_encoder = { 00386 .name = "nellymoser", 00387 .type = AVMEDIA_TYPE_AUDIO, 00388 .id = CODEC_ID_NELLYMOSER, 00389 .priv_data_size = sizeof(NellyMoserEncodeContext), 00390 .init = encode_init, 00391 .encode = encode_frame, 00392 .close = encode_end, 00393 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 00394 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), 00395 };