/* Libav 0.7.1 */
00001 /* 00002 * Nellymoser encoder 00003 * This code is developed as part of Google Summer of Code 2008 Program. 00004 * 00005 * Copyright (c) 2008 Bartlomiej Wolowiec 00006 * 00007 * This file is part of Libav. 00008 * 00009 * Libav is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * Libav is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with Libav; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00022 */ 00023 00038 #include "nellymoser.h" 00039 #include "avcodec.h" 00040 #include "dsputil.h" 00041 #include "fft.h" 00042 #include "sinewin.h" 00043 00044 #define BITSTREAM_WRITER_LE 00045 #include "put_bits.h" 00046 00047 #define POW_TABLE_SIZE (1<<11) 00048 #define POW_TABLE_OFFSET 3 00049 #define OPT_SIZE ((1<<15) + 3000) 00050 00051 typedef struct NellyMoserEncodeContext { 00052 AVCodecContext *avctx; 00053 int last_frame; 00054 int bufsel; 00055 int have_saved; 00056 DSPContext dsp; 00057 FFTContext mdct_ctx; 00058 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES]; 00059 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES]; 00060 DECLARE_ALIGNED(32, float, buf)[2][3 * NELLY_BUF_LEN]; 00061 float (*opt )[NELLY_BANDS]; 00062 uint8_t (*path)[NELLY_BANDS]; 00063 } NellyMoserEncodeContext; 00064 00065 static float pow_table[POW_TABLE_SIZE]; 00066 00067 static const uint8_t sf_lut[96] = { 00068 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 00069 5, 5, 5, 6, 7, 7, 8, 8, 9, 
10, 11, 11, 12, 13, 13, 14, 00070 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 00071 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 00072 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 00073 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 00074 }; 00075 00076 static const uint8_t sf_delta_lut[78] = { 00077 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 00078 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 00079 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 00080 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 00081 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 00082 }; 00083 00084 static const uint8_t quant_lut[230] = { 00085 0, 00086 00087 0, 1, 2, 00088 00089 0, 1, 2, 3, 4, 5, 6, 00090 00091 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 00092 12, 13, 13, 13, 14, 00093 00094 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 00095 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 00096 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 00097 30, 00098 00099 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 00100 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 00101 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 00102 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 00103 21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 00104 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 00105 46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 00106 53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 00107 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 00108 61, 61, 61, 61, 62, 00109 }; 00110 00111 static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 00112 static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 00113 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 
230 }; 00114 00115 static void apply_mdct(NellyMoserEncodeContext *s) 00116 { 00117 s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN); 00118 s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, 00119 NELLY_BUF_LEN); 00120 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); 00121 00122 s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, 00123 ff_sine_128, NELLY_BUF_LEN); 00124 s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, 00125 NELLY_BUF_LEN); 00126 s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); 00127 } 00128 00129 static av_cold int encode_init(AVCodecContext *avctx) 00130 { 00131 NellyMoserEncodeContext *s = avctx->priv_data; 00132 int i; 00133 00134 if (avctx->channels != 1) { 00135 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n"); 00136 return -1; 00137 } 00138 00139 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && 00140 avctx->sample_rate != 11025 && 00141 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && 00142 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 00143 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n"); 00144 return -1; 00145 } 00146 00147 avctx->frame_size = NELLY_SAMPLES; 00148 s->avctx = avctx; 00149 ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0); 00150 dsputil_init(&s->dsp, avctx); 00151 00152 /* Generate overlap window */ 00153 ff_sine_window_init(ff_sine_128, 128); 00154 for (i = 0; i < POW_TABLE_SIZE; i++) 00155 pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); 00156 00157 if (s->avctx->trellis) { 00158 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 00159 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t)); 00160 } 00161 00162 return 0; 00163 } 00164 00165 static av_cold int 
encode_end(AVCodecContext *avctx) 00166 { 00167 NellyMoserEncodeContext *s = avctx->priv_data; 00168 00169 ff_mdct_end(&s->mdct_ctx); 00170 00171 if (s->avctx->trellis) { 00172 av_free(s->opt); 00173 av_free(s->path); 00174 } 00175 00176 return 0; 00177 } 00178 00179 #define find_best(val, table, LUT, LUT_add, LUT_size) \ 00180 best_idx = \ 00181 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ 00182 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ 00183 best_idx++; 00184 00185 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 00186 { 00187 int band, best_idx, power_idx = 0; 00188 float power_candidate; 00189 00190 //base exponent 00191 find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); 00192 idx_table[0] = best_idx; 00193 power_idx = ff_nelly_init_table[best_idx]; 00194 00195 for (band = 1; band < NELLY_BANDS; band++) { 00196 power_candidate = cand[band] - power_idx; 00197 find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 00198 idx_table[band] = best_idx; 00199 power_idx += ff_nelly_delta_table[best_idx]; 00200 } 00201 } 00202 00203 static inline float distance(float x, float y, int band) 00204 { 00205 //return pow(fabs(x-y), 2.0); 00206 float tmp = x - y; 00207 return tmp * tmp; 00208 } 00209 00210 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 00211 { 00212 int i, j, band, best_idx; 00213 float power_candidate, best_val; 00214 00215 float (*opt )[NELLY_BANDS] = s->opt ; 00216 uint8_t(*path)[NELLY_BANDS] = s->path; 00217 00218 for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 00219 opt[0][i] = INFINITY; 00220 } 00221 00222 for (i = 0; i < 64; i++) { 00223 opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 00224 path[0][ff_nelly_init_table[i]] = i; 00225 } 00226 00227 for (band = 1; band < NELLY_BANDS; band++) { 00228 int q, c = 0; 00229 float tmp; 00230 int idx_min, idx_max, idx; 00231 
power_candidate = cand[band]; 00232 for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 00233 idx_min = FFMAX(0, cand[band] - q); 00234 idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q); 00235 for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { 00236 if ( isinf(opt[band - 1][i]) ) 00237 continue; 00238 for (j = 0; j < 32; j++) { 00239 idx = i + ff_nelly_delta_table[j]; 00240 if (idx > idx_max) 00241 break; 00242 if (idx >= idx_min) { 00243 tmp = opt[band - 1][i] + distance(idx, power_candidate, band); 00244 if (opt[band][idx] > tmp) { 00245 opt[band][idx] = tmp; 00246 path[band][idx] = j; 00247 c = 1; 00248 } 00249 } 00250 } 00251 } 00252 } 00253 assert(c); //FIXME 00254 } 00255 00256 best_val = INFINITY; 00257 best_idx = -1; 00258 band = NELLY_BANDS - 1; 00259 for (i = 0; i < OPT_SIZE; i++) { 00260 if (best_val > opt[band][i]) { 00261 best_val = opt[band][i]; 00262 best_idx = i; 00263 } 00264 } 00265 for (band = NELLY_BANDS - 1; band >= 0; band--) { 00266 idx_table[band] = path[band][best_idx]; 00267 if (band) { 00268 best_idx -= ff_nelly_delta_table[path[band][best_idx]]; 00269 } 00270 } 00271 } 00272 00279 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 00280 { 00281 PutBitContext pb; 00282 int i, j, band, block, best_idx, power_idx = 0; 00283 float power_val, coeff, coeff_sum; 00284 float pows[NELLY_FILL_LEN]; 00285 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS]; 00286 float cand[NELLY_BANDS]; 00287 00288 apply_mdct(s); 00289 00290 init_put_bits(&pb, output, output_size * 8); 00291 00292 i = 0; 00293 for (band = 0; band < NELLY_BANDS; band++) { 00294 coeff_sum = 0; 00295 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 00296 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] 00297 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; 00298 } 00299 cand[band] = 00300 log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 00301 } 00302 00303 
if (s->avctx->trellis) { 00304 get_exponent_dynamic(s, cand, idx_table); 00305 } else { 00306 get_exponent_greedy(s, cand, idx_table); 00307 } 00308 00309 i = 0; 00310 for (band = 0; band < NELLY_BANDS; band++) { 00311 if (band) { 00312 power_idx += ff_nelly_delta_table[idx_table[band]]; 00313 put_bits(&pb, 5, idx_table[band]); 00314 } else { 00315 power_idx = ff_nelly_init_table[idx_table[0]]; 00316 put_bits(&pb, 6, idx_table[0]); 00317 } 00318 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 00319 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 00320 s->mdct_out[i] *= power_val; 00321 s->mdct_out[i + NELLY_BUF_LEN] *= power_val; 00322 pows[i] = power_idx; 00323 } 00324 } 00325 00326 ff_nelly_get_sample_bits(pows, bits); 00327 00328 for (block = 0; block < 2; block++) { 00329 for (i = 0; i < NELLY_FILL_LEN; i++) { 00330 if (bits[i] > 0) { 00331 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; 00332 coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; 00333 best_idx = 00334 quant_lut[av_clip ( 00335 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 00336 quant_lut_offset[bits[i]], 00337 quant_lut_offset[bits[i]+1] - 1 00338 )]; 00339 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) 00340 best_idx++; 00341 00342 put_bits(&pb, bits[i], best_idx); 00343 } 00344 } 00345 if (!block) 00346 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0); 00347 } 00348 00349 flush_put_bits(&pb); 00350 } 00351 00352 static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 00353 { 00354 NellyMoserEncodeContext *s = avctx->priv_data; 00355 const int16_t *samples = data; 00356 int i; 00357 00358 if (s->last_frame) 00359 return 0; 00360 00361 if (data) { 00362 for (i = 0; i < avctx->frame_size; i++) { 00363 s->buf[s->bufsel][i] = samples[i]; 00364 } 00365 for (; i < NELLY_SAMPLES; i++) { 00366 s->buf[s->bufsel][i] = 0; 00367 } 00368 
s->bufsel = 1 - s->bufsel; 00369 if (!s->have_saved) { 00370 s->have_saved = 1; 00371 return 0; 00372 } 00373 } else { 00374 memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); 00375 s->bufsel = 1 - s->bufsel; 00376 s->last_frame = 1; 00377 } 00378 00379 if (s->have_saved) { 00380 encode_block(s, frame, buf_size); 00381 return NELLY_BLOCK_LEN; 00382 } 00383 return 0; 00384 } 00385 00386 AVCodec ff_nellymoser_encoder = { 00387 .name = "nellymoser", 00388 .type = AVMEDIA_TYPE_AUDIO, 00389 .id = CODEC_ID_NELLYMOSER, 00390 .priv_data_size = sizeof(NellyMoserEncodeContext), 00391 .init = encode_init, 00392 .encode = encode_frame, 00393 .close = encode_end, 00394 .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, 00395 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), 00396 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, 00397 };