Libav
|
00001 /* 00002 * audio encoder psychoacoustic model 00003 * Copyright (C) 2008 Konstantin Shishkov 00004 * 00005 * This file is part of FFmpeg. 00006 * 00007 * FFmpeg is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * FFmpeg is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with FFmpeg; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #ifndef AVCODEC_PSYMODEL_H 00023 #define AVCODEC_PSYMODEL_H 00024 00025 #include "avcodec.h" 00026 00028 #define PSY_MAX_BANDS 128 00029 00033 typedef struct FFPsyBand { 00034 int bits; 00035 float energy; 00036 float threshold; 00037 float distortion; 00038 float perceptual_weight; 00039 } FFPsyBand; 00040 00044 typedef struct FFPsyWindowInfo { 00045 int window_type[3]; 00046 int window_shape; 00047 int num_windows; 00048 int grouping[8]; 00049 int *window_sizes; 00050 } FFPsyWindowInfo; 00051 00055 typedef struct FFPsyContext { 00056 AVCodecContext *avctx; 00057 const struct FFPsyModel *model; 00058 00059 FFPsyBand *psy_bands; 00060 00061 uint8_t **bands; 00062 int *num_bands; 00063 int num_lens; 00064 00065 void* model_priv_data; 00066 } FFPsyContext; 00067 00071 typedef struct FFPsyModel { 00072 const char *name; 00073 int (*init) (FFPsyContext *apc); 00074 FFPsyWindowInfo (*window)(FFPsyContext *ctx, const int16_t *audio, const int16_t *la, int channel, int prev_type); 00075 void (*analyze)(FFPsyContext *ctx, int channel, const float *coeffs, FFPsyWindowInfo *wi); 00076 void (*end) (FFPsyContext *apc); 00077 } FFPsyModel; 00078 00090 av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, 00091 int num_lens, 00092 const uint8_t **bands, const int* num_bands); 00093 00105 FFPsyWindowInfo ff_psy_suggest_window(FFPsyContext *ctx, 00106 const int16_t *audio, const int16_t *la, 00107 int channel, int prev_type); 00108 00109 00118 void ff_psy_set_band_info(FFPsyContext *ctx, int channel, const float *coeffs, 00119 FFPsyWindowInfo *wi); 00120 00126 av_cold void ff_psy_end(FFPsyContext *ctx); 00127 00128 00129 /************************************************************************** 00130 * Audio preprocessing stuff. * 00131 * This should be moved into some audio filter eventually. * 00132 **************************************************************************/ 00133 struct FFPsyPreprocessContext; 00134 00138 av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *avctx); 00139 00149 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, 00150 const int16_t *audio, int16_t *dest, 00151 int tag, int channels); 00152 00156 av_cold void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); 00157 00158 #endif /* AVCODEC_PSYMODEL_H */