Libav
|
00001 /* 00002 * (c) 2002 Fabrice Bellard 00003 * 00004 * This file is part of FFmpeg. 00005 * 00006 * FFmpeg is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * FFmpeg is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with FFmpeg; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00019 */ 00020 00026 #include "libavutil/mathematics.h" 00027 #include "libavutil/lfg.h" 00028 #include "libavutil/log.h" 00029 #include "fft.h" 00030 #include <math.h> 00031 #include <unistd.h> 00032 #include <sys/time.h> 00033 #include <stdlib.h> 00034 #include <string.h> 00035 00036 #undef exit 00037 00038 /* reference fft */ 00039 00040 #define MUL16(a,b) ((a) * (b)) 00041 00042 #define CMAC(pre, pim, are, aim, bre, bim) \ 00043 {\ 00044 pre += (MUL16(are, bre) - MUL16(aim, bim));\ 00045 pim += (MUL16(are, bim) + MUL16(bre, aim));\ 00046 } 00047 00048 FFTComplex *exptab; 00049 00050 static void fft_ref_init(int nbits, int inverse) 00051 { 00052 int n, i; 00053 double c1, s1, alpha; 00054 00055 n = 1 << nbits; 00056 exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 00057 00058 for (i = 0; i < (n/2); i++) { 00059 alpha = 2 * M_PI * (float)i / (float)n; 00060 c1 = cos(alpha); 00061 s1 = sin(alpha); 00062 if (!inverse) 00063 s1 = -s1; 00064 exptab[i].re = c1; 00065 exptab[i].im = s1; 00066 } 00067 } 00068 00069 static void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits) 00070 { 00071 int n, i, j, k, n2; 00072 double tmp_re, tmp_im, s, c; 00073 FFTComplex *q; 00074 00075 n = 1 << nbits; 00076 n2 = n >> 1; 00077 for (i = 0; i < n; i++) { 00078 tmp_re = 0; 00079 tmp_im = 0; 00080 q = tab; 00081 for (j = 0; j < n; j++) { 00082 k = (i * j) & (n - 1); 00083 if (k >= n2) { 00084 c = -exptab[k - n2].re; 00085 s = -exptab[k - n2].im; 00086 } else { 00087 c = exptab[k].re; 00088 s = exptab[k].im; 00089 } 00090 CMAC(tmp_re, tmp_im, c, s, q->re, q->im); 00091 q++; 00092 } 00093 tabr[i].re = tmp_re; 00094 tabr[i].im = tmp_im; 00095 } 00096 } 00097 00098 static void imdct_ref(float *out, float *in, int nbits) 00099 { 00100 int n = 1<<nbits; 00101 int k, i, a; 00102 double sum, f; 00103 00104 for (i = 0; i < n; i++) { 00105 sum = 0; 00106 for (k = 0; k < n/2; k++) { 00107 a = (2 * i + 1 + (n / 2)) * (2 * k + 1); 00108 f = cos(M_PI * a / (double)(2 * n)); 00109 sum += f * in[k]; 00110 } 00111 out[i] = -sum; 00112 } 00113 } 00114 00115 /* NOTE: no normalisation by 1 / N is done */ 00116 static void mdct_ref(float *output, float *input, int nbits) 00117 { 00118 int n = 1<<nbits; 00119 int k, i; 00120 double a, s; 00121 00122 /* do it by hand */ 00123 for (k = 0; k < n/2; k++) { 00124 s = 0; 00125 for (i = 0; i < n; i++) { 00126 a = (2*M_PI*(2*i+1+n/2)*(2*k+1) / (4 * n)); 00127 s += input[i] * cos(a); 00128 } 00129 output[k] = s; 00130 } 00131 } 00132 00133 static void idct_ref(float *output, float *input, int nbits) 00134 { 00135 int n = 1<<nbits; 00136 int k, i; 00137 double a, s; 00138 00139 /* do it by hand */ 00140 for (i = 0; i < n; i++) { 00141 s = 0.5 * input[0]; 00142 for (k = 1; k < n; k++) { 00143 a = M_PI*k*(i+0.5) / n; 00144 s += input[k] * cos(a); 00145 } 00146 output[i] = 2 * s / n; 00147 } 00148 } 00149 static void dct_ref(float *output, float *input, int nbits) 00150 { 00151 int n = 1<<nbits; 00152 int k, i; 00153 double a, s; 00154 00155 /* do it by hand */ 00156 for (k = 0; k < n; k++) { 00157 s = 0; 00158 for (i = 0; i < n; i++) { 00159 a = M_PI*k*(i+0.5) / n; 00160 s += input[i] * cos(a); 00161 } 00162 output[k] = s; 00163 } 00164 } 00165 00166 00167 static float frandom(AVLFG *prng) 00168 { 00169 return (int16_t)av_lfg_get(prng) / 32768.0; 00170 } 00171 00172 static int64_t gettime(void) 00173 { 00174 struct timeval tv; 00175 gettimeofday(&tv,NULL); 00176 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; 00177 } 00178 00179 static void check_diff(float *tab1, float *tab2, int n, double scale) 00180 { 00181 int i; 00182 double max= 0; 00183 double error= 0; 00184 00185 for (i = 0; i < n; i++) { 00186 double e= fabsf(tab1[i] - (tab2[i] / scale)); 00187 if (e >= 1e-3) { 00188 av_log(NULL, AV_LOG_ERROR, "ERROR %d: %f %f\n", 00189 i, tab1[i], tab2[i]); 00190 } 00191 error+= e*e; 00192 if(e>max) max= e; 00193 } 00194 av_log(NULL, AV_LOG_INFO, "max:%f e:%g\n", max, sqrt(error)/n); 00195 } 00196 00197 00198 static void help(void) 00199 { 00200 av_log(NULL, AV_LOG_INFO,"usage: fft-test [-h] [-s] [-i] [-n b]\n" 00201 "-h print this help\n" 00202 "-s speed test\n" 00203 "-m (I)MDCT test\n" 00204 "-d (I)DCT test\n" 00205 "-r (I)RDFT test\n" 00206 "-i inverse transform test\n" 00207 "-n b set the transform size to 2^b\n" 00208 "-f x set scale factor for output data of (I)MDCT to x\n" 00209 ); 00210 exit(1); 00211 } 00212 00213 enum tf_transform { 00214 TRANSFORM_FFT, 00215 TRANSFORM_MDCT, 00216 TRANSFORM_RDFT, 00217 TRANSFORM_DCT, 00218 }; 00219 00220 int main(int argc, char **argv) 00221 { 00222 FFTComplex *tab, *tab1, *tab_ref; 00223 FFTSample *tab2; 00224 int it, i, c; 00225 int do_speed = 0; 00226 enum tf_transform transform = TRANSFORM_FFT; 00227 int do_inverse = 0; 00228 FFTContext s1, *s = &s1; 00229 FFTContext m1, *m = &m1; 00230 RDFTContext r1, *r = &r1; 00231 DCTContext d1, *d = &d1; 00232 int fft_nbits, fft_size, fft_size_2; 00233 double scale = 1.0; 00234 AVLFG prng; 00235 av_lfg_init(&prng, 1); 00236 00237 fft_nbits = 9; 00238 for(;;) { 00239 c = getopt(argc, argv, "hsimrdn:f:"); 00240 if (c == -1) 00241 break; 00242 switch(c) { 00243 case 'h': 00244 help(); 00245 break; 00246 case 's': 00247 do_speed = 1; 00248 break; 00249 case 'i': 00250 do_inverse = 1; 00251 break; 00252 case 'm': 00253 transform = TRANSFORM_MDCT; 00254 break; 00255 case 'r': 00256 transform = TRANSFORM_RDFT; 00257 break; 00258 case 'd': 00259 transform = TRANSFORM_DCT; 00260 break; 00261 case 'n': 00262 fft_nbits = atoi(optarg); 00263 break; 00264 case 'f': 00265 scale = atof(optarg); 00266 break; 00267 } 00268 } 00269 00270 fft_size = 1 << fft_nbits; 00271 fft_size_2 = fft_size >> 1; 00272 tab = av_malloc(fft_size * sizeof(FFTComplex)); 00273 tab1 = av_malloc(fft_size * sizeof(FFTComplex)); 00274 tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); 00275 tab2 = av_malloc(fft_size * sizeof(FFTSample)); 00276 00277 switch (transform) { 00278 case TRANSFORM_MDCT: 00279 av_log(NULL, AV_LOG_INFO,"Scale factor is set to %f\n", scale); 00280 if (do_inverse) 00281 av_log(NULL, AV_LOG_INFO,"IMDCT"); 00282 else 00283 av_log(NULL, AV_LOG_INFO,"MDCT"); 00284 ff_mdct_init(m, fft_nbits, do_inverse, scale); 00285 break; 00286 case TRANSFORM_FFT: 00287 if (do_inverse) 00288 av_log(NULL, AV_LOG_INFO,"IFFT"); 00289 else 00290 av_log(NULL, AV_LOG_INFO,"FFT"); 00291 ff_fft_init(s, fft_nbits, do_inverse); 00292 fft_ref_init(fft_nbits, do_inverse); 00293 break; 00294 case TRANSFORM_RDFT: 00295 if (do_inverse) 00296 av_log(NULL, AV_LOG_INFO,"IDFT_C2R"); 00297 else 00298 av_log(NULL, AV_LOG_INFO,"DFT_R2C"); 00299 ff_rdft_init(r, fft_nbits, do_inverse ? IDFT_C2R : DFT_R2C); 00300 fft_ref_init(fft_nbits, do_inverse); 00301 break; 00302 case TRANSFORM_DCT: 00303 if (do_inverse) 00304 av_log(NULL, AV_LOG_INFO,"DCT_III"); 00305 else 00306 av_log(NULL, AV_LOG_INFO,"DCT_II"); 00307 ff_dct_init(d, fft_nbits, do_inverse ? DCT_III : DCT_II); 00308 break; 00309 } 00310 av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); 00311 00312 /* generate random data */ 00313 00314 for (i = 0; i < fft_size; i++) { 00315 tab1[i].re = frandom(&prng); 00316 tab1[i].im = frandom(&prng); 00317 } 00318 00319 /* checking result */ 00320 av_log(NULL, AV_LOG_INFO,"Checking...\n"); 00321 00322 switch (transform) { 00323 case TRANSFORM_MDCT: 00324 if (do_inverse) { 00325 imdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); 00326 ff_imdct_calc(m, tab2, (float *)tab1); 00327 check_diff((float *)tab_ref, tab2, fft_size, scale); 00328 } else { 00329 mdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); 00330 00331 ff_mdct_calc(m, tab2, (float *)tab1); 00332 00333 check_diff((float *)tab_ref, tab2, fft_size / 2, scale); 00334 } 00335 break; 00336 case TRANSFORM_FFT: 00337 memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); 00338 ff_fft_permute(s, tab); 00339 ff_fft_calc(s, tab); 00340 00341 fft_ref(tab_ref, tab1, fft_nbits); 00342 check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 1.0); 00343 break; 00344 case TRANSFORM_RDFT: 00345 if (do_inverse) { 00346 tab1[ 0].im = 0; 00347 tab1[fft_size_2].im = 0; 00348 for (i = 1; i < fft_size_2; i++) { 00349 tab1[fft_size_2+i].re = tab1[fft_size_2-i].re; 00350 tab1[fft_size_2+i].im = -tab1[fft_size_2-i].im; 00351 } 00352 00353 memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); 00354 tab2[1] = tab1[fft_size_2].re; 00355 00356 ff_rdft_calc(r, tab2); 00357 fft_ref(tab_ref, tab1, fft_nbits); 00358 for (i = 0; i < fft_size; i++) { 00359 tab[i].re = tab2[i]; 00360 tab[i].im = 0; 00361 } 00362 check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 0.5); 00363 } else { 00364 for (i = 0; i < fft_size; i++) { 00365 tab2[i] = tab1[i].re; 00366 tab1[i].im = 0; 00367 } 00368 ff_rdft_calc(r, tab2); 00369 fft_ref(tab_ref, tab1, fft_nbits); 00370 tab_ref[0].im = tab_ref[fft_size_2].re; 00371 check_diff((float *)tab_ref, (float *)tab2, fft_size, 1.0); 00372 } 00373 break; 00374 case TRANSFORM_DCT: 00375 memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); 00376 ff_dct_calc(d, tab); 00377 if (do_inverse) { 00378 idct_ref(tab_ref, tab1, fft_nbits); 00379 } else { 00380 dct_ref(tab_ref, tab1, fft_nbits); 00381 } 00382 check_diff((float *)tab_ref, (float *)tab, fft_size, 1.0); 00383 break; 00384 } 00385 00386 /* do a speed test */ 00387 00388 if (do_speed) { 00389 int64_t time_start, duration; 00390 int nb_its; 00391 00392 av_log(NULL, AV_LOG_INFO,"Speed test...\n"); 00393 /* we measure during about 1 seconds */ 00394 nb_its = 1; 00395 for(;;) { 00396 time_start = gettime(); 00397 for (it = 0; it < nb_its; it++) { 00398 switch (transform) { 00399 case TRANSFORM_MDCT: 00400 if (do_inverse) { 00401 ff_imdct_calc(m, (float *)tab, (float *)tab1); 00402 } else { 00403 ff_mdct_calc(m, (float *)tab, (float *)tab1); 00404 } 00405 break; 00406 case TRANSFORM_FFT: 00407 memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); 00408 ff_fft_calc(s, tab); 00409 break; 00410 case TRANSFORM_RDFT: 00411 memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); 00412 ff_rdft_calc(r, tab2); 00413 break; 00414 case TRANSFORM_DCT: 00415 memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); 00416 ff_dct_calc(d, tab2); 00417 break; 00418 } 00419 } 00420 duration = gettime() - time_start; 00421 if (duration >= 1000000) 00422 break; 00423 nb_its *= 2; 00424 } 00425 av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n", 00426 (double)duration / nb_its, 00427 (double)duration / 1000000.0, 00428 nb_its); 00429 } 00430 00431 switch (transform) { 00432 case TRANSFORM_MDCT: 00433 ff_mdct_end(m); 00434 break; 00435 case TRANSFORM_FFT: 00436 ff_fft_end(s); 00437 break; 00438 case TRANSFORM_RDFT: 00439 ff_rdft_end(r); 00440 break; 00441 case TRANSFORM_DCT: 00442 ff_dct_end(d); 00443 break; 00444 } 00445 return 0; 00446 }