• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/snow.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "avcodec.h"
00022 #include "dsputil.h"
00023 #include "snow.h"
00024 
00025 #include "rangecoder.h"
00026 #include "mathops.h"
00027 
00028 #include "mpegvideo.h"
00029 
00030 #undef NDEBUG
00031 #include <assert.h>
00032 
/* 256-entry table mapping an 8-bit index to one of 3 levels (-1, 0, +1).
 * Indices 0-1 give 0, 2..127 give +1, 128..254 give -1 and 255 gives 0.
 * NOTE(review): presumably indexed with a signed difference masked to 8 bits,
 * so 128..255 stand for -128..-1 -- confirm at the use sites. */
00033 static const int8_t quant3[256]={
00034  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00035  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00036  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00037  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00038  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00039  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00040  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00041  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00042 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00043 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00044 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00045 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00046 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00047 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00048 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00049 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
00050 };
/* 256-entry table mapping an 8-bit index to one of 3 levels (-1, 0, +1).
 * Only index 0 gives 0; 1..127 give +1 and 128..255 give -1, i.e. there is
 * no dead zone around zero apart from the exact zero entry.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits --
 * confirm at the use sites. */
00051 static const int8_t quant3b[256]={
00052  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00053  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00054  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00055  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00056  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00057  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00058  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00059  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00060 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00061 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00062 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00063 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00064 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00065 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00066 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00067 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00068 };
/* 256-entry table with outputs in {-1, 0, +1} that alternates by index parity:
 * indices 0 and 1 give 0, every even index from 2 up gives +1 and every odd
 * index from 3 up gives -1.
 * NOTE(review): the interleaved layout suggests the index encodes sign in its
 * low bit -- confirm at the use sites. */
00069 static const int8_t quant3bA[256]={
00070  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00071  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00072  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00073  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00074  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00075  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00076  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00077  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00078  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00079  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00080  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00081  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00082  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00083  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00084  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00085  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00086 };
/* 256-entry table mapping an 8-bit index to one of 5 levels (-2..+2).
 * 0 gives 0, 1..3 give +1, 4..127 give +2, 128..252 give -2 and 253..255
 * give -1.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits, so
 * the upper half mirrors the lower half for negative inputs -- confirm. */
00087 static const int8_t quant5[256]={
00088  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00089  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00090  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00091  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00092  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00093  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00094  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00095  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00096 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00097 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00098 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00099 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
00104 };
/* 256-entry table mapping an 8-bit index to one of 7 levels (-3..+3).
 * The lower half (0..127) holds the non-negative levels with progressively
 * wider bins; the upper half (128..255) holds the negative levels, with the
 * smallest magnitudes at the very end of the table.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits --
 * confirm at the use sites. */
00105 static const int8_t quant7[256]={
00106  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00107  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00108  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
00109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00113  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
00120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
00122 };
/* 256-entry table mapping an 8-bit index to one of 9 levels (-4..+4).
 * The lower half (0..127) holds the non-negative levels with progressively
 * wider bins; the upper half (128..255) holds the negative levels, with the
 * smallest magnitudes at the very end of the table.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits --
 * confirm at the use sites. */
00123 static const int8_t quant9[256]={
00124  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00125  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00131  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
00139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
00140 };
/* 256-entry table mapping an 8-bit index to one of 11 levels (-5..+5).
 * The lower half (0..127) holds the non-negative levels with progressively
 * wider bins; the upper half (128..255) holds the negative levels, with the
 * smallest magnitudes at the very end of the table.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits --
 * confirm at the use sites. */
00141 static const int8_t quant11[256]={
00142  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
00143  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00144  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00149  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
00156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
00158 };
/* 256-entry table mapping an 8-bit index to one of 13 levels (-6..+6).
 * The lower half (0..127) holds the non-negative levels with progressively
 * wider bins; the upper half (128..255) holds the negative levels, with the
 * smallest magnitudes at the very end of the table.
 * NOTE(review): presumably indexed with a signed value masked to 8 bits --
 * confirm at the use sites. */
00159 static const int8_t quant13[256]={
00160  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
00161  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00162  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00163  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00167  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
00173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
00176 };
00177 
/*
 * obmc32 / obmc16: 32x32 and 16x16 uint8_t weight windows, stored row-major.
 * Three alternative shapes are kept side by side and the preprocessor picks
 * exactly one pair:
 *   "#if 0"   cubic  -- disabled
 *   "#elif 1" linear -- the variant that is actually compiled
 *   "#else"   cos    -- unreachable while the #elif above is 1
 * Every variant is symmetric in both directions and peaks in the centre.
 * The trailing "//error:" figures come from the original author; what they
 * measure is not visible in this file.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation -- confirm against the code that reads obmc_tab.
 */
00178 #if 0 //64*cubic
00179 static const uint8_t obmc32[1024]={
00180   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00181   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00182   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00183   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
00184   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
00185   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
00186   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
00187   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
00188   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
00189   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
00190   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
00191   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
00192   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
00193   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
00194   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
00195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
00196   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
00197   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
00198   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
00199   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
00200   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
00201   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
00202   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
00203   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
00204   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
00205   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
00206   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
00207   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
00208   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
00209   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00210   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00211   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00212 //error:0.000022
00213 };
00214 static const uint8_t obmc16[256]={
00215   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00216   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
00217   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
00218   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
00219   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
00220   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
00221   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
00222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
00223   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
00224   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
00225   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
00226   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
00227   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
00228   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
00229   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
00230   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00231 //error:0.000033
00232 };
00233 #elif 1 // 64*linear
00234 static const uint8_t obmc32[1024]={
00235   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
00236   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
00237   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
00238   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
00239   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
00240   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
00241   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
00242   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
00243   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
00244   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
00245   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
00246   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
00247   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
00248   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
00249   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
00250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
00251   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
00252   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
00253   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
00254   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
00255   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
00256   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
00257   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
00258   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
00259   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
00260   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
00261   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
00262   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
00263   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
00264   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
00265   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
00266   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
00267  //error:0.000020
00268 };
00269 static const uint8_t obmc16[256]={
00270   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
00271   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
00272   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
00273   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
00274   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
00275  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00276  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00278  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
00279  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
00280  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
00281   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
00282   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
00283   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
00284   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
00285   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
00286 //error:0.000015
00287 };
00288 #else //64*cos
00289 static const uint8_t obmc32[1024]={
00290   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00291   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00292   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00293   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
00294   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
00295   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
00296   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
00297   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
00298   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
00299   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
00300   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
00301   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
00302   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
00303   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
00304   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
00305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
00306   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
00307   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
00308   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
00309   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
00310   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
00311   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
00312   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
00313   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
00314   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
00315   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
00316   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
00317   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
00318   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
00319   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
00320   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
00321   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
00322 //error:0.000022
00323 };
00324 static const uint8_t obmc16[256]={
00325   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00326   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
00327   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
00328   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
00329   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
00330   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
00331   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
00332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
00333   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
00334   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
00335   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
00336   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
00337   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
00338   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
00339   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
00340   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
00341 //error:0.000022
00342 };
00343 #endif /* 0 */
00344 
/*
 * 8x8 weight window ("linear *64"), stored row-major: obmc8[y*8 + x].
 * The table is separable: every entry equals 4 * r[x] * r[y] with the
 * symmetric ramp r = {1, 3, 5, 7, 7, 5, 3, 1}.
 */
static const uint8_t obmc8[64] = {
      4,  12,  20,  28,  28,  20,  12,   4,
     12,  36,  60,  84,  84,  60,  36,  12,
     20,  60, 100, 140, 140, 100,  60,  20,
     28,  84, 140, 196, 196, 140,  84,  28,
     28,  84, 140, 196, 196, 140,  84,  28,
     20,  60, 100, 140, 140, 100,  60,  20,
     12,  36,  60,  84,  84,  60,  36,  12,
      4,  12,  20,  28,  28,  20,  12,   4,
    /* error:0.000000 (figure kept from the original table) */
};
00357 
/*
 * 4x4 weight window ("linear *64"), stored row-major: obmc4[y*4 + x].
 * Separable like its 8x8 sibling: every entry equals 16 * r[x] * r[y]
 * with the symmetric ramp r = {1, 3, 3, 1}.
 */
static const uint8_t obmc4[16] = {
     16,  48,  48,  16,
     48, 144, 144,  48,
     48, 144, 144,  48,
     16,  48,  48,  16,
    /* error:0.000000 (figure kept from the original table) */
};
00366 
00367 static const uint8_t * const obmc_tab[4]={
00368     obmc32, obmc16, obmc8, obmc4
00369 };
00370 
/* Square MAX_REF_FRAMES x MAX_REF_FRAMES int table; as a static it starts out
 * zeroed and nothing in this part of the file writes it.
 * NOTE(review): presumably motion-vector scale factors between pairs of
 * reference frames -- confirm where it is filled in. */
00371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00372 
/* Values stored in BlockNode.type. 1 and 2 are live; the commented-out
 * neighbours (TYPE_SPLIT 1, TYPE_NOCOLOR 4) suggest these are bit flags. */
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4

/**
 * Per-block record.
 * NOTE(review): mx/my look like motion-vector components and ref like a
 * reference-frame index -- confirm against the code that fills the blocks.
 */
typedef struct BlockNode {
    int16_t mx;
    int16_t my;
    uint8_t ref;
    uint8_t color[3];   /* three byte values; null_block uses 128 for each */
    uint8_t type;       /* BLOCK_INTRA / BLOCK_OPT */
    uint8_t level;      //FIXME merge into type?
} BlockNode;
00385 
00386 static const BlockNode null_block= { //FIXME add border maybe
00387     .color= {128,128,128},
00388     .mx= 0,
00389     .my= 0,
00390     .ref= 0,
00391     .type= 0,
00392     .level= 0,
00393 };
00394 
/* Size constants. MB_SIZE is derived from its log2, so it evaluates to 16. */
#define LOG2_MB_SIZE       4
#define MB_SIZE            (1 << LOG2_MB_SIZE)
#define ENCODER_EXTRA_BITS 4
#define HTAPS_MAX          8
00399 
/* Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * NOTE(review): presumably a position within a line plus the coefficient
 * found there -- confirm at the use sites. */
typedef struct x_and_coeff {
    int16_t  x;
    uint16_t coeff;
} x_and_coeff;
00404 
00405 typedef struct SubBand{
00406     int level;
00407     int stride;
00408     int width;
00409     int height;
00410     int qlog;        
00411     DWTELEM *buf;
00412     IDWTELEM *ibuf;
00413     int buf_x_offset;
00414     int buf_y_offset;
00415     int stride_line; 
00416     x_and_coeff * x_coeff;
00417     struct SubBand *parent;
00418     uint8_t state[/*7*2*/ 7 + 512][32];
00419 }SubBand;
00420 
00421 typedef struct Plane{
00422     int width;
00423     int height;
00424     SubBand band[MAX_DECOMPOSITIONS][4];
00425 
00426     int htaps;
00427     int8_t hcoeff[HTAPS_MAX/2];
00428     int diag_mc;
00429     int fast_mc;
00430 
00431     int last_htaps;
00432     int8_t last_hcoeff[HTAPS_MAX/2];
00433     int last_diag_mc;
00434 }Plane;
00435 
00436 typedef struct SnowContext{
00437 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
00438 
00439     AVCodecContext *avctx;
00440     RangeCoder c;
00441     DSPContext dsp;
00442     AVFrame new_picture;
00443     AVFrame input_picture;              
00444     AVFrame current_picture;
00445     AVFrame last_picture[MAX_REF_FRAMES];
00446     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
00447     AVFrame mconly_picture;
00448 //     uint8_t q_context[16];
00449     uint8_t header_state[32];
00450     uint8_t block_state[128 + 32*128];
00451     int keyframe;
00452     int always_reset;
00453     int version;
00454     int spatial_decomposition_type;
00455     int last_spatial_decomposition_type;
00456     int temporal_decomposition_type;
00457     int spatial_decomposition_count;
00458     int last_spatial_decomposition_count;
00459     int temporal_decomposition_count;
00460     int max_ref_frames;
00461     int ref_frames;
00462     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
00463     uint32_t *ref_scores[MAX_REF_FRAMES];
00464     DWTELEM *spatial_dwt_buffer;
00465     IDWTELEM *spatial_idwt_buffer;
00466     int colorspace_type;
00467     int chroma_h_shift;
00468     int chroma_v_shift;
00469     int spatial_scalability;
00470     int qlog;
00471     int last_qlog;
00472     int lambda;
00473     int lambda2;
00474     int pass1_rc;
00475     int mv_scale;
00476     int last_mv_scale;
00477     int qbias;
00478     int last_qbias;
00479 #define QBIAS_SHIFT 3
00480     int b_width;
00481     int b_height;
00482     int block_max_depth;
00483     int last_block_max_depth;
00484     Plane plane[MAX_PLANES];
00485     BlockNode *block;
00486 #define ME_CACHE_SIZE 1024
00487     int me_cache[ME_CACHE_SIZE];
00488     int me_cache_generation;
00489     slice_buffer sb;
00490 
00491     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
00492 
00493     uint8_t *scratchbuf;
00494 }SnowContext;
00495 
00496 typedef struct {
00497     IDWTELEM *b0;
00498     IDWTELEM *b1;
00499     IDWTELEM *b2;
00500     IDWTELEM *b3;
00501     int y;
00502 } DWTCompose;
00503 
/* Return the buffer bound to line_num, calling slice_buffer_load_line() to
 * bind one only when the slot is still NULL.
 * Function-like macro: slice_buf and line_num are each expanded more than
 * once, so both arguments must be free of side effects. */
00504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
00505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
00506 
00507 static void iterative_me(SnowContext *s);
00508 
00509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
00510 {
00511     int i;
00512 
00513     buf->base_buffer = base_buffer;
00514     buf->line_count = line_count;
00515     buf->line_width = line_width;
00516     buf->data_count = max_allocated_lines;
00517     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
00518     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
00519 
00520     for(i = 0; i < max_allocated_lines; i++){
00521         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
00522     }
00523 
00524     buf->data_stack_top = max_allocated_lines - 1;
00525 }
00526 
00527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
00528 {
00529     int offset;
00530     IDWTELEM * buffer;
00531 
00532     assert(buf->data_stack_top >= 0);
00533 //  assert(!buf->line[line]);
00534     if (buf->line[line])
00535         return buf->line[line];
00536 
00537     offset = buf->line_width * line;
00538     buffer = buf->data_stack[buf->data_stack_top];
00539     buf->data_stack_top--;
00540     buf->line[line] = buffer;
00541 
00542     return buffer;
00543 }
00544 
00545 static void slice_buffer_release(slice_buffer * buf, int line)
00546 {
00547     int offset;
00548     IDWTELEM * buffer;
00549 
00550     assert(line >= 0 && line < buf->line_count);
00551     assert(buf->line[line]);
00552 
00553     offset = buf->line_width * line;
00554     buffer = buf->line[line];
00555     buf->data_stack_top++;
00556     buf->data_stack[buf->data_stack_top] = buffer;
00557     buf->line[line] = NULL;
00558 }
00559 
00560 static void slice_buffer_flush(slice_buffer * buf)
00561 {
00562     int i;
00563     for(i = 0; i < buf->line_count; i++){
00564         if (buf->line[i])
00565             slice_buffer_release(buf, i);
00566     }
00567 }
00568 
00569 static void slice_buffer_destroy(slice_buffer * buf)
00570 {
00571     int i;
00572     slice_buffer_flush(buf);
00573 
00574     for(i = buf->data_count - 1; i >= 0; i--){
00575         av_freep(&buf->data_stack[i]);
00576     }
00577     av_freep(&buf->data_stack);
00578     av_freep(&buf->line);
00579 }
00580 
#ifdef __sgi
// Avoid a name clash on SGI IRIX
#undef qexp
#endif
// NOTE(review): presumably the fixed-point shift that goes with qexp[] values;
// confirm at the use sites, none are visible near this definition.
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Table of QROOT bytes; it has no initializer here, so it starts out zeroed
// and is presumably filled in elsewhere in the file before use.
static uint8_t qexp[QROOT];
00587 
/**
 * Reflect an index into the range [0, m] by repeatedly mirroring it at
 * both ends (0 and m themselves are not repeated).
 *
 * @param v index, may be negative or larger than m
 * @param m largest valid index
 * @return the reflected index; 0 when m is 0, since the only valid index
 *         is then 0 and the reflection loop below could never terminate
 */
static inline int mirror(int v, int m){
    if(!m)
        return 0;

    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
00595 
00596 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00597     int i;
00598 
00599     if(v){
00600         const int a= FFABS(v);
00601         const int e= av_log2(a);
00602 #if 1
00603         const int el= FFMIN(e, 10);
00604         put_rac(c, state+0, 0);
00605 
00606         for(i=0; i<el; i++){
00607             put_rac(c, state+1+i, 1);  //1..10
00608         }
00609         for(; i<e; i++){
00610             put_rac(c, state+1+9, 1);  //1..10
00611         }
00612         put_rac(c, state+1+FFMIN(i,9), 0);
00613 
00614         for(i=e-1; i>=el; i--){
00615             put_rac(c, state+22+9, (a>>i)&1); //22..31
00616         }
00617         for(; i>=0; i--){
00618             put_rac(c, state+22+i, (a>>i)&1); //22..31
00619         }
00620 
00621         if(is_signed)
00622             put_rac(c, state+11 + el, v < 0); //11..21
00623 #else
00624 
00625         put_rac(c, state+0, 0);
00626         if(e<=9){
00627             for(i=0; i<e; i++){
00628                 put_rac(c, state+1+i, 1);  //1..10
00629             }
00630             put_rac(c, state+1+i, 0);
00631 
00632             for(i=e-1; i>=0; i--){
00633                 put_rac(c, state+22+i, (a>>i)&1); //22..31
00634             }
00635 
00636             if(is_signed)
00637                 put_rac(c, state+11 + e, v < 0); //11..21
00638         }else{
00639             for(i=0; i<e; i++){
00640                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
00641             }
00642             put_rac(c, state+1+FFMIN(i,9), 0);
00643 
00644             for(i=e-1; i>=0; i--){
00645                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
00646             }
00647 
00648             if(is_signed)
00649                 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
00650         }
00651 #endif /* 1 */
00652     }else{
00653         put_rac(c, state+0, 1);
00654     }
00655 }
00656 
00657 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00658     if(get_rac(c, state+0))
00659         return 0;
00660     else{
00661         int i, e, a;
00662         e= 0;
00663         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
00664             e++;
00665         }
00666 
00667         a= 1;
00668         for(i=e-1; i>=0; i--){
00669             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
00670         }
00671 
00672         if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
00673             return -a;
00674         else
00675             return a;
00676     }
00677 }
00678 
00679 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00680     int i;
00681     int r= log2>=0 ? 1<<log2 : 1;
00682 
00683     assert(v>=0);
00684     assert(log2>=-4);
00685 
00686     while(v >= r){
00687         put_rac(c, state+4+log2, 1);
00688         v -= r;
00689         log2++;
00690         if(log2>0) r+=r;
00691     }
00692     put_rac(c, state+4+log2, 0);
00693 
00694     for(i=log2-1; i>=0; i--){
00695         put_rac(c, state+31-i, (v>>i)&1);
00696     }
00697 }
00698 
00699 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00700     int i;
00701     int r= log2>=0 ? 1<<log2 : 1;
00702     int v=0;
00703 
00704     assert(log2>=-4);
00705 
00706     while(get_rac(c, state+4+log2)){
00707         v+= r;
00708         log2++;
00709         if(log2>0) r+=r;
00710     }
00711 
00712     for(i=log2-1; i>=0; i--){
00713         v+= get_rac(c, state+31-i)<<i;
00714     }
00715 
00716     return v;
00717 }
00718 
00719 static av_always_inline void
00720 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00721      int dst_step, int src_step, int ref_step,
00722      int width, int mul, int add, int shift,
00723      int highpass, int inverse){
00724     const int mirror_left= !highpass;
00725     const int mirror_right= (width&1) ^ highpass;
00726     const int w= (width>>1) - 1 + (highpass & width);
00727     int i;
00728 
00729 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00730     if(mirror_left){
00731         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00732         dst += dst_step;
00733         src += src_step;
00734     }
00735 
00736     for(i=0; i<w; i++){
00737         dst[i*dst_step] =
00738             LIFT(src[i*src_step],
00739                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00740                  inverse);
00741     }
00742 
00743     if(mirror_right){
00744         dst[w*dst_step] =
00745             LIFT(src[w*src_step],
00746                  ((mul*2*ref[w*ref_step]+add)>>shift),
00747                  inverse);
00748     }
00749 }
00750 
00751 static av_always_inline void
00752 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00753          int dst_step, int src_step, int ref_step,
00754          int width, int mul, int add, int shift,
00755          int highpass, int inverse){
00756     const int mirror_left= !highpass;
00757     const int mirror_right= (width&1) ^ highpass;
00758     const int w= (width>>1) - 1 + (highpass & width);
00759     int i;
00760 
00761 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
00762     if(mirror_left){
00763         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
00764         dst += dst_step;
00765         src += src_step;
00766     }
00767 
00768     for(i=0; i<w; i++){
00769         dst[i*dst_step] =
00770             LIFT(src[i*src_step],
00771                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
00772                  inverse);
00773     }
00774 
00775     if(mirror_right){
00776         dst[w*dst_step] =
00777             LIFT(src[w*src_step],
00778                  ((mul*2*ref[w*ref_step]+add)>>shift),
00779                  inverse);
00780     }
00781 }
00782 
00783 #ifndef liftS
00784 static av_always_inline void
00785 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
00786       int dst_step, int src_step, int ref_step,
00787       int width, int mul, int add, int shift,
00788       int highpass, int inverse){
00789     const int mirror_left= !highpass;
00790     const int mirror_right= (width&1) ^ highpass;
00791     const int w= (width>>1) - 1 + (highpass & width);
00792     int i;
00793 
00794     assert(shift == 4);
00795 #define LIFTS(src, ref, inv) \
00796         ((inv) ? \
00797             (src) + (((ref) + 4*(src))>>shift): \
00798             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00799     if(mirror_left){
00800         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00801         dst += dst_step;
00802         src += src_step;
00803     }
00804 
00805     for(i=0; i<w; i++){
00806         dst[i*dst_step] =
00807             LIFTS(src[i*src_step],
00808                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00809                   inverse);
00810     }
00811 
00812     if(mirror_right){
00813         dst[w*dst_step] =
00814             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00815     }
00816 }
00817 static av_always_inline void
00818 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
00819           int dst_step, int src_step, int ref_step,
00820           int width, int mul, int add, int shift,
00821           int highpass, int inverse){
00822     const int mirror_left= !highpass;
00823     const int mirror_right= (width&1) ^ highpass;
00824     const int w= (width>>1) - 1 + (highpass & width);
00825     int i;
00826 
00827     assert(shift == 4);
00828 #define LIFTS(src, ref, inv) \
00829     ((inv) ? \
00830         (src) + (((ref) + 4*(src))>>shift): \
00831         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
00832     if(mirror_left){
00833         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
00834         dst += dst_step;
00835         src += src_step;
00836     }
00837 
00838     for(i=0; i<w; i++){
00839         dst[i*dst_step] =
00840             LIFTS(src[i*src_step],
00841                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
00842                   inverse);
00843     }
00844 
00845     if(mirror_right){
00846         dst[w*dst_step] =
00847             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
00848     }
00849 }
00850 #endif /* ! liftS */
00851 
00852 static void horizontal_decompose53i(DWTELEM *b, int width){
00853     DWTELEM temp[width];
00854     const int width2= width>>1;
00855     int x;
00856     const int w2= (width+1)>>1;
00857 
00858     for(x=0; x<width2; x++){
00859         temp[x   ]= b[2*x    ];
00860         temp[x+w2]= b[2*x + 1];
00861     }
00862     if(width&1)
00863         temp[x   ]= b[2*x    ];
00864 #if 0
00865     {
00866     int A1,A2,A3,A4;
00867     A2= temp[1       ];
00868     A4= temp[0       ];
00869     A1= temp[0+width2];
00870     A1 -= (A2 + A4)>>1;
00871     A4 += (A1 + 1)>>1;
00872     b[0+width2] = A1;
00873     b[0       ] = A4;
00874     for(x=1; x+1<width2; x+=2){
00875         A3= temp[x+width2];
00876         A4= temp[x+1     ];
00877         A3 -= (A2 + A4)>>1;
00878         A2 += (A1 + A3 + 2)>>2;
00879         b[x+width2] = A3;
00880         b[x       ] = A2;
00881 
00882         A1= temp[x+1+width2];
00883         A2= temp[x+2       ];
00884         A1 -= (A2 + A4)>>1;
00885         A4 += (A1 + A3 + 2)>>2;
00886         b[x+1+width2] = A1;
00887         b[x+1       ] = A4;
00888     }
00889     A3= temp[width-1];
00890     A3 -= A2;
00891     A2 += (A1 + A3 + 2)>>2;
00892     b[width -1] = A3;
00893     b[width2-1] = A2;
00894     }
00895 #else
00896     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
00897     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
00898 #endif /* 0 */
00899 }
00900 
00901 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00902     int i;
00903 
00904     for(i=0; i<width; i++){
00905         b1[i] -= (b0[i] + b2[i])>>1;
00906     }
00907 }
00908 
00909 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00910     int i;
00911 
00912     for(i=0; i<width; i++){
00913         b1[i] += (b0[i] + b2[i] + 2)>>2;
00914     }
00915 }
00916 
00917 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
00918     int y;
00919     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
00920     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
00921 
00922     for(y=-2; y<height; y+=2){
00923         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
00924         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
00925 
00926         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
00927         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
00928 
00929         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
00930         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
00931 
00932         b0=b2;
00933         b1=b3;
00934     }
00935 }
00936 
00937 static void horizontal_decompose97i(DWTELEM *b, int width){
00938     DWTELEM temp[width];
00939     const int w2= (width+1)>>1;
00940 
00941     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
00942     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
00943     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
00944     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
00945 }
00946 
00947 
00948 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00949     int i;
00950 
00951     for(i=0; i<width; i++){
00952         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
00953     }
00954 }
00955 
00956 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00957     int i;
00958 
00959     for(i=0; i<width; i++){
00960         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
00961     }
00962 }
00963 
00964 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00965     int i;
00966 
00967     for(i=0; i<width; i++){
00968 #ifdef liftS
00969         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
00970 #else
00971         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
00972 #endif
00973     }
00974 }
00975 
00976 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
00977     int i;
00978 
00979     for(i=0; i<width; i++){
00980         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
00981     }
00982 }
00983 
00984 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
00985     int y;
00986     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
00987     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
00988     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
00989     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
00990 
00991     for(y=-4; y<height; y+=2){
00992         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
00993         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
00994 
00995         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
00996         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
00997 
00998         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
00999         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
01000         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
01001         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
01002 
01003         b0=b2;
01004         b1=b3;
01005         b2=b4;
01006         b3=b5;
01007     }
01008 }
01009 
01010 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01011     int level;
01012 
01013     for(level=0; level<decomposition_count; level++){
01014         switch(type){
01015         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
01016         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
01017         }
01018     }
01019 }
01020 
01021 static void horizontal_compose53i(IDWTELEM *b, int width){
01022     IDWTELEM temp[width];
01023     const int width2= width>>1;
01024     const int w2= (width+1)>>1;
01025     int x;
01026 
01027 #if 0
01028     int A1,A2,A3,A4;
01029     A2= temp[1       ];
01030     A4= temp[0       ];
01031     A1= temp[0+width2];
01032     A1 -= (A2 + A4)>>1;
01033     A4 += (A1 + 1)>>1;
01034     b[0+width2] = A1;
01035     b[0       ] = A4;
01036     for(x=1; x+1<width2; x+=2){
01037         A3= temp[x+width2];
01038         A4= temp[x+1     ];
01039         A3 -= (A2 + A4)>>1;
01040         A2 += (A1 + A3 + 2)>>2;
01041         b[x+width2] = A3;
01042         b[x       ] = A2;
01043 
01044         A1= temp[x+1+width2];
01045         A2= temp[x+2       ];
01046         A1 -= (A2 + A4)>>1;
01047         A4 += (A1 + A3 + 2)>>2;
01048         b[x+1+width2] = A1;
01049         b[x+1       ] = A4;
01050     }
01051     A3= temp[width-1];
01052     A3 -= A2;
01053     A2 += (A1 + A3 + 2)>>2;
01054     b[width -1] = A3;
01055     b[width2-1] = A2;
01056 #else
01057     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
01058     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
01059 #endif /* 0 */
01060     for(x=0; x<width2; x++){
01061         b[2*x    ]= temp[x   ];
01062         b[2*x + 1]= temp[x+w2];
01063     }
01064     if(width&1)
01065         b[2*x    ]= temp[x   ];
01066 }
01067 
01068 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01069     int i;
01070 
01071     for(i=0; i<width; i++){
01072         b1[i] += (b0[i] + b2[i])>>1;
01073     }
01074 }
01075 
01076 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01077     int i;
01078 
01079     for(i=0; i<width; i++){
01080         b1[i] -= (b0[i] + b2[i] + 2)>>2;
01081     }
01082 }
01083 
01084 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01085     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
01086     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
01087     cs->y = -1;
01088 }
01089 
01090 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01091     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
01092     cs->b1 = buffer + mirror(-1  , height-1)*stride;
01093     cs->y = -1;
01094 }
01095 
01096 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01097     int y= cs->y;
01098 
01099     IDWTELEM *b0= cs->b0;
01100     IDWTELEM *b1= cs->b1;
01101     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
01102     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
01103 
01104         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01105         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01106 
01107         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01108         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01109 
01110     cs->b0 = b2;
01111     cs->b1 = b3;
01112     cs->y += 2;
01113 }
01114 
01115 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01116     int y= cs->y;
01117     IDWTELEM *b0= cs->b0;
01118     IDWTELEM *b1= cs->b1;
01119     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
01120     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
01121 
01122         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
01123         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
01124 
01125         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
01126         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
01127 
01128     cs->b0 = b2;
01129     cs->b1 = b3;
01130     cs->y += 2;
01131 }
01132 
01133 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
01134     DWTCompose cs;
01135     spatial_compose53i_init(&cs, buffer, height, stride);
01136     while(cs.y <= height)
01137         spatial_compose53i_dy(&cs, buffer, width, height, stride);
01138 }
01139 
01140 
01141 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
01142     IDWTELEM temp[width];
01143     const int w2= (width+1)>>1;
01144 
01145     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
01146     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
01147     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
01148     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
01149 }
01150 
01151 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01152     int i;
01153 
01154     for(i=0; i<width; i++){
01155         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01156     }
01157 }
01158 
01159 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01160     int i;
01161 
01162     for(i=0; i<width; i++){
01163         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
01164     }
01165 }
01166 
01167 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01168     int i;
01169 
01170     for(i=0; i<width; i++){
01171 #ifdef liftS
01172         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
01173 #else
01174         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
01175 #endif
01176     }
01177 }
01178 
01179 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
01180     int i;
01181 
01182     for(i=0; i<width; i++){
01183         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
01184     }
01185 }
01186 
01187 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
01188     int i;
01189 
01190     for(i=0; i<width; i++){
01191         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
01192         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
01193 #ifdef liftS
01194         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
01195 #else
01196         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
01197 #endif
01198         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
01199     }
01200 }
01201 
01202 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
01203     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
01204     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
01205     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
01206     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
01207     cs->y = -3;
01208 }
01209 
01210 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
01211     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
01212     cs->b1 = buffer + mirror(-3  , height-1)*stride;
01213     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
01214     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
01215     cs->y = -3;
01216 }
01217 
01218 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
01219     int y = cs->y;
01220 
01221     IDWTELEM *b0= cs->b0;
01222     IDWTELEM *b1= cs->b1;
01223     IDWTELEM *b2= cs->b2;
01224     IDWTELEM *b3= cs->b3;
01225     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
01226     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
01227 
01228     if(y>0 && y+4<height){
01229         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
01230     }else{
01231         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01232         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01233         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01234         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01235     }
01236 
01237         if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
01238         if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
01239 
01240     cs->b0=b2;
01241     cs->b1=b3;
01242     cs->b2=b4;
01243     cs->b3=b5;
01244     cs->y += 2;
01245 }
01246 
01247 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
01248     int y = cs->y;
01249     IDWTELEM *b0= cs->b0;
01250     IDWTELEM *b1= cs->b1;
01251     IDWTELEM *b2= cs->b2;
01252     IDWTELEM *b3= cs->b3;
01253     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
01254     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
01255 
01256         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
01257         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
01258         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
01259         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
01260 
01261         if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
01262         if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
01263 
01264     cs->b0=b2;
01265     cs->b1=b3;
01266     cs->b2=b4;
01267     cs->b3=b5;
01268     cs->y += 2;
01269 }
01270 
01271 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
01272     DWTCompose cs;
01273     spatial_compose97i_init(&cs, buffer, height, stride);
01274     while(cs.y <= height)
01275         spatial_compose97i_dy(&cs, buffer, width, height, stride);
01276 }
01277 
01278 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
01279     int level;
01280     for(level=decomposition_count-1; level>=0; level--){
01281         switch(type){
01282         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01283         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
01284         }
01285     }
01286 }
01287 
01288 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01289     int level;
01290     for(level=decomposition_count-1; level>=0; level--){
01291         switch(type){
01292         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
01293         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
01294         }
01295     }
01296 }
01297 
01298 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
01299     const int support = type==1 ? 3 : 5;
01300     int level;
01301     if(type==2) return;
01302 
01303     for(level=decomposition_count-1; level>=0; level--){
01304         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01305             switch(type){
01306             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01307                 break;
01308             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
01309                 break;
01310             }
01311         }
01312     }
01313 }
01314 
01315 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
01316     const int support = type==1 ? 3 : 5;
01317     int level;
01318     if(type==2) return;
01319 
01320     for(level=decomposition_count-1; level>=0; level--){
01321         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
01322             switch(type){
01323             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01324                 break;
01325             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
01326                 break;
01327             }
01328         }
01329     }
01330 }
01331 
01332 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
01333         DWTCompose cs[MAX_DECOMPOSITIONS];
01334         int y;
01335         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
01336         for(y=0; y<height; y+=4)
01337             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
01338 }
01339 
01340 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01341     const int w= b->width;
01342     const int h= b->height;
01343     int x, y;
01344 
01345     if(1){
01346         int run=0;
01347         int runs[w*h];
01348         int run_index=0;
01349         int max_index;
01350 
01351         for(y=0; y<h; y++){
01352             for(x=0; x<w; x++){
01353                 int v, p=0;
01354                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
01355                 v= src[x + y*stride];
01356 
01357                 if(y){
01358                     t= src[x + (y-1)*stride];
01359                     if(x){
01360                         lt= src[x - 1 + (y-1)*stride];
01361                     }
01362                     if(x + 1 < w){
01363                         rt= src[x + 1 + (y-1)*stride];
01364                     }
01365                 }
01366                 if(x){
01367                     l= src[x - 1 + y*stride];
01368                     /*if(x > 1){
01369                         if(orientation==1) ll= src[y + (x-2)*stride];
01370                         else               ll= src[x - 2 + y*stride];
01371                     }*/
01372                 }
01373                 if(parent){
01374                     int px= x>>1;
01375                     int py= y>>1;
01376                     if(px<b->parent->width && py<b->parent->height)
01377                         p= parent[px + py*2*stride];
01378                 }
01379                 if(!(/*ll|*/l|lt|t|rt|p)){
01380                     if(v){
01381                         runs[run_index++]= run;
01382                         run=0;
01383                     }else{
01384                         run++;
01385                     }
01386                 }
01387             }
01388         }
01389         max_index= run_index;
01390         runs[run_index++]= run;
01391         run_index=0;
01392         run= runs[run_index++];
01393 
01394         put_symbol2(&s->c, b->state[30], max_index, 0);
01395         if(run_index <= max_index)
01396             put_symbol2(&s->c, b->state[1], run, 3);
01397 
01398         for(y=0; y<h; y++){
01399             if(s->c.bytestream_end - s->c.bytestream < w*40){
01400                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
01401                 return -1;
01402             }
01403             for(x=0; x<w; x++){
01404                 int v, p=0;
01405                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
01406                 v= src[x + y*stride];
01407 
01408                 if(y){
01409                     t= src[x + (y-1)*stride];
01410                     if(x){
01411                         lt= src[x - 1 + (y-1)*stride];
01412                     }
01413                     if(x + 1 < w){
01414                         rt= src[x + 1 + (y-1)*stride];
01415                     }
01416                 }
01417                 if(x){
01418                     l= src[x - 1 + y*stride];
01419                     /*if(x > 1){
01420                         if(orientation==1) ll= src[y + (x-2)*stride];
01421                         else               ll= src[x - 2 + y*stride];
01422                     }*/
01423                 }
01424                 if(parent){
01425                     int px= x>>1;
01426                     int py= y>>1;
01427                     if(px<b->parent->width && py<b->parent->height)
01428                         p= parent[px + py*2*stride];
01429                 }
01430                 if(/*ll|*/l|lt|t|rt|p){
01431                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01432 
01433                     put_rac(&s->c, &b->state[0][context], !!v);
01434                 }else{
01435                     if(!run){
01436                         run= runs[run_index++];
01437 
01438                         if(run_index <= max_index)
01439                             put_symbol2(&s->c, b->state[1], run, 3);
01440                         assert(v);
01441                     }else{
01442                         run--;
01443                         assert(!v);
01444                     }
01445                 }
01446                 if(v){
01447                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
01448                     int l2= 2*FFABS(l) + (l<0);
01449                     int t2= 2*FFABS(t) + (t<0);
01450 
01451                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
01452                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
01453                 }
01454             }
01455         }
01456     }
01457     return 0;
01458 }
01459 
01460 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
01461 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
01462 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
01463     return encode_subband_c0run(s, b, src, parent, stride, orientation);
01464 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
01465 }
01466 
/**
 * Decode all coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff.
 *
 * Only non-zero coefficients are stored, as (x, coeff) pairs in raster order.
 * Every row is terminated by an entry whose x is w+1, and one additional
 * terminator is appended after the last row.
 * The stored coeff value is 2*magnitude + sign_bit; the dequantizer
 * (decode_subband_slice_buffered) splits it again with v>>1 and v&1.
 *
 * @param s           codec context, provides the range coder
 * @param b           subband to fill (uses b->width, b->height, b->state, b->x_coeff)
 * @param parent      parent subband whose already unpacked x_coeff list supplies
 *                    the parent context, or NULL if there is none
 * @param orientation not used by this function
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    if(1){
        int run, runs;
        x_and_coeff *xc= b->x_coeff;        // write cursor into the output list
        x_and_coeff *prev_xc= NULL;         // read cursor into the previous row (valid once y>0)
        x_and_coeff *prev2_xc= xc;          // start of the row currently being written
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; // read cursor into the parent's list
        x_and_coeff *prev_parent_xc= parent_xc; // start of the current parent row

        // number of coded runs, then the first run length; once the coded
        // runs are used up the run is treated as effectively infinite
        runs= get_symbol2(&s->c, b->state[30], 0);
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
        else           run= INT_MAX;

        for(y=0; y<h; y++){
            int v=0;                 // value decoded at the current x, becomes "left" for x+1
            int lt=0, t=0, rt=0;     // top-left, top and top-right neighbours

            // prime rt so that the first "t= rt" below yields the value above x=0
            if(y && prev_xc->x == 0){
                rt= prev_xc->coeff;
            }
            for(x=0; x<w; x++){
                int p=0;             // parent coefficient
                const int l= v;      // left neighbour

                lt= t; t= rt;        // slide the top neighbour window one step right

                if(y){
                    // advance in the previous row and fetch the entry at x+1, if any
                    if(prev_xc->x <= x)
                        prev_xc++;
                    if(prev_xc->x == x + 1)
                        rt= prev_xc->coeff;
                    else
                        rt=0;
                }
                if(parent_xc){
                    // the parent band is indexed at half resolution (x>>1)
                    if(x>>1 > parent_xc->x){
                        parent_xc++;
                    }
                    if(x>>1 == parent_xc->x){
                        p= parent_xc->coeff;
                    }
                }
                if(/*ll|*/l|lt|t|rt|p){
                    // some neighbour is non-zero: significance is coded explicitly.
                    // With the 2*magnitude+sign storage, x>>1 is the magnitude
                    // and t&~1 is twice the magnitude of t.
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));

                    v=get_rac(&s->c, &b->state[0][context]);
                    if(v){
                        // magnitude (coded minus one), then the sign bit
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);

                        xc->x=x;
                        (xc++)->coeff= v;
                    }
                }else{
                    // all neighbours are zero: positions are covered by zero runs
                    if(!run){
                        // run exhausted: fetch the next run length, then a
                        // non-zero coefficient is decoded with context 0
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
                        else           run= INT_MAX;
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);

                        xc->x=x;
                        (xc++)->coeff= v;
                    }else{
                        int max_run;
                        run--;
                        v=0;

                        // skip ahead in one step; the skip is limited by the
                        // remaining run, by the next entry of the previous row
                        // (or the row end when y==0) and by the next parent entry
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
                        else  max_run= FFMIN(run, w-x-1);
                        if(parent_xc)
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
                        x+= max_run;
                        run-= max_run;
                    }
                }
            }
            (xc++)->x= w+1; //end marker
            prev_xc= prev2_xc;   // the row just written becomes the previous row
            prev2_xc= xc;

            if(parent_xc){
                if(y&1){
                    // odd row done: move past the parent's row terminator to its next row
                    while(parent_xc->x != parent->width+1)
                        parent_xc++;
                    parent_xc++;
                    prev_parent_xc= parent_xc;
                }else{
                    // even row done: the next row uses the same parent row again
                    parent_xc= prev_parent_xc;
                }
            }
        }

        (xc++)->x= w+1; //end marker
    }
}
01566 
01567 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
01568     const int w= b->width;
01569     int y;
01570     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01571     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01572     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01573     int new_index = 0;
01574 
01575     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
01576         qadd= 0;
01577         qmul= 1<<QEXPSHIFT;
01578     }
01579 
01580     /* If we are on the second or later slice, restore our index. */
01581     if (start_y != 0)
01582         new_index = save_state[0];
01583 
01584 
01585     for(y=start_y; y<h; y++){
01586         int x = 0;
01587         int v;
01588         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
01589         memset(line, 0, b->width*sizeof(IDWTELEM));
01590         v = b->x_coeff[new_index].coeff;
01591         x = b->x_coeff[new_index++].x;
01592         while(x < w){
01593             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
01594             register int u= -(v&1);
01595             line[x] = (t^u) - u;
01596 
01597             v = b->x_coeff[new_index].coeff;
01598             x = b->x_coeff[new_index++].x;
01599         }
01600     }
01601 
01602     /* Save our variables for the next slice. */
01603     save_state[0] = new_index;
01604 
01605     return;
01606 }
01607 
01608 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
01609     int plane_index, level, orientation;
01610 
01611     for(plane_index=0; plane_index<3; plane_index++){
01612         for(level=0; level<MAX_DECOMPOSITIONS; level++){
01613             for(orientation=level ? 1:0; orientation<4; orientation++){
01614                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
01615             }
01616         }
01617     }
01618     memset(s->header_state, MID_STATE, sizeof(s->header_state));
01619     memset(s->block_state, MID_STATE, sizeof(s->block_state));
01620 }
01621 
01622 static int alloc_blocks(SnowContext *s){
01623     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
01624     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
01625 
01626     s->b_width = w;
01627     s->b_height= h;
01628 
01629     av_free(s->block);
01630     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
01631     return 0;
01632 }
01633 
01634 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
01635     uint8_t *bytestream= d->bytestream;
01636     uint8_t *bytestream_start= d->bytestream_start;
01637     *d= *s;
01638     d->bytestream= bytestream;
01639     d->bytestream_start= bytestream_start;
01640 }
01641 
01642 //near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x w square.
 *
 * @param pix       top-left sample of the square
 * @param line_size distance in bytes between the starts of consecutive rows
 * @param w         width and height of the square
 * @return sum of the w*w samples
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    const uint8_t *row = pix;
    int total = 0;
    int r, col;

    for (r = 0; r < w; r++) {
        for (col = 0; col < w; col++)
            total += row[col];
        row += line_size;
    }
    return total;
}
01657 
01658 //near copy & paste from dsputil, FIXME
01659 static int pix_norm1(uint8_t * pix, int line_size, int w)
01660 {
01661     int s, i, j;
01662     uint32_t *sq = ff_squareTbl + 256;
01663 
01664     s = 0;
01665     for (i = 0; i < w; i++) {
01666         for (j = 0; j < w; j ++) {
01667             s += sq[pix[0]];
01668             pix ++;
01669         }
01670         pix += line_size - w;
01671     }
01672     return s;
01673 }
01674 
01675 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
01676     const int w= s->b_width << s->block_max_depth;
01677     const int rem_depth= s->block_max_depth - level;
01678     const int index= (x + y*w) << rem_depth;
01679     const int block_w= 1<<rem_depth;
01680     BlockNode block;
01681     int i,j;
01682 
01683     block.color[0]= l;
01684     block.color[1]= cb;
01685     block.color[2]= cr;
01686     block.mx= mx;
01687     block.my= my;
01688     block.ref= ref;
01689     block.type= type;
01690     block.level= level;
01691 
01692     for(j=0; j<block_w; j++){
01693         for(i=0; i<block_w; i++){
01694             s->block[index + i + j*w]= block;
01695         }
01696     }
01697 }
01698 
01699 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
01700     const int offset[3]= {
01701           y*c->  stride + x,
01702         ((y*c->uvstride + x)>>1),
01703         ((y*c->uvstride + x)>>1),
01704     };
01705     int i;
01706     for(i=0; i<3; i++){
01707         c->src[0][i]= src [i];
01708         c->ref[0][i]= ref [i] + offset[i];
01709     }
01710     assert(!ref_index);
01711 }
01712 
01713 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
01714                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
01715     if(s->ref_frames == 1){
01716         *mx = mid_pred(left->mx, top->mx, tr->mx);
01717         *my = mid_pred(left->my, top->my, tr->my);
01718     }else{
01719         const int *scale = scale_mv_ref[ref];
01720         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
01721                        (top ->mx * scale[top ->ref] + 128) >>8,
01722                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
01723         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
01724                        (top ->my * scale[top ->ref] + 128) >>8,
01725                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
01726     }
01727 }
01728 
//FIXME copy&paste
// Named slots of a local predictor array "P" (encode_q_branch declares it as
// int P[10][2], one x/y pair per slot, and hands it to ff_epzs_motion_search).
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
// NOTE(review): presumably mirrors the flag value expected by the shared
// motion estimation code - confirm before changing
#define FLAG_QPEL   1 //must be 1
01736 
/**
 * Recursively choose and encode the block at (x,y) of quadtree level "level",
 * using motion search.
 *
 * Three candidates are compared by score: an inter block (best motion vector
 * over all reference frames), an intra block (mean color) and, unless the
 * maximum depth is reached, a split into four sub-blocks.
 * The inter and intra candidates are first coded into temporary range coders
 * (pc/ic) with private copies of the block states; the split candidate is
 * coded directly into s->c. If the split does not win, s->c, its output
 * position and s->block_state are replaced by the winning temporary coder,
 * which discards whatever the sub-blocks wrote.
 *
 * On keyframes nothing is coded; the block is set to intra with the left
 * neighbour's color and 0 is returned.
 *
 * @param level quadtree depth, the block is 1<<(LOG2_MB_SIZE - level) pixels wide
 * @param x,y   block coordinates in units of blocks of this level
 * @return score of the chosen candidate
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
    // scratch output and state copies for the inter (p_) and intra (i_) candidates
    uint8_t p_buffer[1024];
    uint8_t i_buffer[1024];
    uint8_t p_state[sizeof(s->block_state)];
    uint8_t i_state[sizeof(s->block_state)];
    RangeCoder pc, ic;
    // output position of the real coder on entry, restored when a candidate is committed
    uint8_t *pbbak= s->c.bytestream;
    uint8_t *pbbak_start= s->c.bytestream_start;
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
    const int w= s->b_width  << s->block_max_depth;
    const int h= s->b_height << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int block_w= 1<<(LOG2_MB_SIZE - level);
    int trx= (x+1)<<rem_depth;
    int try= (y+1)<<rem_depth;
    // neighbours in the finest-level block array; null_block stands in outside the picture
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    // color prediction comes from the left neighbour
    int pl = left->color[0];
    int pcb= left->color[1];
    int pcr= left->color[2];
    int pmx, pmy;
    int mx=0, my=0;
    int l,cr,cb;
    const int stride= s->current_picture.linesize[0];
    const int uvstride= s->current_picture.linesize[1];
    // input samples of this block; the chroma planes use half the luma offset
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
    int P[10][2];
    int16_t last_mv[3][2];
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
    const int shift= 1+qpel;
    MotionEstContext *c= &s->m.me;
    // entropy coding contexts derived from the left/top neighbours
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
    int my_context= av_log2(2*FFABS(left->my - top->my));
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
    int ref, best_ref, ref_score, ref_mx, ref_my;

    assert(sizeof(s->block_state) >= 256);
    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
        return 0;
    }

//    clip predictors / edge ?

    // motion vector predictors for the search
    P_LEFT[0]= left->mx;
    P_LEFT[1]= left->my;
    P_TOP [0]= top->mx;
    P_TOP [1]= top->my;
    P_TOPRIGHT[0]= tr->mx;
    P_TOPRIGHT[1]= tr->my;

    // vectors currently stored for this block and its right/bottom neighbours
    last_mv[0][0]= s->block[index].mx;
    last_mv[0][1]= s->block[index].my;
    last_mv[1][0]= right->mx;
    last_mv[1][1]= right->my;
    last_mv[2][0]= bottom->mx;
    last_mv[2][1]= bottom->my;

    s->m.mb_stride=2;
    s->m.mb_x=
    s->m.mb_y= 0;
    c->skip= 0;

    assert(c->  stride ==   stride);
    assert(c->uvstride == uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;

    // search range relative to the block: the picture area plus a 14 pixel margin
    c->xmin = - x*block_w - 16+2;
    c->ymin = - y*block_w - 16+2;
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;

    // clip the predictors to the search range (in sub-pel units)
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

    // first row: only the left predictor is available
    if (!y) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
    } else {
        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    // motion search in every reference frame, keeping the best score
    score= INT_MAX;
    best_ref= 0;
    for(ref=0; ref<s->ref_frames; ref++){
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);

        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);

        assert(ref_mx >= c->xmin);
        assert(ref_mx <= c->xmax);
        assert(ref_my >= c->ymin);
        assert(ref_my <= c->ymax);

        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
        // penalty that grows with the reference index
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
        // remember the per-reference result when the tables are allocated
        if(s->ref_mvs[ref]){
            s->ref_mvs[ref][index][0]= ref_mx;
            s->ref_mvs[ref][index][1]= ref_my;
            s->ref_scores[ref][index]= ref_score;
        }
        if(score > ref_score){
            score= ref_score;
            best_ref= ref;
            mx= ref_mx;
            my= ref_my;
        }
    }
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2

  //  subpel search
    // bit position of the real coder before this block, used to measure candidate cost
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
    // trial-encode the inter candidate into p_buffer with a private state copy
    pc= s->c;
    pc.bytestream_start=
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
    memcpy(p_state, s->block_state, sizeof(s->block_state));

    if(level!=s->block_max_depth)
        put_rac(&pc, &p_state[4 + s_context], 1);
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
    if(s->ref_frames > 1)
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
    p_len= pc.bytestream - pc.bytestream_start;
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;

    // intra candidate: l is the rounded mean luma, iscore the sum of squared
    // luma deviations from l
    block_s= block_w*block_w;
    sum = pix_sum(current_data[0], stride, block_w);
    l= (sum + block_s/2)/block_s;
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;

    // rounded chroma means; chroma distortion is not added to iscore
    block_s= block_w*block_w>>2;
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
    cb= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
    cr= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;

    // trial-encode the intra candidate into i_buffer with a private state copy
    ic= s->c;
    ic.bytestream_start=
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
    memcpy(i_state, s->block_state, sizeof(s->block_state));
    if(level!=s->block_max_depth)
        put_rac(&ic, &i_state[4 + s_context], 1);
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
    put_symbol(&ic, &i_state[32],  l-pl , 1);
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
    i_len= ic.bytestream - ic.bytestream_start;
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;

//    assert(score==256*256*256*64-1);
    assert(iscore < 255*255*256 + s->lambda2*10);
    assert(iscore >= 0);
    assert(l>=0 && l<=255);
    assert(pl>=0 && pl<=255);

    // top-level blocks also feed the scene change statistic
    if(level==0){
        int varc= iscore >> 8;
        int vard= score >> 8;
        if (vard <= 64 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->m.qscale;
    }

    // split candidate: coded directly into s->c; kept only if it beats both others
    if(level!=s->block_max_depth){
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead

        if(score2 < score && score2 < iscore)
            return score2;
    }

    // commit the cheaper of intra/inter: copy its bytes to the original output
    // position and adopt its coder and block states
    if(iscore < score){
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
        memcpy(pbbak, i_buffer, i_len);
        s->c= ic;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + i_len;
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
        memcpy(s->block_state, i_state, sizeof(s->block_state));
        return iscore;
    }else{
        memcpy(pbbak, p_buffer, p_len);
        s->c= pc;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + p_len;
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
        memcpy(s->block_state, p_state, sizeof(s->block_state));
        return score;
    }
}
01960 
01961 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
01962     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
01963         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
01964     }else{
01965         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
01966     }
01967 }
01968 
01969 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
01970     const int w= s->b_width  << s->block_max_depth;
01971     const int rem_depth= s->block_max_depth - level;
01972     const int index= (x + y*w) << rem_depth;
01973     int trx= (x+1)<<rem_depth;
01974     BlockNode *b= &s->block[index];
01975     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
01976     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
01977     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
01978     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
01979     int pl = left->color[0];
01980     int pcb= left->color[1];
01981     int pcr= left->color[2];
01982     int pmx, pmy;
01983     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
01984     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
01985     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
01986     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
01987 
01988     if(s->keyframe){
01989         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
01990         return;
01991     }
01992 
01993     if(level!=s->block_max_depth){
01994         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
01995             put_rac(&s->c, &s->block_state[4 + s_context], 1);
01996         }else{
01997             put_rac(&s->c, &s->block_state[4 + s_context], 0);
01998             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
01999             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02000             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02001             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02002             return;
02003         }
02004     }
02005     if(b->type & BLOCK_INTRA){
02006         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02007         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02008         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02009         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02010         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02011         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02012     }else{
02013         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02014         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02015         if(s->ref_frames > 1)
02016             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02017         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02018         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02019         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02020     }
02021 }
02022 
02023 static void decode_q_branch(SnowContext *s, int level, int x, int y){
02024     const int w= s->b_width << s->block_max_depth;
02025     const int rem_depth= s->block_max_depth - level;
02026     const int index= (x + y*w) << rem_depth;
02027     int trx= (x+1)<<rem_depth;
02028     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02029     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
02030     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
02031     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
02032     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02033 
02034     if(s->keyframe){
02035         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
02036         return;
02037     }
02038 
02039     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
02040         int type, mx, my;
02041         int l = left->color[0];
02042         int cb= left->color[1];
02043         int cr= left->color[2];
02044         int ref = 0;
02045         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02046         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
02047         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
02048 
02049         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
02050 
02051         if(type){
02052             pred_mv(s, &mx, &my, 0, left, top, tr);
02053             l += get_symbol(&s->c, &s->block_state[32], 1);
02054             cb+= get_symbol(&s->c, &s->block_state[64], 1);
02055             cr+= get_symbol(&s->c, &s->block_state[96], 1);
02056         }else{
02057             if(s->ref_frames > 1)
02058                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
02059             pred_mv(s, &mx, &my, ref, left, top, tr);
02060             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
02061             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
02062         }
02063         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
02064     }else{
02065         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
02066         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
02067         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
02068         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
02069     }
02070 }
02071 
02072 static void encode_blocks(SnowContext *s, int search){
02073     int x, y;
02074     int w= s->b_width;
02075     int h= s->b_height;
02076 
02077     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
02078         iterative_me(s);
02079 
02080     for(y=0; y<h; y++){
02081         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
02082             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02083             return;
02084         }
02085         for(x=0; x<w; x++){
02086             if(s->avctx->me_method == ME_ITER || !search)
02087                 encode_q_branch2(s, 0, x, y);
02088             else
02089                 encode_q_branch (s, 0, x, y);
02090         }
02091     }
02092 }
02093 
02094 static void decode_blocks(SnowContext *s){
02095     int x, y;
02096     int w= s->b_width;
02097     int h= s->b_height;
02098 
02099     for(y=0; y<h; y++){
02100         for(x=0; x<w; x++){
02101             decode_q_branch(s, 0, x, y);
02102         }
02103     }
02104 }
02105 
02106 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
02107     static const uint8_t weight[64]={
02108     8,7,6,5,4,3,2,1,
02109     7,7,0,0,0,0,0,1,
02110     6,0,6,0,0,0,2,0,
02111     5,0,0,5,0,3,0,0,
02112     4,0,0,0,4,0,0,0,
02113     3,0,0,5,0,3,0,0,
02114     2,0,6,0,0,0,2,0,
02115     1,7,0,0,0,0,0,1,
02116     };
02117 
02118     static const uint8_t brane[256]={
02119     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
02120     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
02121     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
02122     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
02123     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
02124     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
02125     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
02126     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
02127     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
02128     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
02129     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
02130     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
02131     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
02132     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
02133     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
02134     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
02135     };
02136 
02137     static const uint8_t needs[16]={
02138     0,1,0,0,
02139     2,4,2,0,
02140     0,1,0,0,
02141     15
02142     };
02143 
02144     int x, y, b, r, l;
02145     int16_t tmpIt   [64*(32+HTAPS_MAX)];
02146     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
02147     int16_t *tmpI= tmpIt;
02148     uint8_t *tmp2= tmp2t[0];
02149     const uint8_t *hpel[11];
02150     assert(dx<16 && dy<16);
02151     r= brane[dx + 16*dy]&15;
02152     l= brane[dx + 16*dy]>>4;
02153 
02154     b= needs[l] | needs[r];
02155     if(p && !p->diag_mc)
02156         b= 15;
02157 
02158     if(b&5){
02159         for(y=0; y < b_h+HTAPS_MAX-1; y++){
02160             for(x=0; x < b_w; x++){
02161                 int a_1=src[x + HTAPS_MAX/2-4];
02162                 int a0= src[x + HTAPS_MAX/2-3];
02163                 int a1= src[x + HTAPS_MAX/2-2];
02164                 int a2= src[x + HTAPS_MAX/2-1];
02165                 int a3= src[x + HTAPS_MAX/2+0];
02166                 int a4= src[x + HTAPS_MAX/2+1];
02167                 int a5= src[x + HTAPS_MAX/2+2];
02168                 int a6= src[x + HTAPS_MAX/2+3];
02169                 int am=0;
02170                 if(!p || p->fast_mc){
02171                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
02172                     tmpI[x]= am;
02173                     am= (am+16)>>5;
02174                 }else{
02175                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
02176                     tmpI[x]= am;
02177                     am= (am+32)>>6;
02178                 }
02179 
02180                 if(am&(~255)) am= ~(am>>31);
02181                 tmp2[x]= am;
02182             }
02183             tmpI+= 64;
02184             tmp2+= stride;
02185             src += stride;
02186         }
02187         src -= stride*y;
02188     }
02189     src += HTAPS_MAX/2 - 1;
02190     tmp2= tmp2t[1];
02191 
02192     if(b&2){
02193         for(y=0; y < b_h; y++){
02194             for(x=0; x < b_w+1; x++){
02195                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
02196                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
02197                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
02198                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
02199                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
02200                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
02201                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
02202                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
02203                 int am=0;
02204                 if(!p || p->fast_mc)
02205                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
02206                 else
02207                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
02208 
02209                 if(am&(~255)) am= ~(am>>31);
02210                 tmp2[x]= am;
02211             }
02212             src += stride;
02213             tmp2+= stride;
02214         }
02215         src -= stride*y;
02216     }
02217     src += stride*(HTAPS_MAX/2 - 1);
02218     tmp2= tmp2t[2];
02219     tmpI= tmpIt;
02220     if(b&4){
02221         for(y=0; y < b_h; y++){
02222             for(x=0; x < b_w; x++){
02223                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
02224                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
02225                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
02226                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
02227                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
02228                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
02229                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
02230                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
02231                 int am=0;
02232                 if(!p || p->fast_mc)
02233                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
02234                 else
02235                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
02236                 if(am&(~255)) am= ~(am>>31);
02237                 tmp2[x]= am;
02238             }
02239             tmpI+= 64;
02240             tmp2+= stride;
02241         }
02242     }
02243 
02244     hpel[ 0]= src;
02245     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
02246     hpel[ 2]= src + 1;
02247 
02248     hpel[ 4]= tmp2t[1];
02249     hpel[ 5]= tmp2t[2];
02250     hpel[ 6]= tmp2t[1] + 1;
02251 
02252     hpel[ 8]= src + stride;
02253     hpel[ 9]= hpel[1] + stride;
02254     hpel[10]= hpel[8] + 1;
02255 
02256     if(b==15){
02257         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
02258         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
02259         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
02260         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
02261         dx&=7;
02262         dy&=7;
02263         for(y=0; y < b_h; y++){
02264             for(x=0; x < b_w; x++){
02265                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
02266                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
02267             }
02268             src1+=stride;
02269             src2+=stride;
02270             src3+=stride;
02271             src4+=stride;
02272             dst +=stride;
02273         }
02274     }else{
02275         const uint8_t *src1= hpel[l];
02276         const uint8_t *src2= hpel[r];
02277         int a= weight[((dx&7) + (8*(dy&7)))];
02278         int b= 8-a;
02279         for(y=0; y < b_h; y++){
02280             for(x=0; x < b_w; x++){
02281                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
02282             }
02283             src1+=stride;
02284             src2+=stride;
02285             dst +=stride;
02286         }
02287     }
02288 }
02289 
02290 #define mca(dx,dy,b_w)\
02291 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
02292     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
02293     assert(h==b_w);\
02294     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
02295 }
02296 
02297 mca( 0, 0,16)
02298 mca( 8, 0,16)
02299 mca( 0, 8,16)
02300 mca( 8, 8,16)
02301 mca( 0, 0,8)
02302 mca( 8, 0,8)
02303 mca( 0, 8,8)
02304 mca( 8, 8,8)
02305 
02306 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
02307     if(block->type & BLOCK_INTRA){
02308         int x, y;
02309         const int color = block->color[plane_index];
02310         const int color4= color*0x01010101;
02311         if(b_w==32){
02312             for(y=0; y < b_h; y++){
02313                 *(uint32_t*)&dst[0 + y*stride]= color4;
02314                 *(uint32_t*)&dst[4 + y*stride]= color4;
02315                 *(uint32_t*)&dst[8 + y*stride]= color4;
02316                 *(uint32_t*)&dst[12+ y*stride]= color4;
02317                 *(uint32_t*)&dst[16+ y*stride]= color4;
02318                 *(uint32_t*)&dst[20+ y*stride]= color4;
02319                 *(uint32_t*)&dst[24+ y*stride]= color4;
02320                 *(uint32_t*)&dst[28+ y*stride]= color4;
02321             }
02322         }else if(b_w==16){
02323             for(y=0; y < b_h; y++){
02324                 *(uint32_t*)&dst[0 + y*stride]= color4;
02325                 *(uint32_t*)&dst[4 + y*stride]= color4;
02326                 *(uint32_t*)&dst[8 + y*stride]= color4;
02327                 *(uint32_t*)&dst[12+ y*stride]= color4;
02328             }
02329         }else if(b_w==8){
02330             for(y=0; y < b_h; y++){
02331                 *(uint32_t*)&dst[0 + y*stride]= color4;
02332                 *(uint32_t*)&dst[4 + y*stride]= color4;
02333             }
02334         }else if(b_w==4){
02335             for(y=0; y < b_h; y++){
02336                 *(uint32_t*)&dst[0 + y*stride]= color4;
02337             }
02338         }else{
02339             for(y=0; y < b_h; y++){
02340                 for(x=0; x < b_w; x++){
02341                     dst[x + y*stride]= color;
02342                 }
02343             }
02344         }
02345     }else{
02346         uint8_t *src= s->last_picture[block->ref].data[plane_index];
02347         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
02348         int mx= block->mx*scale;
02349         int my= block->my*scale;
02350         const int dx= mx&15;
02351         const int dy= my&15;
02352         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
02353         sx += (mx>>4) - (HTAPS_MAX/2-1);
02354         sy += (my>>4) - (HTAPS_MAX/2-1);
02355         src += sx + sy*stride;
02356         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
02357            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
02358             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
02359             src= tmp + MB_SIZE;
02360         }
02361 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
02362 //        assert(!(b_w&(b_w-1)));
02363         assert(b_w>1 && b_h>1);
02364         assert((tab_index>=0 && tab_index<4) || b_w==32);
02365         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
02366             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
02367         else if(b_w==32){
02368             int y;
02369             for(y=0; y<b_h; y+=16){
02370                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
02371                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
02372             }
02373         }else if(b_w==b_h)
02374             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
02375         else if(b_w==2*b_h){
02376             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
02377             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
02378         }else{
02379             assert(2*b_w==b_h);
02380             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
02381             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
02382         }
02383     }
02384 }
02385 
02386 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
02387                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
02388     int y, x;
02389     IDWTELEM * dst;
02390     for(y=0; y<b_h; y++){
02391         //FIXME ugly misuse of obmc_stride
02392         const uint8_t *obmc1= obmc + y*obmc_stride;
02393         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02394         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02395         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02396         dst = slice_buffer_get_line(sb, src_y + y);
02397         for(x=0; x<b_w; x++){
02398             int v=   obmc1[x] * block[3][x + y*src_stride]
02399                     +obmc2[x] * block[2][x + y*src_stride]
02400                     +obmc3[x] * block[1][x + y*src_stride]
02401                     +obmc4[x] * block[0][x + y*src_stride];
02402 
02403             v <<= 8 - LOG2_OBMC_MAX;
02404             if(FRAC_BITS != 8){
02405                 v >>= 8 - FRAC_BITS;
02406             }
02407             if(add){
02408                 v += dst[x + src_x];
02409                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02410                 if(v&(~255)) v= ~(v>>31);
02411                 dst8[x + y*src_stride] = v;
02412             }else{
02413                 dst[x + src_x] -= v;
02414             }
02415         }
02416     }
02417 }
02418 
02419 //FIXME name cleanup (b_w, block_w, b_width stuff)
02420 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
02421     const int b_width = s->b_width  << s->block_max_depth;
02422     const int b_height= s->b_height << s->block_max_depth;
02423     const int b_stride= b_width;
02424     BlockNode *lt= &s->block[b_x + b_y*b_stride];
02425     BlockNode *rt= lt+1;
02426     BlockNode *lb= lt+b_stride;
02427     BlockNode *rb= lb+1;
02428     uint8_t *block[4];
02429     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
02430     uint8_t *tmp = s->scratchbuf;
02431     uint8_t *ptmp;
02432     int x,y;
02433 
02434     if(b_x<0){
02435         lt= rt;
02436         lb= rb;
02437     }else if(b_x + 1 >= b_width){
02438         rt= lt;
02439         rb= lb;
02440     }
02441     if(b_y<0){
02442         lt= lb;
02443         rt= rb;
02444     }else if(b_y + 1 >= b_height){
02445         lb= lt;
02446         rb= rt;
02447     }
02448 
02449     if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
02450         obmc -= src_x;
02451         b_w += src_x;
02452         if(!sliced && !offset_dst)
02453             dst -= src_x;
02454         src_x=0;
02455     }else if(src_x + b_w > w){
02456         b_w = w - src_x;
02457     }
02458     if(src_y<0){
02459         obmc -= src_y*obmc_stride;
02460         b_h += src_y;
02461         if(!sliced && !offset_dst)
02462             dst -= src_y*dst_stride;
02463         src_y=0;
02464     }else if(src_y + b_h> h){
02465         b_h = h - src_y;
02466     }
02467 
02468     if(b_w<=0 || b_h<=0) return;
02469 
02470     assert(src_stride > 2*MB_SIZE + 5);
02471 
02472     if(!sliced && offset_dst)
02473         dst += src_x + src_y*dst_stride;
02474     dst8+= src_x + src_y*src_stride;
02475 //    src += src_x + src_y*src_stride;
02476 
02477     ptmp= tmp + 3*tmp_step;
02478     block[0]= ptmp;
02479     ptmp+=tmp_step;
02480     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
02481 
02482     if(same_block(lt, rt)){
02483         block[1]= block[0];
02484     }else{
02485         block[1]= ptmp;
02486         ptmp+=tmp_step;
02487         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
02488     }
02489 
02490     if(same_block(lt, lb)){
02491         block[2]= block[0];
02492     }else if(same_block(rt, lb)){
02493         block[2]= block[1];
02494     }else{
02495         block[2]= ptmp;
02496         ptmp+=tmp_step;
02497         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
02498     }
02499 
02500     if(same_block(lt, rb) ){
02501         block[3]= block[0];
02502     }else if(same_block(rt, rb)){
02503         block[3]= block[1];
02504     }else if(same_block(lb, rb)){
02505         block[3]= block[2];
02506     }else{
02507         block[3]= ptmp;
02508         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
02509     }
02510 #if 0
02511     for(y=0; y<b_h; y++){
02512         for(x=0; x<b_w; x++){
02513             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
02514             if(add) dst[x + y*dst_stride] += v;
02515             else    dst[x + y*dst_stride] -= v;
02516         }
02517     }
02518     for(y=0; y<b_h; y++){
02519         uint8_t *obmc2= obmc + (obmc_stride>>1);
02520         for(x=0; x<b_w; x++){
02521             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
02522             if(add) dst[x + y*dst_stride] += v;
02523             else    dst[x + y*dst_stride] -= v;
02524         }
02525     }
02526     for(y=0; y<b_h; y++){
02527         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02528         for(x=0; x<b_w; x++){
02529             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
02530             if(add) dst[x + y*dst_stride] += v;
02531             else    dst[x + y*dst_stride] -= v;
02532         }
02533     }
02534     for(y=0; y<b_h; y++){
02535         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
02536         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02537         for(x=0; x<b_w; x++){
02538             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
02539             if(add) dst[x + y*dst_stride] += v;
02540             else    dst[x + y*dst_stride] -= v;
02541         }
02542     }
02543 #else
02544     if(sliced){
02545         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
02546     }else{
02547         for(y=0; y<b_h; y++){
02548             //FIXME ugly misuse of obmc_stride
02549             const uint8_t *obmc1= obmc + y*obmc_stride;
02550             const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
02551             const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
02552             const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
02553             for(x=0; x<b_w; x++){
02554                 int v=   obmc1[x] * block[3][x + y*src_stride]
02555                         +obmc2[x] * block[2][x + y*src_stride]
02556                         +obmc3[x] * block[1][x + y*src_stride]
02557                         +obmc4[x] * block[0][x + y*src_stride];
02558 
02559                 v <<= 8 - LOG2_OBMC_MAX;
02560                 if(FRAC_BITS != 8){
02561                     v >>= 8 - FRAC_BITS;
02562                 }
02563                 if(add){
02564                     v += dst[x + y*dst_stride];
02565                     v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
02566                     if(v&(~255)) v= ~(v>>31);
02567                     dst8[x + y*src_stride] = v;
02568                 }else{
02569                     dst[x + y*dst_stride] -= v;
02570                 }
02571             }
02572         }
02573     }
02574 #endif /* 0 */
02575 }
02576 
02577 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
02578     Plane *p= &s->plane[plane_index];
02579     const int mb_w= s->b_width  << s->block_max_depth;
02580     const int mb_h= s->b_height << s->block_max_depth;
02581     int x, y, mb_x;
02582     int block_size = MB_SIZE >> s->block_max_depth;
02583     int block_w    = plane_index ? block_size/2 : block_size;
02584     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02585     int obmc_stride= plane_index ? block_size : 2*block_size;
02586     int ref_stride= s->current_picture.linesize[plane_index];
02587     uint8_t *dst8= s->current_picture.data[plane_index];
02588     int w= p->width;
02589     int h= p->height;
02590 
02591     if(s->keyframe || (s->avctx->debug&512)){
02592         if(mb_y==mb_h)
02593             return;
02594 
02595         if(add){
02596             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02597 //                DWTELEM * line = slice_buffer_get_line(sb, y);
02598                 IDWTELEM * line = sb->line[y];
02599                 for(x=0; x<w; x++){
02600 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02601                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02602                     v >>= FRAC_BITS;
02603                     if(v&(~255)) v= ~(v>>31);
02604                     dst8[x + y*ref_stride]= v;
02605                 }
02606             }
02607         }else{
02608             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02609 //                DWTELEM * line = slice_buffer_get_line(sb, y);
02610                 IDWTELEM * line = sb->line[y];
02611                 for(x=0; x<w; x++){
02612                     line[x] -= 128 << FRAC_BITS;
02613 //                    buf[x + y*w]-= 128<<FRAC_BITS;
02614                 }
02615             }
02616         }
02617 
02618         return;
02619     }
02620 
02621     for(mb_x=0; mb_x<=mb_w; mb_x++){
02622         add_yblock(s, 1, sb, old_buffer, dst8, obmc,
02623                    block_w*mb_x - block_w/2,
02624                    block_w*mb_y - block_w/2,
02625                    block_w, block_w,
02626                    w, h,
02627                    w, ref_stride, obmc_stride,
02628                    mb_x - 1, mb_y - 1,
02629                    add, 0, plane_index);
02630     }
02631 }
02632 
02633 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
02634     Plane *p= &s->plane[plane_index];
02635     const int mb_w= s->b_width  << s->block_max_depth;
02636     const int mb_h= s->b_height << s->block_max_depth;
02637     int x, y, mb_x;
02638     int block_size = MB_SIZE >> s->block_max_depth;
02639     int block_w    = plane_index ? block_size/2 : block_size;
02640     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02641     const int obmc_stride= plane_index ? block_size : 2*block_size;
02642     int ref_stride= s->current_picture.linesize[plane_index];
02643     uint8_t *dst8= s->current_picture.data[plane_index];
02644     int w= p->width;
02645     int h= p->height;
02646 
02647     if(s->keyframe || (s->avctx->debug&512)){
02648         if(mb_y==mb_h)
02649             return;
02650 
02651         if(add){
02652             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02653                 for(x=0; x<w; x++){
02654                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
02655                     v >>= FRAC_BITS;
02656                     if(v&(~255)) v= ~(v>>31);
02657                     dst8[x + y*ref_stride]= v;
02658                 }
02659             }
02660         }else{
02661             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
02662                 for(x=0; x<w; x++){
02663                     buf[x + y*w]-= 128<<FRAC_BITS;
02664                 }
02665             }
02666         }
02667 
02668         return;
02669     }
02670 
02671     for(mb_x=0; mb_x<=mb_w; mb_x++){
02672         add_yblock(s, 0, NULL, buf, dst8, obmc,
02673                    block_w*mb_x - block_w/2,
02674                    block_w*mb_y - block_w/2,
02675                    block_w, block_w,
02676                    w, h,
02677                    w, ref_stride, obmc_stride,
02678                    mb_x - 1, mb_y - 1,
02679                    add, 1, plane_index);
02680     }
02681 }
02682 
02683 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
02684     const int mb_h= s->b_height << s->block_max_depth;
02685     int mb_y;
02686     for(mb_y=0; mb_y<=mb_h; mb_y++)
02687         predict_slice(s, buf, plane_index, add, mb_y);
02688 }
02689 
02690 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02691     int i, x2, y2;
02692     Plane *p= &s->plane[plane_index];
02693     const int block_size = MB_SIZE >> s->block_max_depth;
02694     const int block_w    = plane_index ? block_size/2 : block_size;
02695     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02696     const int obmc_stride= plane_index ? block_size : 2*block_size;
02697     const int ref_stride= s->current_picture.linesize[plane_index];
02698     uint8_t *src= s-> input_picture.data[plane_index];
02699     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
02700     const int b_stride = s->b_width << s->block_max_depth;
02701     const int w= p->width;
02702     const int h= p->height;
02703     int index= mb_x + mb_y*b_stride;
02704     BlockNode *b= &s->block[index];
02705     BlockNode backup= *b;
02706     int ab=0;
02707     int aa=0;
02708 
02709     b->type|= BLOCK_INTRA;
02710     b->color[plane_index]= 0;
02711     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02712 
02713     for(i=0; i<4; i++){
02714         int mb_x2= mb_x + (i &1) - 1;
02715         int mb_y2= mb_y + (i>>1) - 1;
02716         int x= block_w*mb_x2 + block_w/2;
02717         int y= block_w*mb_y2 + block_w/2;
02718 
02719         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02720                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02721 
02722         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02723             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
02724                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02725                 int obmc_v= obmc[index];
02726                 int d;
02727                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02728                 if(x<0) obmc_v += obmc[index + block_w];
02729                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02730                 if(x+block_w>w) obmc_v += obmc[index - block_w];
02731                 //FIXME precalculate this or simplify it somehow else
02732 
02733                 d = -dst[index] + (1<<(FRAC_BITS-1));
02734                 dst[index] = d;
02735                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02736                 aa += obmc_v * obmc_v; //FIXME precalculate this
02737             }
02738         }
02739     }
02740     *b= backup;
02741 
02742     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
02743 }
02744 
02745 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02746     const int b_stride = s->b_width << s->block_max_depth;
02747     const int b_height = s->b_height<< s->block_max_depth;
02748     int index= x + y*b_stride;
02749     const BlockNode *b     = &s->block[index];
02750     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02751     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
02752     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
02753     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02754     int dmx, dmy;
02755 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
02756 //  int my_context= av_log2(2*FFABS(left->my - top->my));
02757 
02758     if(x<0 || x>=b_stride || y>=b_height)
02759         return 0;
02760 /*
02761 1            0      0
02762 01X          1-2    1
02763 001XX        3-6    2-3
02764 0001XXX      7-14   4-7
02765 00001XXXX   15-30   8-15
02766 */
02767 //FIXME try accurate rate
02768 //FIXME intra and inter predictors if surrounding blocks are not the same type
02769     if(b->type & BLOCK_INTRA){
02770         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02771                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
02772                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
02773     }else{
02774         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02775         dmx-= b->mx;
02776         dmy-= b->my;
02777         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
02778                     + av_log2(2*FFABS(dmy))
02779                     + av_log2(2*b->ref));
02780     }
02781 }
02782 
02783 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02784     Plane *p= &s->plane[plane_index];
02785     const int block_size = MB_SIZE >> s->block_max_depth;
02786     const int block_w    = plane_index ? block_size/2 : block_size;
02787     const int obmc_stride= plane_index ? block_size : 2*block_size;
02788     const int ref_stride= s->current_picture.linesize[plane_index];
02789     uint8_t *dst= s->current_picture.data[plane_index];
02790     uint8_t *src= s->  input_picture.data[plane_index];
02791     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02792     uint8_t *cur = s->scratchbuf;
02793     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02794     const int b_stride = s->b_width << s->block_max_depth;
02795     const int b_height = s->b_height<< s->block_max_depth;
02796     const int w= p->width;
02797     const int h= p->height;
02798     int distortion;
02799     int rate= 0;
02800     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02801     int sx= block_w*mb_x - block_w/2;
02802     int sy= block_w*mb_y - block_w/2;
02803     int x0= FFMAX(0,-sx);
02804     int y0= FFMAX(0,-sy);
02805     int x1= FFMIN(block_w*2, w-sx);
02806     int y1= FFMIN(block_w*2, h-sy);
02807     int i,x,y;
02808 
02809     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02810 
02811     for(y=y0; y<y1; y++){
02812         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02813         const IDWTELEM *pred1 = pred + y*obmc_stride;
02814         uint8_t *cur1 = cur + y*ref_stride;
02815         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02816         for(x=x0; x<x1; x++){
02817 #if FRAC_BITS >= LOG2_OBMC_MAX
02818             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02819 #else
02820             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02821 #endif
02822             v = (v + pred1[x]) >> FRAC_BITS;
02823             if(v&(~255)) v= ~(v>>31);
02824             dst1[x] = v;
02825         }
02826     }
02827 
02828     /* copy the regions where obmc[] = (uint8_t)256 */
02829     if(LOG2_OBMC_MAX == 8
02830         && (mb_x == 0 || mb_x == b_stride-1)
02831         && (mb_y == 0 || mb_y == b_height-1)){
02832         if(mb_x == 0)
02833             x1 = block_w;
02834         else
02835             x0 = block_w;
02836         if(mb_y == 0)
02837             y1 = block_w;
02838         else
02839             y0 = block_w;
02840         for(y=y0; y<y1; y++)
02841             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02842     }
02843 
02844     if(block_w==16){
02845         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
02846         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
02847         /* FIXME cmps overlap but do not cover the wavelet's whole support.
02848          * So improving the score of one block is not strictly guaranteed
02849          * to improve the score of the whole frame, thus iterative motion
02850          * estimation does not always converge. */
02851         if(s->avctx->me_cmp == FF_CMP_W97)
02852             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02853         else if(s->avctx->me_cmp == FF_CMP_W53)
02854             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02855         else{
02856             distortion = 0;
02857             for(i=0; i<4; i++){
02858                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02859                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02860             }
02861         }
02862     }else{
02863         assert(block_w==8);
02864         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02865     }
02866 
02867     if(plane_index==0){
02868         for(i=0; i<4; i++){
02869 /* ..RRr
02870  * .RXx.
02871  * rxx..
02872  */
02873             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02874         }
02875         if(mb_x == b_stride-2)
02876             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02877     }
02878     return distortion + rate*penalty_factor;
02879 }
02880 
02881 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02882     int i, y2;
02883     Plane *p= &s->plane[plane_index];
02884     const int block_size = MB_SIZE >> s->block_max_depth;
02885     const int block_w    = plane_index ? block_size/2 : block_size;
02886     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02887     const int obmc_stride= plane_index ? block_size : 2*block_size;
02888     const int ref_stride= s->current_picture.linesize[plane_index];
02889     uint8_t *dst= s->current_picture.data[plane_index];
02890     uint8_t *src= s-> input_picture.data[plane_index];
02891     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
02892     // const has only been removed from zero_dst to suppress a warning
02893     static IDWTELEM zero_dst[4096]; //FIXME
02894     const int b_stride = s->b_width << s->block_max_depth;
02895     const int w= p->width;
02896     const int h= p->height;
02897     int distortion= 0;
02898     int rate= 0;
02899     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02900 
02901     for(i=0; i<9; i++){
02902         int mb_x2= mb_x + (i%3) - 1;
02903         int mb_y2= mb_y + (i/3) - 1;
02904         int x= block_w*mb_x2 + block_w/2;
02905         int y= block_w*mb_y2 + block_w/2;
02906 
02907         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02908                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02909 
02910         //FIXME find a cleaner/simpler way to skip the outside stuff
02911         for(y2= y; y2<0; y2++)
02912             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02913         for(y2= h; y2<y+block_w; y2++)
02914             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02915         if(x<0){
02916             for(y2= y; y2<y+block_w; y2++)
02917                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02918         }
02919         if(x+block_w > w){
02920             for(y2= y; y2<y+block_w; y2++)
02921                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02922         }
02923 
02924         assert(block_w== 8 || block_w==16);
02925         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02926     }
02927 
02928     if(plane_index==0){
02929         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02930         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02931 
02932 /* ..RRRr
02933  * .RXXx.
02934  * .RXXx.
02935  * rxxx.
02936  */
02937         if(merged)
02938             rate = get_block_bits(s, mb_x, mb_y, 2);
02939         for(i=merged?4:0; i<9; i++){
02940             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02941             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02942         }
02943     }
02944     return distortion + rate*penalty_factor;
02945 }
02946 
02947 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
02948     const int b_stride= s->b_width << s->block_max_depth;
02949     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02950     BlockNode backup= *block;
02951     int rd, index, value;
02952 
02953     assert(mb_x>=0 && mb_y>=0);
02954     assert(mb_x<b_stride);
02955 
02956     if(intra){
02957         block->color[0] = p[0];
02958         block->color[1] = p[1];
02959         block->color[2] = p[2];
02960         block->type |= BLOCK_INTRA;
02961     }else{
02962         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
02963         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
02964         if(s->me_cache[index] == value)
02965             return 0;
02966         s->me_cache[index]= value;
02967 
02968         block->mx= p[0];
02969         block->my= p[1];
02970         block->type &= ~BLOCK_INTRA;
02971     }
02972 
02973     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
02974 
02975 //FIXME chroma
02976     if(rd < *best_rd){
02977         *best_rd= rd;
02978         return 1;
02979     }else{
02980         *block= backup;
02981         return 0;
02982     }
02983 }
02984 
02985 /* special case for int[2] args we discard afterwards,
02986  * fixes compilation problem with gcc 2.95 */
02987 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
02988     int p[2] = {p0, p1};
02989     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
02990 }
02991 
02992 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
02993     const int b_stride= s->b_width << s->block_max_depth;
02994     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
02995     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
02996     int rd, index, value;
02997 
02998     assert(mb_x>=0 && mb_y>=0);
02999     assert(mb_x<b_stride);
03000     assert(((mb_x|mb_y)&1) == 0);
03001 
03002     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03003     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03004     if(s->me_cache[index] == value)
03005         return 0;
03006     s->me_cache[index]= value;
03007 
03008     block->mx= p0;
03009     block->my= p1;
03010     block->ref= ref;
03011     block->type &= ~BLOCK_INTRA;
03012     block[1]= block[b_stride]= block[b_stride+1]= *block;
03013 
03014     rd= get_4block_rd(s, mb_x, mb_y, 0);
03015 
03016 //FIXME chroma
03017     if(rd < *best_rd){
03018         *best_rd= rd;
03019         return 1;
03020     }else{
03021         block[0]= backup[0];
03022         block[1]= backup[1];
03023         block[b_stride]= backup[2];
03024         block[b_stride+1]= backup[3];
03025         return 0;
03026     }
03027 }
03028 
03029 static void iterative_me(SnowContext *s){
03030     int pass, mb_x, mb_y;
03031     const int b_width = s->b_width  << s->block_max_depth;
03032     const int b_height= s->b_height << s->block_max_depth;
03033     const int b_stride= b_width;
03034     int color[3];
03035 
03036     {
03037         RangeCoder r = s->c;
03038         uint8_t state[sizeof(s->block_state)];
03039         memcpy(state, s->block_state, sizeof(s->block_state));
03040         for(mb_y= 0; mb_y<s->b_height; mb_y++)
03041             for(mb_x= 0; mb_x<s->b_width; mb_x++)
03042                 encode_q_branch(s, 0, mb_x, mb_y);
03043         s->c = r;
03044         memcpy(s->block_state, state, sizeof(s->block_state));
03045     }
03046 
03047     for(pass=0; pass<25; pass++){
03048         int change= 0;
03049 
03050         for(mb_y= 0; mb_y<b_height; mb_y++){
03051             for(mb_x= 0; mb_x<b_width; mb_x++){
03052                 int dia_change, i, j, ref;
03053                 int best_rd= INT_MAX, ref_rd;
03054                 BlockNode backup, ref_b;
03055                 const int index= mb_x + mb_y * b_stride;
03056                 BlockNode *block= &s->block[index];
03057                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
03058                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
03059                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
03060                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
03061                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
03062                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
03063                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03064                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03065                 const int b_w= (MB_SIZE >> s->block_max_depth);
03066                 uint8_t obmc_edged[b_w*2][b_w*2];
03067 
03068                 if(pass && (block->type & BLOCK_OPT))
03069                     continue;
03070                 block->type |= BLOCK_OPT;
03071 
03072                 backup= *block;
03073 
03074                 if(!s->me_cache_generation)
03075                     memset(s->me_cache, 0, sizeof(s->me_cache));
03076                 s->me_cache_generation += 1<<22;
03077 
03078                 //FIXME precalculate
03079                 {
03080                     int x, y;
03081                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03082                     if(mb_x==0)
03083                         for(y=0; y<b_w*2; y++)
03084                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03085                     if(mb_x==b_stride-1)
03086                         for(y=0; y<b_w*2; y++)
03087                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03088                     if(mb_y==0){
03089                         for(x=0; x<b_w*2; x++)
03090                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
03091                         for(y=1; y<b_w; y++)
03092                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03093                     }
03094                     if(mb_y==b_height-1){
03095                         for(x=0; x<b_w*2; x++)
03096                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03097                         for(y=b_w; y<b_w*2-1; y++)
03098                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03099                     }
03100                 }
03101 
03102                 //skip stuff outside the picture
03103                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
03104                     uint8_t *src= s->  input_picture.data[0];
03105                     uint8_t *dst= s->current_picture.data[0];
03106                     const int stride= s->current_picture.linesize[0];
03107                     const int block_w= MB_SIZE >> s->block_max_depth;
03108                     const int sx= block_w*mb_x - block_w/2;
03109                     const int sy= block_w*mb_y - block_w/2;
03110                     const int w= s->plane[0].width;
03111                     const int h= s->plane[0].height;
03112                     int y;
03113 
03114                     for(y=sy; y<0; y++)
03115                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03116                     for(y=h; y<sy+block_w*2; y++)
03117                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03118                     if(sx<0){
03119                         for(y=sy; y<sy+block_w*2; y++)
03120                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03121                     }
03122                     if(sx+block_w*2 > w){
03123                         for(y=sy; y<sy+block_w*2; y++)
03124                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03125                     }
03126                 }
03127 
03128                 // intra(black) = neighbors' contribution to the current block
03129                 for(i=0; i<3; i++)
03130                     color[i]= get_dc(s, mb_x, mb_y, i);
03131 
03132                 // get previous score (cannot be cached due to OBMC)
03133                 if(pass > 0 && (block->type&BLOCK_INTRA)){
03134                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
03135                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03136                 }else
03137                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03138 
03139                 ref_b= *block;
03140                 ref_rd= best_rd;
03141                 for(ref=0; ref < s->ref_frames; ref++){
03142                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03143                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
03144                         continue;
03145                     block->ref= ref;
03146                     best_rd= INT_MAX;
03147 
03148                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03149                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03150                     if(tb)
03151                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03152                     if(lb)
03153                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03154                     if(rb)
03155                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03156                     if(bb)
03157                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03158 
03159                     /* fullpel ME */
03160                     //FIXME avoid subpel interpolation / round to nearest integer
03161                     do{
03162                         dia_change=0;
03163                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03164                             for(j=0; j<i; j++){
03165                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03166                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03167                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03168                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03169                             }
03170                         }
03171                     }while(dia_change);
03172                     /* subpel ME */
03173                     do{
03174                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03175                         dia_change=0;
03176                         for(i=0; i<8; i++)
03177                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03178                     }while(dia_change);
03179                     //FIXME or try the standard 2 pass qpel or similar
03180 
03181                     mvr[0][0]= block->mx;
03182                     mvr[0][1]= block->my;
03183                     if(ref_rd > best_rd){
03184                         ref_rd= best_rd;
03185                         ref_b= *block;
03186                     }
03187                 }
03188                 best_rd= ref_rd;
03189                 *block= ref_b;
03190 #if 1
03191                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03192                 //FIXME RD style color selection
03193 #endif
03194                 if(!same_block(block, &backup)){
03195                     if(tb ) tb ->type &= ~BLOCK_OPT;
03196                     if(lb ) lb ->type &= ~BLOCK_OPT;
03197                     if(rb ) rb ->type &= ~BLOCK_OPT;
03198                     if(bb ) bb ->type &= ~BLOCK_OPT;
03199                     if(tlb) tlb->type &= ~BLOCK_OPT;
03200                     if(trb) trb->type &= ~BLOCK_OPT;
03201                     if(blb) blb->type &= ~BLOCK_OPT;
03202                     if(brb) brb->type &= ~BLOCK_OPT;
03203                     change ++;
03204                 }
03205             }
03206         }
03207         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03208         if(!change)
03209             break;
03210     }
03211 
03212     if(s->block_max_depth == 1){
03213         int change= 0;
03214         for(mb_y= 0; mb_y<b_height; mb_y+=2){
03215             for(mb_x= 0; mb_x<b_width; mb_x+=2){
03216                 int i;
03217                 int best_rd, init_rd;
03218                 const int index= mb_x + mb_y * b_stride;
03219                 BlockNode *b[4];
03220 
03221                 b[0]= &s->block[index];
03222                 b[1]= b[0]+1;
03223                 b[2]= b[0]+b_stride;
03224                 b[3]= b[2]+1;
03225                 if(same_block(b[0], b[1]) &&
03226                    same_block(b[0], b[2]) &&
03227                    same_block(b[0], b[3]))
03228                     continue;
03229 
03230                 if(!s->me_cache_generation)
03231                     memset(s->me_cache, 0, sizeof(s->me_cache));
03232                 s->me_cache_generation += 1<<22;
03233 
03234                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03235 
03236                 //FIXME more multiref search?
03237                 check_4block_inter(s, mb_x, mb_y,
03238                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03239                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03240 
03241                 for(i=0; i<4; i++)
03242                     if(!(b[i]->type&BLOCK_INTRA))
03243                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03244 
03245                 if(init_rd != best_rd)
03246                     change++;
03247             }
03248         }
03249         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03250     }
03251 }
03252 
03253 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03254     const int w= b->width;
03255     const int h= b->height;
03256     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03257     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03258     int x,y, thres1, thres2;
03259 
03260     if(s->qlog == LOSSLESS_QLOG){
03261         for(y=0; y<h; y++)
03262             for(x=0; x<w; x++)
03263                 dst[x + y*stride]= src[x + y*stride];
03264         return;
03265     }
03266 
03267     bias= bias ? 0 : (3*qmul)>>3;
03268     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03269     thres2= 2*thres1;
03270 
03271     if(!bias){
03272         for(y=0; y<h; y++){
03273             for(x=0; x<w; x++){
03274                 int i= src[x + y*stride];
03275 
03276                 if((unsigned)(i+thres1) > thres2){
03277                     if(i>=0){
03278                         i<<= QEXPSHIFT;
03279                         i/= qmul; //FIXME optimize
03280                         dst[x + y*stride]=  i;
03281                     }else{
03282                         i= -i;
03283                         i<<= QEXPSHIFT;
03284                         i/= qmul; //FIXME optimize
03285                         dst[x + y*stride]= -i;
03286                     }
03287                 }else
03288                     dst[x + y*stride]= 0;
03289             }
03290         }
03291     }else{
03292         for(y=0; y<h; y++){
03293             for(x=0; x<w; x++){
03294                 int i= src[x + y*stride];
03295 
03296                 if((unsigned)(i+thres1) > thres2){
03297                     if(i>=0){
03298                         i<<= QEXPSHIFT;
03299                         i= (i + bias) / qmul; //FIXME optimize
03300                         dst[x + y*stride]=  i;
03301                     }else{
03302                         i= -i;
03303                         i<<= QEXPSHIFT;
03304                         i= (i + bias) / qmul; //FIXME optimize
03305                         dst[x + y*stride]= -i;
03306                     }
03307                 }else
03308                     dst[x + y*stride]= 0;
03309             }
03310         }
03311     }
03312 }
03313 
03314 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
03315     const int w= b->width;
03316     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03317     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03318     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03319     int x,y;
03320 
03321     if(s->qlog == LOSSLESS_QLOG) return;
03322 
03323     for(y=start_y; y<end_y; y++){
03324 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
03325         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03326         for(x=0; x<w; x++){
03327             int i= line[x];
03328             if(i<0){
03329                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
03330             }else if(i>0){
03331                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
03332             }
03333         }
03334     }
03335 }
03336 
03337 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03338     const int w= b->width;
03339     const int h= b->height;
03340     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03341     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03342     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03343     int x,y;
03344 
03345     if(s->qlog == LOSSLESS_QLOG) return;
03346 
03347     for(y=0; y<h; y++){
03348         for(x=0; x<w; x++){
03349             int i= src[x + y*stride];
03350             if(i<0){
03351                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
03352             }else if(i>0){
03353                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
03354             }
03355         }
03356     }
03357 }
03358 
03359 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03360     const int w= b->width;
03361     const int h= b->height;
03362     int x,y;
03363 
03364     for(y=h-1; y>=0; y--){
03365         for(x=w-1; x>=0; x--){
03366             int i= x + y*stride;
03367 
03368             if(x){
03369                 if(use_median){
03370                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03371                     else  src[i] -= src[i - 1];
03372                 }else{
03373                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03374                     else  src[i] -= src[i - 1];
03375                 }
03376             }else{
03377                 if(y) src[i] -= src[i - stride];
03378             }
03379         }
03380     }
03381 }
03382 
03383 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
03384     const int w= b->width;
03385     int x,y;
03386 
03387     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
03388     IDWTELEM * prev;
03389 
03390     if (start_y != 0)
03391         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03392 
03393     for(y=start_y; y<end_y; y++){
03394         prev = line;
03395 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
03396         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
03397         for(x=0; x<w; x++){
03398             if(x){
03399                 if(use_median){
03400                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
03401                     else  line[x] += line[x - 1];
03402                 }else{
03403                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
03404                     else  line[x] += line[x - 1];
03405                 }
03406             }else{
03407                 if(y) line[x] += prev[x];
03408             }
03409         }
03410     }
03411 }
03412 
03413 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03414     const int w= b->width;
03415     const int h= b->height;
03416     int x,y;
03417 
03418     for(y=0; y<h; y++){
03419         for(x=0; x<w; x++){
03420             int i= x + y*stride;
03421 
03422             if(x){
03423                 if(use_median){
03424                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03425                     else  src[i] += src[i - 1];
03426                 }else{
03427                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03428                     else  src[i] += src[i - 1];
03429                 }
03430             }else{
03431                 if(y) src[i] += src[i - stride];
03432             }
03433         }
03434     }
03435 }
03436 
03437 static void encode_qlogs(SnowContext *s){
03438     int plane_index, level, orientation;
03439 
03440     for(plane_index=0; plane_index<2; plane_index++){
03441         for(level=0; level<s->spatial_decomposition_count; level++){
03442             for(orientation=level ? 1:0; orientation<4; orientation++){
03443                 if(orientation==2) continue;
03444                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03445             }
03446         }
03447     }
03448 }
03449 
03450 static void encode_header(SnowContext *s){
03451     int plane_index, i;
03452     uint8_t kstate[32];
03453 
03454     memset(kstate, MID_STATE, sizeof(kstate));
03455 
03456     put_rac(&s->c, kstate, s->keyframe);
03457     if(s->keyframe || s->always_reset){
03458         reset_contexts(s);
03459         s->last_spatial_decomposition_type=
03460         s->last_qlog=
03461         s->last_qbias=
03462         s->last_mv_scale=
03463         s->last_block_max_depth= 0;
03464         for(plane_index=0; plane_index<2; plane_index++){
03465             Plane *p= &s->plane[plane_index];
03466             p->last_htaps=0;
03467             p->last_diag_mc=0;
03468             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03469         }
03470     }
03471     if(s->keyframe){
03472         put_symbol(&s->c, s->header_state, s->version, 0);
03473         put_rac(&s->c, s->header_state, s->always_reset);
03474         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03475         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03476         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03477         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03478         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03479         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03480         put_rac(&s->c, s->header_state, s->spatial_scalability);
03481 //        put_rac(&s->c, s->header_state, s->rate_scalability);
03482         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03483 
03484         encode_qlogs(s);
03485     }
03486 
03487     if(!s->keyframe){
03488         int update_mc=0;
03489         for(plane_index=0; plane_index<2; plane_index++){
03490             Plane *p= &s->plane[plane_index];
03491             update_mc |= p->last_htaps   != p->htaps;
03492             update_mc |= p->last_diag_mc != p->diag_mc;
03493             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03494         }
03495         put_rac(&s->c, s->header_state, update_mc);
03496         if(update_mc){
03497             for(plane_index=0; plane_index<2; plane_index++){
03498                 Plane *p= &s->plane[plane_index];
03499                 put_rac(&s->c, s->header_state, p->diag_mc);
03500                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03501                 for(i= p->htaps/2; i; i--)
03502                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03503             }
03504         }
03505         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03506             put_rac(&s->c, s->header_state, 1);
03507             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03508             encode_qlogs(s);
03509         }else
03510             put_rac(&s->c, s->header_state, 0);
03511     }
03512 
03513     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03514     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
03515     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
03516     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
03517     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03518 
03519 }
03520 
03521 static void update_last_header_values(SnowContext *s){
03522     int plane_index;
03523 
03524     if(!s->keyframe){
03525         for(plane_index=0; plane_index<2; plane_index++){
03526             Plane *p= &s->plane[plane_index];
03527             p->last_diag_mc= p->diag_mc;
03528             p->last_htaps  = p->htaps;
03529             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03530         }
03531     }
03532 
03533     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
03534     s->last_qlog                        = s->qlog;
03535     s->last_qbias                       = s->qbias;
03536     s->last_mv_scale                    = s->mv_scale;
03537     s->last_block_max_depth             = s->block_max_depth;
03538     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
03539 }
03540 
03541 static void decode_qlogs(SnowContext *s){
03542     int plane_index, level, orientation;
03543 
03544     for(plane_index=0; plane_index<3; plane_index++){
03545         for(level=0; level<s->spatial_decomposition_count; level++){
03546             for(orientation=level ? 1:0; orientation<4; orientation++){
03547                 int q;
03548                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
03549                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
03550                 else                    q= get_symbol(&s->c, s->header_state, 1);
03551                 s->plane[plane_index].band[level][orientation].qlog= q;
03552             }
03553         }
03554     }
03555 }
03556 
03557 static int decode_header(SnowContext *s){
03558     int plane_index, tmp;
03559     uint8_t kstate[32];
03560 
03561     memset(kstate, MID_STATE, sizeof(kstate));
03562 
03563     s->keyframe= get_rac(&s->c, kstate);
03564     if(s->keyframe || s->always_reset){
03565         reset_contexts(s);
03566         s->spatial_decomposition_type=
03567         s->qlog=
03568         s->qbias=
03569         s->mv_scale=
03570         s->block_max_depth= 0;
03571     }
03572     if(s->keyframe){
03573         s->version= get_symbol(&s->c, s->header_state, 0);
03574         if(s->version>0){
03575             av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
03576             return -1;
03577         }
03578         s->always_reset= get_rac(&s->c, s->header_state);
03579         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
03580         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03581         s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03582         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
03583         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
03584         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
03585         s->spatial_scalability= get_rac(&s->c, s->header_state);
03586 //        s->rate_scalability= get_rac(&s->c, s->header_state);
03587         tmp= get_symbol(&s->c, s->header_state, 0)+1;
03588         if(tmp < 1 || tmp > MAX_REF_FRAMES){
03589             av_log(s->avctx, AV_LOG_ERROR, "reference frame count is %d\n", tmp);
03590             return -1;
03591         }
03592         s->max_ref_frames= tmp;
03593 
03594         decode_qlogs(s);
03595     }
03596 
03597     if(!s->keyframe){
03598         if(get_rac(&s->c, s->header_state)){
03599             for(plane_index=0; plane_index<2; plane_index++){
03600                 int htaps, i, sum=0;
03601                 Plane *p= &s->plane[plane_index];
03602                 p->diag_mc= get_rac(&s->c, s->header_state);
03603                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
03604                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
03605                     return -1;
03606                 p->htaps= htaps;
03607                 for(i= htaps/2; i; i--){
03608                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
03609                     sum += p->hcoeff[i];
03610                 }
03611                 p->hcoeff[0]= 32-sum;
03612             }
03613             s->plane[2].diag_mc= s->plane[1].diag_mc;
03614             s->plane[2].htaps  = s->plane[1].htaps;
03615             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
03616         }
03617         if(get_rac(&s->c, s->header_state)){
03618             s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
03619             decode_qlogs(s);
03620         }
03621     }
03622 
03623     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
03624     if(s->spatial_decomposition_type > 1){
03625         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
03626         return -1;
03627     }
03628 
03629     s->qlog           += get_symbol(&s->c, s->header_state, 1);
03630     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
03631     s->qbias          += get_symbol(&s->c, s->header_state, 1);
03632     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
03633     if(s->block_max_depth > 1 || s->block_max_depth < 0){
03634         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
03635         s->block_max_depth= 0;
03636         return -1;
03637     }
03638 
03639     return 0;
03640 }
03641 
03642 static void init_qexp(void){
03643     int i;
03644     double v=128;
03645 
03646     for(i=0; i<QROOT; i++){
03647         qexp[i]= lrintf(v);
03648         v *= pow(2, 1.0 / QROOT);
03649     }
03650 }
03651 
03652 static av_cold int common_init(AVCodecContext *avctx){
03653     SnowContext *s = avctx->priv_data;
03654     int width, height;
03655     int i, j;
03656 
03657     s->avctx= avctx;
03658     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
03659 
03660     dsputil_init(&s->dsp, avctx);
03661 
03662 #define mcf(dx,dy)\
03663     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
03664     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
03665         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
03666     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
03667     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
03668         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
03669 
03670     mcf( 0, 0)
03671     mcf( 4, 0)
03672     mcf( 8, 0)
03673     mcf(12, 0)
03674     mcf( 0, 4)
03675     mcf( 4, 4)
03676     mcf( 8, 4)
03677     mcf(12, 4)
03678     mcf( 0, 8)
03679     mcf( 4, 8)
03680     mcf( 8, 8)
03681     mcf(12, 8)
03682     mcf( 0,12)
03683     mcf( 4,12)
03684     mcf( 8,12)
03685     mcf(12,12)
03686 
03687 #define mcfh(dx,dy)\
03688     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
03689     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
03690         mc_block_hpel ## dx ## dy ## 16;\
03691     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
03692     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
03693         mc_block_hpel ## dx ## dy ## 8;
03694 
03695     mcfh(0, 0)
03696     mcfh(8, 0)
03697     mcfh(0, 8)
03698     mcfh(8, 8)
03699 
03700     if(!qexp[0])
03701         init_qexp();
03702 
03703 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
03704 
03705     width= s->avctx->width;
03706     height= s->avctx->height;
03707 
03708     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
03709     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
03710 
03711     for(i=0; i<MAX_REF_FRAMES; i++)
03712         for(j=0; j<MAX_REF_FRAMES; j++)
03713             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
03714 
03715     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
03716     s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
03717 
03718     return 0;
03719 }
03720 
03721 static int common_init_after_header(AVCodecContext *avctx){
03722     SnowContext *s = avctx->priv_data;
03723     int plane_index, level, orientation;
03724 
03725     for(plane_index=0; plane_index<3; plane_index++){
03726         int w= s->avctx->width;
03727         int h= s->avctx->height;
03728 
03729         if(plane_index){
03730             w>>= s->chroma_h_shift;
03731             h>>= s->chroma_v_shift;
03732         }
03733         s->plane[plane_index].width = w;
03734         s->plane[plane_index].height= h;
03735 
03736         for(level=s->spatial_decomposition_count-1; level>=0; level--){
03737             for(orientation=level ? 1 : 0; orientation<4; orientation++){
03738                 SubBand *b= &s->plane[plane_index].band[level][orientation];
03739 
03740                 b->buf= s->spatial_dwt_buffer;
03741                 b->level= level;
03742                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
03743                 b->width = (w + !(orientation&1))>>1;
03744                 b->height= (h + !(orientation>1))>>1;
03745 
03746                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
03747                 b->buf_x_offset = 0;
03748                 b->buf_y_offset = 0;
03749 
03750                 if(orientation&1){
03751                     b->buf += (w+1)>>1;
03752                     b->buf_x_offset = (w+1)>>1;
03753                 }
03754                 if(orientation>1){
03755                     b->buf += b->stride>>1;
03756                     b->buf_y_offset = b->stride_line >> 1;
03757                 }
03758                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
03759 
03760                 if(level)
03761                     b->parent= &s->plane[plane_index].band[level-1][orientation];
03762                 //FIXME avoid this realloc
03763                 av_freep(&b->x_coeff);
03764                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
03765             }
03766             w= (w+1)>>1;
03767             h= (h+1)>>1;
03768         }
03769     }
03770 
03771     return 0;
03772 }
03773 
03774 static int qscale2qlog(int qscale){
03775     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
03776            + 61*QROOT/8; //<64 >60
03777 }
03778 
03779 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
03780 {
03781     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
03782      * FIXME we know exact mv bits at this point,
03783      * but ratecontrol isn't set up to include them. */
03784     uint32_t coef_sum= 0;
03785     int level, orientation, delta_qlog;
03786 
03787     for(level=0; level<s->spatial_decomposition_count; level++){
03788         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03789             SubBand *b= &s->plane[0].band[level][orientation];
03790             IDWTELEM *buf= b->ibuf;
03791             const int w= b->width;
03792             const int h= b->height;
03793             const int stride= b->stride;
03794             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
03795             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03796             const int qdiv= (1<<16)/qmul;
03797             int x, y;
03798             //FIXME this is ugly
03799             for(y=0; y<h; y++)
03800                 for(x=0; x<w; x++)
03801                     buf[x+y*stride]= b->buf[x+y*stride];
03802             if(orientation==0)
03803                 decorrelate(s, b, buf, stride, 1, 0);
03804             for(y=0; y<h; y++)
03805                 for(x=0; x<w; x++)
03806                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
03807         }
03808     }
03809 
03810     /* ugly, ratecontrol just takes a sqrt again */
03811     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
03812     assert(coef_sum < INT_MAX);
03813 
03814     if(pict->pict_type == FF_I_TYPE){
03815         s->m.current_picture.mb_var_sum= coef_sum;
03816         s->m.current_picture.mc_mb_var_sum= 0;
03817     }else{
03818         s->m.current_picture.mc_mb_var_sum= coef_sum;
03819         s->m.current_picture.mb_var_sum= 0;
03820     }
03821 
03822     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
03823     if (pict->quality < 0)
03824         return INT_MIN;
03825     s->lambda= pict->quality * 3/2;
03826     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
03827     s->qlog+= delta_qlog;
03828     return delta_qlog;
03829 }
03830 
03831 static void calculate_visual_weight(SnowContext *s, Plane *p){
03832     int width = p->width;
03833     int height= p->height;
03834     int level, orientation, x, y;
03835 
03836     for(level=0; level<s->spatial_decomposition_count; level++){
03837         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03838             SubBand *b= &p->band[level][orientation];
03839             IDWTELEM *ibuf= b->ibuf;
03840             int64_t error=0;
03841 
03842             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
03843             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
03844             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
03845             for(y=0; y<height; y++){
03846                 for(x=0; x<width; x++){
03847                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
03848                     error += d*d;
03849                 }
03850             }
03851 
03852             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
03853         }
03854     }
03855 }
03856 
03857 #define QUANTIZE2 0
03858 
03859 #if QUANTIZE2==1
03860 #define Q2_STEP 8
03861 
03862 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
03863     SubBand *b= &p->band[level][orientation];
03864     int x, y;
03865     int xo=0;
03866     int yo=0;
03867     int step= 1 << (s->spatial_decomposition_count - level);
03868 
03869     if(orientation&1)
03870         xo= step>>1;
03871     if(orientation&2)
03872         yo= step>>1;
03873 
03874     //FIXME bias for nonzero ?
03875     //FIXME optimize
03876     memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
03877     for(y=0; y<p->height; y++){
03878         for(x=0; x<p->width; x++){
03879             int sx= (x-xo + step/2) / step / Q2_STEP;
03880             int sy= (y-yo + step/2) / step / Q2_STEP;
03881             int v= r0[x + y*p->width] - r1[x + y*p->width];
03882             assert(sx>=0 && sy>=0 && sx < score_stride);
03883             v= ((v+8)>>4)<<4;
03884             score[sx + sy*score_stride] += v*v;
03885             assert(score[sx + sy*score_stride] >= 0);
03886         }
03887     }
03888 }
03889 
03890 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
03891     int level, orientation;
03892 
03893     for(level=0; level<s->spatial_decomposition_count; level++){
03894         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03895             SubBand *b= &p->band[level][orientation];
03896             IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
03897 
03898             dequantize(s, b, dst, b->stride);
03899         }
03900     }
03901 }
03902 
03903 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
03904     int level, orientation, ys, xs, x, y, pass;
03905     IDWTELEM best_dequant[height * stride];
03906     IDWTELEM idwt2_buffer[height * stride];
03907     const int score_stride= (width + 10)/Q2_STEP;
03908     int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
03909     int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
03910     int threshold= (s->m.lambda * s->m.lambda) >> 6;
03911 
03912     //FIXME pass the copy cleanly ?
03913 
03914 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
03915     ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
03916 
03917     for(level=0; level<s->spatial_decomposition_count; level++){
03918         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03919             SubBand *b= &p->band[level][orientation];
03920             IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03921              DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
03922             assert(src == b->buf); // code does not depend on this but it is true currently
03923 
03924             quantize(s, b, dst, src, b->stride, s->qbias);
03925         }
03926     }
03927     for(pass=0; pass<1; pass++){
03928         if(s->qbias == 0) //keyframe
03929             continue;
03930         for(level=0; level<s->spatial_decomposition_count; level++){
03931             for(orientation=level ? 1 : 0; orientation<4; orientation++){
03932                 SubBand *b= &p->band[level][orientation];
03933                 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
03934                 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
03935 
03936                 for(ys= 0; ys<Q2_STEP; ys++){
03937                     for(xs= 0; xs<Q2_STEP; xs++){
03938                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03939                         dequantize_all(s, p, idwt2_buffer, width, height);
03940                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03941                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03942                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
03943                         for(y=ys; y<b->height; y+= Q2_STEP){
03944                             for(x=xs; x<b->width; x+= Q2_STEP){
03945                                 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
03946                                 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
03947                                 //FIXME try more than just --
03948                             }
03949                         }
03950                         dequantize_all(s, p, idwt2_buffer, width, height);
03951                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
03952                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
03953                         for(y=ys; y<b->height; y+= Q2_STEP){
03954                             for(x=xs; x<b->width; x+= Q2_STEP){
03955                                 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
03956                                 if(score[score_idx] <= best_score[score_idx] + threshold){
03957                                     best_score[score_idx]= score[score_idx];
03958                                     if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
03959                                     if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
03960                                     //FIXME copy instead
03961                                 }
03962                             }
03963                         }
03964                     }
03965                 }
03966             }
03967         }
03968     }
03969     memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
03970 }
03971 
03972 #endif /* QUANTIZE2==1 */
03973 
03974 static av_cold int encode_init(AVCodecContext *avctx)
03975 {
03976     SnowContext *s = avctx->priv_data;
03977     int plane_index;
03978 
03979     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
03980         av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
03981                "Use vstrict=-2 / -strict -2 to use it anyway.\n");
03982         return -1;
03983     }
03984 
03985     if(avctx->prediction_method == DWT_97
03986        && (avctx->flags & CODEC_FLAG_QSCALE)
03987        && avctx->global_quality == 0){
03988         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
03989         return -1;
03990     }
03991 
03992     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
03993 
03994     s->chroma_h_shift= 1; //FIXME XXX
03995     s->chroma_v_shift= 1;
03996 
03997     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
03998     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
03999 
04000     for(plane_index=0; plane_index<3; plane_index++){
04001         s->plane[plane_index].diag_mc= 1;
04002         s->plane[plane_index].htaps= 6;
04003         s->plane[plane_index].hcoeff[0]=  40;
04004         s->plane[plane_index].hcoeff[1]= -10;
04005         s->plane[plane_index].hcoeff[2]=   2;
04006         s->plane[plane_index].fast_mc= 1;
04007     }
04008 
04009     common_init(avctx);
04010     alloc_blocks(s);
04011 
04012     s->version=0;
04013 
04014     s->m.avctx   = avctx;
04015     s->m.flags   = avctx->flags;
04016     s->m.bit_rate= avctx->bit_rate;
04017 
04018     s->m.me.temp      =
04019     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
04020     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04021     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
04022     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
04023     h263_encode_init(&s->m); //mv_penalty
04024 
04025     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
04026 
04027     if(avctx->flags&CODEC_FLAG_PASS1){
04028         if(!avctx->stats_out)
04029             avctx->stats_out = av_mallocz(256);
04030     }
04031     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
04032         if(ff_rate_control_init(&s->m) < 0)
04033             return -1;
04034     }
04035     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
04036 
04037     avctx->coded_frame= &s->current_picture;
04038     switch(avctx->pix_fmt){
04039 //    case PIX_FMT_YUV444P:
04040 //    case PIX_FMT_YUV422P:
04041     case PIX_FMT_YUV420P:
04042     case PIX_FMT_GRAY8:
04043 //    case PIX_FMT_YUV411P:
04044 //    case PIX_FMT_YUV410P:
04045         s->colorspace_type= 0;
04046         break;
04047 /*    case PIX_FMT_RGB32:
04048         s->colorspace= 1;
04049         break;*/
04050     default:
04051         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
04052         return -1;
04053     }
04054 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
04055     s->chroma_h_shift= 1;
04056     s->chroma_v_shift= 1;
04057 
04058     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
04059     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
04060 
04061     s->avctx->get_buffer(s->avctx, &s->input_picture);
04062 
04063     if(s->avctx->me_method == ME_ITER){
04064         int i;
04065         int size= s->b_width * s->b_height << 2*s->block_max_depth;
04066         for(i=0; i<s->max_ref_frames; i++){
04067             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
04068             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
04069         }
04070     }
04071 
04072     return 0;
04073 }
04074 
04075 #define USE_HALFPEL_PLANE 0
04076 
04077 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
04078     int p,x,y;
04079 
04080     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
04081 
04082     for(p=0; p<3; p++){
04083         int is_chroma= !!p;
04084         int w= s->avctx->width  >>is_chroma;
04085         int h= s->avctx->height >>is_chroma;
04086         int ls= frame->linesize[p];
04087         uint8_t *src= frame->data[p];
04088 
04089         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04090         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04091         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
04092 
04093         halfpel[0][p]= src;
04094         for(y=0; y<h; y++){
04095             for(x=0; x<w; x++){
04096                 int i= y*ls + x;
04097 
04098                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
04099             }
04100         }
04101         for(y=0; y<h; y++){
04102             for(x=0; x<w; x++){
04103                 int i= y*ls + x;
04104 
04105                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04106             }
04107         }
04108         src= halfpel[1][p];
04109         for(y=0; y<h; y++){
04110             for(x=0; x<w; x++){
04111                 int i= y*ls + x;
04112 
04113                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
04114             }
04115         }
04116 
04117 //FIXME border!
04118     }
04119 }
04120 
04121 static int frame_start(SnowContext *s){
04122    AVFrame tmp;
04123    int w= s->avctx->width; //FIXME round up to x16 ?
04124    int h= s->avctx->height;
04125 
04126     if(s->current_picture.data[0]){
04127         s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
04128         s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
04129         s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
04130     }
04131 
04132     tmp= s->last_picture[s->max_ref_frames-1];
04133     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
04134     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
04135     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
04136         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
04137     s->last_picture[0]= s->current_picture;
04138     s->current_picture= tmp;
04139 
04140     if(s->keyframe){
04141         s->ref_frames= 0;
04142     }else{
04143         int i;
04144         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
04145             if(i && s->last_picture[i-1].key_frame)
04146                 break;
04147         s->ref_frames= i;
04148     }
04149 
04150     s->current_picture.reference= 1;
04151     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
04152         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
04153         return -1;
04154     }
04155 
04156     s->current_picture.key_frame= s->keyframe;
04157 
04158     return 0;
04159 }
04160 
04161 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
04162     SnowContext *s = avctx->priv_data;
04163     RangeCoder * const c= &s->c;
04164     AVFrame *pict = data;
04165     const int width= s->avctx->width;
04166     const int height= s->avctx->height;
04167     int level, orientation, plane_index, i, y;
04168     uint8_t rc_header_bak[sizeof(s->header_state)];
04169     uint8_t rc_block_bak[sizeof(s->block_state)];
04170 
04171     ff_init_range_encoder(c, buf, buf_size);
04172     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04173 
04174     for(i=0; i<3; i++){
04175         int shift= !!i;
04176         for(y=0; y<(height>>shift); y++)
04177             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
04178                    &pict->data[i][y * pict->linesize[i]],
04179                    width>>shift);
04180     }
04181     s->new_picture = *pict;
04182 
04183     s->m.picture_number= avctx->frame_number;
04184     if(avctx->flags&CODEC_FLAG_PASS2){
04185         s->m.pict_type =
04186         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
04187         s->keyframe= pict->pict_type==FF_I_TYPE;
04188         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
04189             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
04190             if (pict->quality < 0)
04191                 return -1;
04192         }
04193     }else{
04194         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
04195         s->m.pict_type=
04196         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
04197     }
04198 
04199     if(s->pass1_rc && avctx->frame_number == 0)
04200         pict->quality= 2*FF_QP2LAMBDA;
04201     if(pict->quality){
04202         s->qlog= qscale2qlog(pict->quality);
04203         s->lambda = pict->quality * 3/2;
04204     }
04205     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
04206         s->qlog= LOSSLESS_QLOG;
04207         s->lambda = 0;
04208     }//else keep previous frame's qlog until after motion estimation
04209 
04210     frame_start(s);
04211 
04212     s->m.current_picture_ptr= &s->m.current_picture;
04213     if(pict->pict_type == FF_P_TYPE){
04214         int block_width = (width +15)>>4;
04215         int block_height= (height+15)>>4;
04216         int stride= s->current_picture.linesize[0];
04217 
04218         assert(s->current_picture.data[0]);
04219         assert(s->last_picture[0].data[0]);
04220 
04221         s->m.avctx= s->avctx;
04222         s->m.current_picture.data[0]= s->current_picture.data[0];
04223         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
04224         s->m.    new_picture.data[0]= s->  input_picture.data[0];
04225         s->m.   last_picture_ptr= &s->m.   last_picture;
04226         s->m.linesize=
04227         s->m.   last_picture.linesize[0]=
04228         s->m.    new_picture.linesize[0]=
04229         s->m.current_picture.linesize[0]= stride;
04230         s->m.uvlinesize= s->current_picture.linesize[1];
04231         s->m.width = width;
04232         s->m.height= height;
04233         s->m.mb_width = block_width;
04234         s->m.mb_height= block_height;
04235         s->m.mb_stride=   s->m.mb_width+1;
04236         s->m.b8_stride= 2*s->m.mb_width+1;
04237         s->m.f_code=1;
04238         s->m.pict_type= pict->pict_type;
04239         s->m.me_method= s->avctx->me_method;
04240         s->m.me.scene_change_score=0;
04241         s->m.flags= s->avctx->flags;
04242         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
04243         s->m.out_format= FMT_H263;
04244         s->m.unrestricted_mv= 1;
04245 
04246         s->m.lambda = s->lambda;
04247         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
04248         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
04249 
04250         s->m.dsp= s->dsp; //move
04251         ff_init_me(&s->m);
04252         s->dsp= s->m.dsp;
04253     }
04254 
04255     if(s->pass1_rc){
04256         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
04257         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
04258     }
04259 
04260 redo_frame:
04261 
04262     if(pict->pict_type == FF_I_TYPE)
04263         s->spatial_decomposition_count= 5;
04264     else
04265         s->spatial_decomposition_count= 5;
04266 
04267     s->m.pict_type = pict->pict_type;
04268     s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
04269 
04270     common_init_after_header(avctx);
04271 
04272     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
04273         for(plane_index=0; plane_index<3; plane_index++){
04274             calculate_visual_weight(s, &s->plane[plane_index]);
04275         }
04276     }
04277 
04278     encode_header(s);
04279     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04280     encode_blocks(s, 1);
04281     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
04282 
04283     for(plane_index=0; plane_index<3; plane_index++){
04284         Plane *p= &s->plane[plane_index];
04285         int w= p->width;
04286         int h= p->height;
04287         int x, y;
04288 //        int bits= put_bits_count(&s->c.pb);
04289 
04290         if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
04291             //FIXME optimize
04292             if(pict->data[plane_index]) //FIXME gray hack
04293                 for(y=0; y<h; y++){
04294                     for(x=0; x<w; x++){
04295                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
04296                     }
04297                 }
04298             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
04299 
04300             if(   plane_index==0
04301                && pict->pict_type == FF_P_TYPE
04302                && !(avctx->flags&CODEC_FLAG_PASS2)
04303                && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
04304                 ff_init_range_encoder(c, buf, buf_size);
04305                 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04306                 pict->pict_type= FF_I_TYPE;
04307                 s->keyframe=1;
04308                 s->current_picture.key_frame=1;
04309                 goto redo_frame;
04310             }
04311 
04312             if(s->qlog == LOSSLESS_QLOG){
04313                 for(y=0; y<h; y++){
04314                     for(x=0; x<w; x++){
04315                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
04316                     }
04317                 }
04318             }else{
04319                 for(y=0; y<h; y++){
04320                     for(x=0; x<w; x++){
04321                         s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
04322                     }
04323                 }
04324             }
04325 
04326             /*  if(QUANTIZE2)
04327                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
04328             else*/
04329                 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04330 
04331             if(s->pass1_rc && plane_index==0){
04332                 int delta_qlog = ratecontrol_1pass(s, pict);
04333                 if (delta_qlog <= INT_MIN)
04334                     return -1;
04335                 if(delta_qlog){
04336                     //reordering qlog in the bitstream would eliminate this reset
04337                     ff_init_range_encoder(c, buf, buf_size);
04338                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
04339                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
04340                     encode_header(s);
04341                     encode_blocks(s, 0);
04342                 }
04343             }
04344 
04345             for(level=0; level<s->spatial_decomposition_count; level++){
04346                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04347                     SubBand *b= &p->band[level][orientation];
04348 
04349                     if(!QUANTIZE2)
04350                         quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
04351                     if(orientation==0)
04352                         decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
04353                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
04354                     assert(b->parent==NULL || b->parent->stride == b->stride*2);
04355                     if(orientation==0)
04356                         correlate(s, b, b->ibuf, b->stride, 1, 0);
04357                 }
04358             }
04359 
04360             for(level=0; level<s->spatial_decomposition_count; level++){
04361                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04362                     SubBand *b= &p->band[level][orientation];
04363 
04364                     dequantize(s, b, b->ibuf, b->stride);
04365                 }
04366             }
04367 
04368             ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
04369             if(s->qlog == LOSSLESS_QLOG){
04370                 for(y=0; y<h; y++){
04371                     for(x=0; x<w; x++){
04372                         s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
04373                     }
04374                 }
04375             }
04376             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04377         }else{
04378             //ME/MC only
04379             if(pict->pict_type == FF_I_TYPE){
04380                 for(y=0; y<h; y++){
04381                     for(x=0; x<w; x++){
04382                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
04383                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
04384                     }
04385                 }
04386             }else{
04387                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
04388                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04389             }
04390         }
04391         if(s->avctx->flags&CODEC_FLAG_PSNR){
04392             int64_t error= 0;
04393 
04394             if(pict->data[plane_index]) //FIXME gray hack
04395                 for(y=0; y<h; y++){
04396                     for(x=0; x<w; x++){
04397                         int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
04398                         error += d*d;
04399                     }
04400                 }
04401             s->avctx->error[plane_index] += error;
04402             s->current_picture.error[plane_index] = error;
04403         }
04404 
04405     }
04406 
04407     update_last_header_values(s);
04408 
04409     if(s->last_picture[s->max_ref_frames-1].data[0]){
04410         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04411         for(i=0; i<9; i++)
04412             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04413                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04414     }
04415 
04416     s->current_picture.coded_picture_number = avctx->frame_number;
04417     s->current_picture.pict_type = pict->pict_type;
04418     s->current_picture.quality = pict->quality;
04419     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
04420     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
04421     s->m.current_picture.display_picture_number =
04422     s->m.current_picture.coded_picture_number = avctx->frame_number;
04423     s->m.current_picture.quality = pict->quality;
04424     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
04425     if(s->pass1_rc)
04426         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
04427             return -1;
04428     if(avctx->flags&CODEC_FLAG_PASS1)
04429         ff_write_pass1_stats(&s->m);
04430     s->m.last_pict_type = s->m.pict_type;
04431     avctx->frame_bits = s->m.frame_bits;
04432     avctx->mv_bits = s->m.mv_bits;
04433     avctx->misc_bits = s->m.misc_bits;
04434     avctx->p_tex_bits = s->m.p_tex_bits;
04435 
04436     emms_c();
04437 
04438     return ff_rac_terminate(c);
04439 }
04440 
04441 static av_cold void common_end(SnowContext *s){
04442     int plane_index, level, orientation, i;
04443 
04444     av_freep(&s->spatial_dwt_buffer);
04445     av_freep(&s->spatial_idwt_buffer);
04446 
04447     s->m.me.temp= NULL;
04448     av_freep(&s->m.me.scratchpad);
04449     av_freep(&s->m.me.map);
04450     av_freep(&s->m.me.score_map);
04451     av_freep(&s->m.obmc_scratchpad);
04452 
04453     av_freep(&s->block);
04454     av_freep(&s->scratchbuf);
04455 
04456     for(i=0; i<MAX_REF_FRAMES; i++){
04457         av_freep(&s->ref_mvs[i]);
04458         av_freep(&s->ref_scores[i]);
04459         if(s->last_picture[i].data[0])
04460             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
04461     }
04462 
04463     for(plane_index=0; plane_index<3; plane_index++){
04464         for(level=s->spatial_decomposition_count-1; level>=0; level--){
04465             for(orientation=level ? 1 : 0; orientation<4; orientation++){
04466                 SubBand *b= &s->plane[plane_index].band[level][orientation];
04467 
04468                 av_freep(&b->x_coeff);
04469             }
04470         }
04471     }
04472 }
04473 
04474 static av_cold int encode_end(AVCodecContext *avctx)
04475 {
04476     SnowContext *s = avctx->priv_data;
04477 
04478     common_end(s);
04479     av_free(avctx->stats_out);
04480 
04481     return 0;
04482 }
04483 
04484 static av_cold int decode_init(AVCodecContext *avctx)
04485 {
04486     avctx->pix_fmt= PIX_FMT_YUV420P;
04487 
04488     common_init(avctx);
04489 
04490     return 0;
04491 }
04492 
04493 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
04494     SnowContext *s = avctx->priv_data;
04495     RangeCoder * const c= &s->c;
04496     int bytes_read;
04497     AVFrame *picture = data;
04498     int level, orientation, plane_index, i;
04499 
04500     ff_init_range_decoder(c, buf, buf_size);
04501     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
04502 
04503     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
04504     if(decode_header(s)<0)
04505         return -1;
04506     common_init_after_header(avctx);
04507 
04508     // realloc slice buffer for the case that spatial_decomposition_count changed
04509     slice_buffer_destroy(&s->sb);
04510     slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
04511 
04512     for(plane_index=0; plane_index<3; plane_index++){
04513         Plane *p= &s->plane[plane_index];
04514         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
04515                                               && p->hcoeff[1]==-10
04516                                               && p->hcoeff[2]==2;
04517     }
04518 
04519     alloc_blocks(s);
04520 
04521     frame_start(s);
04522     //keyframe flag duplication mess FIXME
04523     if(avctx->debug&FF_DEBUG_PICT_INFO)
04524         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
04525 
04526     decode_blocks(s);
04527 
04528     for(plane_index=0; plane_index<3; plane_index++){
04529         Plane *p= &s->plane[plane_index];
04530         int w= p->width;
04531         int h= p->height;
04532         int x, y;
04533         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
04534 
04535         if(s->avctx->debug&2048){
04536             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
04537             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
04538 
04539             for(y=0; y<h; y++){
04540                 for(x=0; x<w; x++){
04541                     int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
04542                     s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
04543                 }
04544             }
04545         }
04546 
04547         {
04548         for(level=0; level<s->spatial_decomposition_count; level++){
04549             for(orientation=level ? 1 : 0; orientation<4; orientation++){
04550                 SubBand *b= &p->band[level][orientation];
04551                 unpack_coeffs(s, b, b->parent, orientation);
04552             }
04553         }
04554         }
04555 
04556         {
04557         const int mb_h= s->b_height << s->block_max_depth;
04558         const int block_size = MB_SIZE >> s->block_max_depth;
04559         const int block_w    = plane_index ? block_size/2 : block_size;
04560         int mb_y;
04561         DWTCompose cs[MAX_DECOMPOSITIONS];
04562         int yd=0, yq=0;
04563         int y;
04564         int end_y;
04565 
04566         ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
04567         for(mb_y=0; mb_y<=mb_h; mb_y++){
04568 
04569             int slice_starty = block_w*mb_y;
04570             int slice_h = block_w*(mb_y+1);
04571             if (!(s->keyframe || s->avctx->debug&512)){
04572                 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
04573                 slice_h -= (block_w >> 1);
04574             }
04575 
04576             for(level=0; level<s->spatial_decomposition_count; level++){
04577                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
04578                     SubBand *b= &p->band[level][orientation];
04579                     int start_y;
04580                     int end_y;
04581                     int our_mb_start = mb_y;
04582                     int our_mb_end = (mb_y + 1);
04583                     const int extra= 3;
04584                     start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
04585                     end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
04586                     if (!(s->keyframe || s->avctx->debug&512)){
04587                         start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04588                         end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
04589                     }
04590                     start_y = FFMIN(b->height, start_y);
04591                     end_y = FFMIN(b->height, end_y);
04592 
04593                     if (start_y != end_y){
04594                         if (orientation == 0){
04595                             SubBand * correlate_band = &p->band[0][0];
04596                             int correlate_end_y = FFMIN(b->height, end_y + 1);
04597                             int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
04598                             decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
04599                             correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
04600                             dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
04601                         }
04602                         else
04603                             decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
04604                     }
04605                 }
04606             }
04607 
04608             for(; yd<slice_h; yd+=4){
04609                 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
04610             }
04611 
04612             if(s->qlog == LOSSLESS_QLOG){
04613                 for(; yq<slice_h && yq<h; yq++){
04614                     IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
04615                     for(x=0; x<w; x++){
04616                         line[x] <<= FRAC_BITS;
04617                     }
04618                 }
04619             }
04620 
04621             predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
04622 
04623             y = FFMIN(p->height, slice_starty);
04624             end_y = FFMIN(p->height, slice_h);
04625             while(y < end_y)
04626                 slice_buffer_release(&s->sb, y++);
04627         }
04628 
04629         slice_buffer_flush(&s->sb);
04630         }
04631 
04632     }
04633 
04634     emms_c();
04635 
04636     if(s->last_picture[s->max_ref_frames-1].data[0]){
04637         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
04638         for(i=0; i<9; i++)
04639             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
04640                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
04641     }
04642 
04643     if(!(s->avctx->debug&2048))
04644         *picture= s->current_picture;
04645     else
04646         *picture= s->mconly_picture;
04647 
04648     *data_size = sizeof(AVFrame);
04649 
04650     bytes_read= c->bytestream - c->bytestream_start;
04651     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
04652 
04653     return bytes_read;
04654 }
04655 
04656 static av_cold int decode_end(AVCodecContext *avctx)
04657 {
04658     SnowContext *s = avctx->priv_data;
04659 
04660     slice_buffer_destroy(&s->sb);
04661 
04662     common_end(s);
04663 
04664     return 0;
04665 }
04666 
04667 AVCodec snow_decoder = {
04668     "snow",
04669     CODEC_TYPE_VIDEO,
04670     CODEC_ID_SNOW,
04671     sizeof(SnowContext),
04672     decode_init,
04673     NULL,
04674     decode_end,
04675     decode_frame,
04676     0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
04677     NULL,
04678     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04679 };
04680 
#if CONFIG_SNOW_ENCODER
/**
 * Codec table entry for the Snow encoder: wires encode_init(),
 * encode_frame() and encode_end() to a SnowContext-sized private context.
 * Only built when CONFIG_SNOW_ENCODER is set.
 */
AVCodec snow_encoder = {
    .name           = "snow",
    .type           = CODEC_TYPE_VIDEO,
    .id             = CODEC_ID_SNOW,
    .priv_data_size = sizeof(SnowContext),
    .init           = encode_init,
    .encode         = encode_frame,
    .close          = encode_end,
    .long_name      = NULL_IF_CONFIG_SMALL("Snow"),
};
#endif
04693 
04694 
#ifdef TEST
#undef malloc
#undef free
#undef printf
#undef random

/**
 * Self-test entry point, only built when TEST is defined.
 *
 * 1. Forward + inverse transform round trip on random data with
 *    decomposition type 1 ("5/3"); any differing sample is reported.
 * 2. The same round trip with type 0 ("9/7"); differences above 20 are
 *    reported.
 * 3. For each subband of a 3-level type-0 decomposition, a single
 *    coefficient is set, the inverse transform is run and the rounded
 *    square root of the summed squared output is recorded; the values are
 *    printed, divided by their common gcd, as a visual_weight table.
 * 4. A 2x2-periodic test pattern is forward transformed and the centre of
 *    the result is dumped.
 *
 * @return always 0
 */
int main(void){
    int width=256;
    int height=256;
    int buffer[2][width*height];
    SnowContext s;
    int i;

    /* --- round trip, type 1, 6 levels: expected to be bit exact --- */
    s.spatial_decomposition_count=6;
    s.spatial_decomposition_type=1;

    printf("testing 5/3 DWT\n");
    for(i=0; i<width*height; i++){
        buffer[1][i]= random()%54321 - 12345;
        buffer[0][i]= buffer[1][i];
    }

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++){
        if(buffer[0][i]!= buffer[1][i])
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
    }

    /* --- round trip, type 0: deviations of up to 20 are tolerated --- */
    printf("testing 9/7 DWT\n");
    s.spatial_decomposition_type=0;
    for(i=0; i<width*height; i++){
        buffer[1][i]= random()%54321 - 12345;
        buffer[0][i]= buffer[1][i];
    }

    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);

    for(i=0; i<width*height; i++){
        if(FFABS(buffer[0][i] - buffer[1][i])>20)
            printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
    }

#if 0
    printf("testing AC coder\n");
    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
    }
    ff_rac_terminate(&s.c);

    memset(s.header_state, 0, sizeof(s.header_state));
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);

    for(i=-256; i<256; i++){
        int j;
        j= get_symbol(&s.c, s.header_state, 1);
        if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
    }
#endif
    {
        int level, orientation, x, y;
        int64_t band_norm[8][4];
        int64_t common_div=0;

        /* --- per-subband impulse responses, type 0, 3 levels --- */
        memset(band_norm, 0, sizeof(band_norm));
        s.spatial_decomposition_count=3;
        s.spatial_decomposition_type=0;
        for(level=0; level<s.spatial_decomposition_count; level++){
            const int shift= s.spatial_decomposition_count-level;
            const int dump = level==2; /* only the last level is printed */

            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                int w= width  >> shift;
                int h= height >> shift;
                int stride= width  << shift;
                DWTELEM *buf= buffer[0];
                int64_t error=0;

                /* move to the origin of the selected subband */
                if(orientation&1) buf+=w;
                if(orientation>1) buf+=stride>>1;

                /* one coefficient in the middle of the band, everything else 0 */
                memset(buffer[0], 0, sizeof(int)*width*height);
                buf[w/2 + h/2*stride]= 256*256;
                ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
                for(y=0; y<height; y++){
                    const int row_shown= dump && FFABS(height/2-y)<9;

                    for(x=0; x<width; x++){
                        int64_t d= buffer[0][x + y*width];
                        error += d*d;
                        if(row_shown && FFABS(width/2-x)<9) printf("%8"PRId64" ", d);
                    }
                    if(row_shown) printf("\n");
                }
                error= (int)(sqrt(error)+0.5);
                band_norm[level][orientation]= error;
                /* running gcd over all recorded values */
                common_div= common_div ? av_gcd(common_div, error) : error;
            }
        }

        printf("static int const visual_weight[][4]={\n");
        for(level=0; level<s.spatial_decomposition_count; level++){
            printf("  {");
            for(orientation=0; orientation<4; orientation++){
                printf("%8"PRId64",", band_norm[level][orientation]/common_div);
            }
            printf("},\n");
        }
        printf("};\n");

        /* --- forward transform of a 2x2-periodic pattern, centre dumped --- */
        {
            int level=2;
            int w= width  >> (s.spatial_decomposition_count-level);
            //int h= height >> (s.spatial_decomposition_count-level);
            int stride= width  << (s.spatial_decomposition_count-level);
            DWTELEM *buf= buffer[0];
            int64_t error=0;

            buf+=w;
            buf+=stride>>1;

            memset(buffer[0], 0, sizeof(int)*width*height);
#if 1
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int tab[4]={0,2,3,1};
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
                }
            }
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#else
            for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                    buf[x + y*stride  ]=169;
                    buf[x + y*stride-w]=64;
                }
            }
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
#endif
            for(y=0; y<height; y++){
                const int row_shown= FFABS(height/2-y)<9;

                for(x=0; x<width; x++){
                    int64_t d= buffer[0][x + y*width];
                    error += d*d;
                    if(FFABS(width/2-x)<9 && row_shown) printf("%8"PRId64" ", d);
                }
                if(row_shown) printf("\n");
            }
        }

    }
    return 0;
}
#endif /* TEST */

Generated on Tue Nov 4 2014 12:59:23 for ffmpeg by  doxygen 1.7.1