Libav 0.7.1
libavcodec/vp8.c
Go to the documentation of this file.
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "vp8.h"
00028 #include "vp8data.h"
00029 #include "rectangle.h"
00030 #include "thread.h"
00031 
00032 #if ARCH_ARM
00033 #   include "arm/vp8.h"
00034 #endif
00035 
00036 static void vp8_decode_flush(AVCodecContext *avctx)
00037 {
00038     VP8Context *s = avctx->priv_data;
00039     int i;
00040 
00041     if (!avctx->is_copy) {
00042         for (i = 0; i < 5; i++)
00043             if (s->frames[i].data[0])
00044                 ff_thread_release_buffer(avctx, &s->frames[i]);
00045     }
00046     memset(s->framep, 0, sizeof(s->framep));
00047 
00048     av_freep(&s->macroblocks_base);
00049     av_freep(&s->filter_strength);
00050     av_freep(&s->intra4x4_pred_mode_top);
00051     av_freep(&s->top_nnz);
00052     av_freep(&s->edge_emu_buffer);
00053     av_freep(&s->top_border);
00054     av_freep(&s->segmentation_map);
00055 
00056     s->macroblocks        = NULL;
00057 }
00058 
00059 static int update_dimensions(VP8Context *s, int width, int height)
00060 {
00061     if (width  != s->avctx->width ||
00062         height != s->avctx->height) {
00063         if (av_image_check_size(width, height, 0, s->avctx))
00064             return AVERROR_INVALIDDATA;
00065 
00066         vp8_decode_flush(s->avctx);
00067 
00068         avcodec_set_dimensions(s->avctx, width, height);
00069     }
00070 
00071     s->mb_width  = (s->avctx->coded_width +15) / 16;
00072     s->mb_height = (s->avctx->coded_height+15) / 16;
00073 
00074     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00075     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00076     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00077     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00078     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00079     s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
00080 
00081     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00082         !s->top_nnz || !s->top_border || !s->segmentation_map)
00083         return AVERROR(ENOMEM);
00084 
00085     s->macroblocks        = s->macroblocks_base + 1;
00086 
00087     return 0;
00088 }
00089 
00090 static void parse_segment_info(VP8Context *s)
00091 {
00092     VP56RangeCoder *c = &s->c;
00093     int i;
00094 
00095     s->segmentation.update_map = vp8_rac_get(c);
00096 
00097     if (vp8_rac_get(c)) { // update segment feature data
00098         s->segmentation.absolute_vals = vp8_rac_get(c);
00099 
00100         for (i = 0; i < 4; i++)
00101             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00102 
00103         for (i = 0; i < 4; i++)
00104             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00105     }
00106     if (s->segmentation.update_map)
00107         for (i = 0; i < 3; i++)
00108             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00109 }
00110 
00111 static void update_lf_deltas(VP8Context *s)
00112 {
00113     VP56RangeCoder *c = &s->c;
00114     int i;
00115 
00116     for (i = 0; i < 4; i++)
00117         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00118 
00119     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00120         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00121 }
00122 
00123 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00124 {
00125     const uint8_t *sizes = buf;
00126     int i;
00127 
00128     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00129 
00130     buf      += 3*(s->num_coeff_partitions-1);
00131     buf_size -= 3*(s->num_coeff_partitions-1);
00132     if (buf_size < 0)
00133         return -1;
00134 
00135     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00136         int size = AV_RL24(sizes + 3*i);
00137         if (buf_size - size < 0)
00138             return -1;
00139 
00140         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00141         buf      += size;
00142         buf_size -= size;
00143     }
00144     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00145 
00146     return 0;
00147 }
00148 
00149 static void get_quants(VP8Context *s)
00150 {
00151     VP56RangeCoder *c = &s->c;
00152     int i, base_qi;
00153 
00154     int yac_qi     = vp8_rac_get_uint(c, 7);
00155     int ydc_delta  = vp8_rac_get_sint(c, 4);
00156     int y2dc_delta = vp8_rac_get_sint(c, 4);
00157     int y2ac_delta = vp8_rac_get_sint(c, 4);
00158     int uvdc_delta = vp8_rac_get_sint(c, 4);
00159     int uvac_delta = vp8_rac_get_sint(c, 4);
00160 
00161     for (i = 0; i < 4; i++) {
00162         if (s->segmentation.enabled) {
00163             base_qi = s->segmentation.base_quant[i];
00164             if (!s->segmentation.absolute_vals)
00165                 base_qi += yac_qi;
00166         } else
00167             base_qi = yac_qi;
00168 
00169         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00170         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00171         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00172         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00173         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00174         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00175 
00176         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00177         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00178     }
00179 }
00180 
00194 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00195 {
00196     VP56RangeCoder *c = &s->c;
00197 
00198     if (update)
00199         return VP56_FRAME_CURRENT;
00200 
00201     switch (vp8_rac_get_uint(c, 2)) {
00202     case 1:
00203         return VP56_FRAME_PREVIOUS;
00204     case 2:
00205         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00206     }
00207     return VP56_FRAME_NONE;
00208 }
00209 
00210 static void update_refs(VP8Context *s)
00211 {
00212     VP56RangeCoder *c = &s->c;
00213 
00214     int update_golden = vp8_rac_get(c);
00215     int update_altref = vp8_rac_get(c);
00216 
00217     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00218     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00219 }
00220 
00221 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00222 {
00223     VP56RangeCoder *c = &s->c;
00224     int header_size, hscale, vscale, i, j, k, l, m, ret;
00225     int width  = s->avctx->width;
00226     int height = s->avctx->height;
00227 
00228     s->keyframe  = !(buf[0] & 1);
00229     s->profile   =  (buf[0]>>1) & 7;
00230     s->invisible = !(buf[0] & 0x10);
00231     header_size  = AV_RL24(buf) >> 5;
00232     buf      += 3;
00233     buf_size -= 3;
00234 
00235     if (s->profile > 3)
00236         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00237 
00238     if (!s->profile)
00239         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00240     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00241         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00242 
00243     if (header_size > buf_size - 7*s->keyframe) {
00244         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00245         return AVERROR_INVALIDDATA;
00246     }
00247 
00248     if (s->keyframe) {
00249         if (AV_RL24(buf) != 0x2a019d) {
00250             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00251             return AVERROR_INVALIDDATA;
00252         }
00253         width  = AV_RL16(buf+3) & 0x3fff;
00254         height = AV_RL16(buf+5) & 0x3fff;
00255         hscale = buf[4] >> 6;
00256         vscale = buf[6] >> 6;
00257         buf      += 7;
00258         buf_size -= 7;
00259 
00260         if (hscale || vscale)
00261             av_log_missing_feature(s->avctx, "Upscaling", 1);
00262 
00263         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00264         for (i = 0; i < 4; i++)
00265             for (j = 0; j < 16; j++)
00266                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00267                        sizeof(s->prob->token[i][j]));
00268         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00269         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00270         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00271         memset(&s->segmentation, 0, sizeof(s->segmentation));
00272     }
00273 
00274     if (!s->macroblocks_base || /* first frame */
00275         width != s->avctx->width || height != s->avctx->height) {
00276         if ((ret = update_dimensions(s, width, height) < 0))
00277             return ret;
00278     }
00279 
00280     ff_vp56_init_range_decoder(c, buf, header_size);
00281     buf      += header_size;
00282     buf_size -= header_size;
00283 
00284     if (s->keyframe) {
00285         if (vp8_rac_get(c))
00286             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00287         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00288     }
00289 
00290     if ((s->segmentation.enabled = vp8_rac_get(c)))
00291         parse_segment_info(s);
00292     else
00293         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00294 
00295     s->filter.simple    = vp8_rac_get(c);
00296     s->filter.level     = vp8_rac_get_uint(c, 6);
00297     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00298 
00299     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00300         if (vp8_rac_get(c))
00301             update_lf_deltas(s);
00302 
00303     if (setup_partitions(s, buf, buf_size)) {
00304         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00305         return AVERROR_INVALIDDATA;
00306     }
00307 
00308     get_quants(s);
00309 
00310     if (!s->keyframe) {
00311         update_refs(s);
00312         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00313         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00314     }
00315 
00316     // if we aren't saving this frame's probabilities for future frames,
00317     // make a copy of the current probabilities
00318     if (!(s->update_probabilities = vp8_rac_get(c)))
00319         s->prob[1] = s->prob[0];
00320 
00321     s->update_last = s->keyframe || vp8_rac_get(c);
00322 
00323     for (i = 0; i < 4; i++)
00324         for (j = 0; j < 8; j++)
00325             for (k = 0; k < 3; k++)
00326                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00327                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00328                         int prob = vp8_rac_get_uint(c, 8);
00329                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00330                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00331                     }
00332 
00333     if ((s->mbskip_enabled = vp8_rac_get(c)))
00334         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00335 
00336     if (!s->keyframe) {
00337         s->prob->intra  = vp8_rac_get_uint(c, 8);
00338         s->prob->last   = vp8_rac_get_uint(c, 8);
00339         s->prob->golden = vp8_rac_get_uint(c, 8);
00340 
00341         if (vp8_rac_get(c))
00342             for (i = 0; i < 4; i++)
00343                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00344         if (vp8_rac_get(c))
00345             for (i = 0; i < 3; i++)
00346                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00347 
00348         // 17.2 MV probability update
00349         for (i = 0; i < 2; i++)
00350             for (j = 0; j < 19; j++)
00351                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00352                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00353     }
00354 
00355     return 0;
00356 }
00357 
00358 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00359 {
00360     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00361     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00362 }
00363 
00367 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00368 {
00369     int bit, x = 0;
00370 
00371     if (vp56_rac_get_prob_branchy(c, p[0])) {
00372         int i;
00373 
00374         for (i = 0; i < 3; i++)
00375             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00376         for (i = 9; i > 3; i--)
00377             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00378         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00379             x += 8;
00380     } else {
00381         // small_mvtree
00382         const uint8_t *ps = p+2;
00383         bit = vp56_rac_get_prob(c, *ps);
00384         ps += 1 + 3*bit;
00385         x  += 4*bit;
00386         bit = vp56_rac_get_prob(c, *ps);
00387         ps += 1 + bit;
00388         x  += 2*bit;
00389         x  += vp56_rac_get_prob(c, *ps);
00390     }
00391 
00392     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00393 }
00394 
00395 static av_always_inline
00396 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00397 {
00398     if (left == top)
00399         return vp8_submv_prob[4-!!left];
00400     if (!top)
00401         return vp8_submv_prob[2];
00402     return vp8_submv_prob[1-!!left];
00403 }
00404 
00409 static av_always_inline
00410 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00411 {
00412     int part_idx;
00413     int n, num;
00414     VP8Macroblock *top_mb  = &mb[2];
00415     VP8Macroblock *left_mb = &mb[-1];
00416     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00417                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00418                   *mbsplits_cur, *firstidx;
00419     VP56mv *top_mv  = top_mb->bmv;
00420     VP56mv *left_mv = left_mb->bmv;
00421     VP56mv *cur_mv  = mb->bmv;
00422 
00423     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00424         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00425             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00426         } else {
00427             part_idx = VP8_SPLITMVMODE_8x8;
00428         }
00429     } else {
00430         part_idx = VP8_SPLITMVMODE_4x4;
00431     }
00432 
00433     num = vp8_mbsplit_count[part_idx];
00434     mbsplits_cur = vp8_mbsplits[part_idx],
00435     firstidx = vp8_mbfirstidx[part_idx];
00436     mb->partitioning = part_idx;
00437 
00438     for (n = 0; n < num; n++) {
00439         int k = firstidx[n];
00440         uint32_t left, above;
00441         const uint8_t *submv_prob;
00442 
00443         if (!(k & 3))
00444             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00445         else
00446             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00447         if (k <= 3)
00448             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00449         else
00450             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00451 
00452         submv_prob = get_submv_prob(left, above);
00453 
00454         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00455             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00456                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00457                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00458                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00459                 } else {
00460                     AV_ZERO32(&mb->bmv[n]);
00461                 }
00462             } else {
00463                 AV_WN32A(&mb->bmv[n], above);
00464             }
00465         } else {
00466             AV_WN32A(&mb->bmv[n], left);
00467         }
00468     }
00469 
00470     return num;
00471 }
00472 
00473 static av_always_inline
00474 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00475 {
00476     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00477                                   mb - 1 /* left */,
00478                                   mb + 1 /* top-left */ };
00479     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00480     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00481     int idx = CNT_ZERO;
00482     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00483     int8_t *sign_bias = s->sign_bias;
00484     VP56mv near_mv[4];
00485     uint8_t cnt[4] = { 0 };
00486     VP56RangeCoder *c = &s->c;
00487 
00488     AV_ZERO32(&near_mv[0]);
00489     AV_ZERO32(&near_mv[1]);
00490 
00491     /* Process MB on top, left and top-left */
00492     #define MV_EDGE_CHECK(n)\
00493     {\
00494         VP8Macroblock *edge = mb_edge[n];\
00495         int edge_ref = edge->ref_frame;\
00496         if (edge_ref != VP56_FRAME_CURRENT) {\
00497             uint32_t mv = AV_RN32A(&edge->mv);\
00498             if (mv) {\
00499                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00500                     /* SWAR negate of the values in mv. */\
00501                     mv = ~mv;\
00502                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00503                 }\
00504                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00505                     AV_WN32A(&near_mv[++idx], mv);\
00506                 cnt[idx]      += 1 + (n != 2);\
00507             } else\
00508                 cnt[CNT_ZERO] += 1 + (n != 2);\
00509         }\
00510     }
00511 
00512     MV_EDGE_CHECK(0)
00513     MV_EDGE_CHECK(1)
00514     MV_EDGE_CHECK(2)
00515 
00516     mb->partitioning = VP8_SPLITMVMODE_NONE;
00517     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00518         mb->mode = VP8_MVMODE_MV;
00519 
00520         /* If we have three distinct MVs, merge first and last if they're the same */
00521         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00522             cnt[CNT_NEAREST] += 1;
00523 
00524         /* Swap near and nearest if necessary */
00525         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00526             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00527             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00528         }
00529 
00530         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00531             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00532 
00533                 /* Choose the best mv out of 0,0 and the nearest mv */
00534                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
00535                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00536                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00537                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00538 
00539                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00540                     mb->mode = VP8_MVMODE_SPLIT;
00541                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00542                 } else {
00543                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00544                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00545                     mb->bmv[0] = mb->mv;
00546                 }
00547             } else {
00548                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00549                 mb->bmv[0] = mb->mv;
00550             }
00551         } else {
00552             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00553             mb->bmv[0] = mb->mv;
00554         }
00555     } else {
00556         mb->mode = VP8_MVMODE_ZERO;
00557         AV_ZERO32(&mb->mv);
00558         mb->bmv[0] = mb->mv;
00559     }
00560 }
00561 
00562 static av_always_inline
00563 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00564                            int mb_x, int keyframe)
00565 {
00566     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00567     if (keyframe) {
00568         int x, y;
00569         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00570         uint8_t* const left = s->intra4x4_pred_mode_left;
00571         for (y = 0; y < 4; y++) {
00572             for (x = 0; x < 4; x++) {
00573                 const uint8_t *ctx;
00574                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00575                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00576                 left[y] = top[x] = *intra4x4;
00577                 intra4x4++;
00578             }
00579         }
00580     } else {
00581         int i;
00582         for (i = 0; i < 16; i++)
00583             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00584     }
00585 }
00586 
00587 static av_always_inline
00588 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00589 {
00590     VP56RangeCoder *c = &s->c;
00591 
00592     if (s->segmentation.update_map)
00593         *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
00594     else
00595         *segment = ref ? *ref : *segment;
00596     s->segment = *segment;
00597 
00598     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00599 
00600     if (s->keyframe) {
00601         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00602 
00603         if (mb->mode == MODE_I4x4) {
00604             decode_intra4x4_modes(s, c, mb_x, 1);
00605         } else {
00606             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00607             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00608             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00609         }
00610 
00611         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00612         mb->ref_frame = VP56_FRAME_CURRENT;
00613     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00614         // inter MB, 16.2
00615         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00616             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00617                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00618         else
00619             mb->ref_frame = VP56_FRAME_PREVIOUS;
00620         s->ref_count[mb->ref_frame-1]++;
00621 
00622         // motion vectors, 16.3
00623         decode_mvs(s, mb, mb_x, mb_y);
00624     } else {
00625         // intra MB, 16.1
00626         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00627 
00628         if (mb->mode == MODE_I4x4)
00629             decode_intra4x4_modes(s, c, mb_x, 0);
00630 
00631         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00632         mb->ref_frame = VP56_FRAME_CURRENT;
00633         mb->partitioning = VP8_SPLITMVMODE_NONE;
00634         AV_ZERO32(&mb->bmv[0]);
00635     }
00636 }
00637 
00638 #ifndef decode_block_coeffs_internal
00639 
00648 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00649                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00650                                         int i, uint8_t *token_prob, int16_t qmul[2])
00651 {
00652     goto skip_eob;
00653     do {
00654         int coeff;
00655         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00656             return i;
00657 
00658 skip_eob:
00659         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00660             if (++i == 16)
00661                 return i; // invalid input; blocks should end with EOB
00662             token_prob = probs[i][0];
00663             goto skip_eob;
00664         }
00665 
00666         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00667             coeff = 1;
00668             token_prob = probs[i+1][1];
00669         } else {
00670             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00671                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00672                 if (coeff)
00673                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00674                 coeff += 2;
00675             } else {
00676                 // DCT_CAT*
00677                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00678                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00679                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00680                     } else {                                    // DCT_CAT2
00681                         coeff  = 7;
00682                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00683                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00684                     }
00685                 } else {    // DCT_CAT3 and up
00686                     int a = vp56_rac_get_prob(c, token_prob[8]);
00687                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00688                     int cat = (a<<1) + b;
00689                     coeff  = 3 + (8<<cat);
00690                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00691                 }
00692             }
00693             token_prob = probs[i+1][2];
00694         }
00695         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00696     } while (++i < 16);
00697 
00698     return i;
00699 }
00700 #endif
00701 
00713 static av_always_inline
00714 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00715                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00716                         int i, int zero_nhood, int16_t qmul[2])
00717 {
00718     uint8_t *token_prob = probs[i][zero_nhood];
00719     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00720         return 0;
00721     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00722 }
00723 
00724 static av_always_inline
00725 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00726                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00727 {
00728     int i, x, y, luma_start = 0, luma_ctx = 3;
00729     int nnz_pred, nnz, nnz_total = 0;
00730     int segment = s->segment;
00731     int block_dc = 0;
00732 
00733     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00734         nnz_pred = t_nnz[8] + l_nnz[8];
00735 
00736         // decode DC values and do hadamard
00737         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00738                                   s->qmat[segment].luma_dc_qmul);
00739         l_nnz[8] = t_nnz[8] = !!nnz;
00740         if (nnz) {
00741             nnz_total += nnz;
00742             block_dc = 1;
00743             if (nnz == 1)
00744                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00745             else
00746                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00747         }
00748         luma_start = 1;
00749         luma_ctx = 0;
00750     }
00751 
00752     // luma blocks
00753     for (y = 0; y < 4; y++)
00754         for (x = 0; x < 4; x++) {
00755             nnz_pred = l_nnz[y] + t_nnz[x];
00756             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00757                                       nnz_pred, s->qmat[segment].luma_qmul);
00758             // nnz+block_dc may be one more than the actual last index, but we don't care
00759             s->non_zero_count_cache[y][x] = nnz + block_dc;
00760             t_nnz[x] = l_nnz[y] = !!nnz;
00761             nnz_total += nnz;
00762         }
00763 
00764     // chroma blocks
00765     // TODO: what to do about dimensions? 2nd dim for luma is x,
00766     // but for chroma it's (y<<1)|x
00767     for (i = 4; i < 6; i++)
00768         for (y = 0; y < 2; y++)
00769             for (x = 0; x < 2; x++) {
00770                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00771                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00772                                           nnz_pred, s->qmat[segment].chroma_qmul);
00773                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00774                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00775                 nnz_total += nnz;
00776             }
00777 
00778     // if there were no coded coeffs despite the macroblock not being marked skip,
00779     // we MUST not do the inner loop filter and should not do IDCT
00780     // Since skip isn't used for bitstream prediction, just manually set it.
00781     if (!nnz_total)
00782         mb->skip = 1;
00783 }
00784 
00785 static av_always_inline
00786 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00787                       int linesize, int uvlinesize, int simple)
00788 {
00789     AV_COPY128(top_border, src_y + 15*linesize);
00790     if (!simple) {
00791         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00792         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00793     }
00794 }
00795 
00796 static av_always_inline
00797 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00798                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00799                     int simple, int xchg)
00800 {
00801     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00802     src_y  -=   linesize;
00803     src_cb -= uvlinesize;
00804     src_cr -= uvlinesize;
00805 
00806 #define XCHG(a,b,xchg) do {                     \
00807         if (xchg) AV_SWAP64(b,a);               \
00808         else      AV_COPY64(b,a);               \
00809     } while (0)
00810 
00811     XCHG(top_border_m1+8, src_y-8, xchg);
00812     XCHG(top_border,      src_y,   xchg);
00813     XCHG(top_border+8,    src_y+8, 1);
00814     if (mb_x < mb_width-1)
00815         XCHG(top_border+32, src_y+16, 1);
00816 
00817     // only copy chroma for normal loop filter
00818     // or to initialize the top row to 127
00819     if (!simple || !mb_y) {
00820         XCHG(top_border_m1+16, src_cb-8, xchg);
00821         XCHG(top_border_m1+24, src_cr-8, xchg);
00822         XCHG(top_border+16,    src_cb, 1);
00823         XCHG(top_border+24,    src_cr, 1);
00824     }
00825 }
00826 
00827 static av_always_inline
00828 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00829 {
00830     if (!mb_x) {
00831         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00832     } else {
00833         return mb_y ? mode : LEFT_DC_PRED8x8;
00834     }
00835 }
00836 
00837 static av_always_inline
00838 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00839 {
00840     if (!mb_x) {
00841         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00842     } else {
00843         return mb_y ? mode : HOR_PRED8x8;
00844     }
00845 }
00846 
00847 static av_always_inline
00848 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00849 {
00850     if (mode == DC_PRED8x8) {
00851         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00852     } else {
00853         return mode;
00854     }
00855 }
00856 
00857 static av_always_inline
00858 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00859 {
00860     switch (mode) {
00861     case DC_PRED8x8:
00862         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00863     case VERT_PRED8x8:
00864         return !mb_y ? DC_127_PRED8x8 : mode;
00865     case HOR_PRED8x8:
00866         return !mb_x ? DC_129_PRED8x8 : mode;
00867     case PLANE_PRED8x8 /*TM*/:
00868         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00869     }
00870     return mode;
00871 }
00872 
00873 static av_always_inline
00874 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00875 {
00876     if (!mb_x) {
00877         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00878     } else {
00879         return mb_y ? mode : HOR_VP8_PRED;
00880     }
00881 }
00882 
00883 static av_always_inline
00884 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00885 {
00886     switch (mode) {
00887     case VERT_PRED:
00888         if (!mb_x && mb_y) {
00889             *copy_buf = 1;
00890             return mode;
00891         }
00892         /* fall-through */
00893     case DIAG_DOWN_LEFT_PRED:
00894     case VERT_LEFT_PRED:
00895         return !mb_y ? DC_127_PRED : mode;
00896     case HOR_PRED:
00897         if (!mb_y) {
00898             *copy_buf = 1;
00899             return mode;
00900         }
00901         /* fall-through */
00902     case HOR_UP_PRED:
00903         return !mb_x ? DC_129_PRED : mode;
00904     case TM_VP8_PRED:
00905         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00906     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00907     case DIAG_DOWN_RIGHT_PRED:
00908     case VERT_RIGHT_PRED:
00909     case HOR_DOWN_PRED:
00910         if (!mb_y || !mb_x)
00911             *copy_buf = 1;
00912         return mode;
00913     }
00914     return mode;
00915 }
00916 
00917 static av_always_inline
00918 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00919                    int mb_x, int mb_y)
00920 {
00921     AVCodecContext *avctx = s->avctx;
00922     int x, y, mode, nnz;
00923     uint32_t tr;
00924 
00925     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00926     // otherwise, skip it if we aren't going to deblock
00927     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00928         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00929                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00930                        s->filter.simple, 1);
00931 
00932     if (mb->mode < MODE_I4x4) {
00933         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00934             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00935         } else {
00936             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00937         }
00938         s->hpc.pred16x16[mode](dst[0], s->linesize);
00939     } else {
00940         uint8_t *ptr = dst[0];
00941         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00942         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00943 
00944         // all blocks on the right edge of the macroblock use bottom edge
00945         // the top macroblock for their topright edge
00946         uint8_t *tr_right = ptr - s->linesize + 16;
00947 
00948         // if we're on the right edge of the frame, said edge is extended
00949         // from the top macroblock
00950         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
00951             mb_x == s->mb_width-1) {
00952             tr = tr_right[-1]*0x01010101u;
00953             tr_right = (uint8_t *)&tr;
00954         }
00955 
00956         if (mb->skip)
00957             AV_ZERO128(s->non_zero_count_cache);
00958 
00959         for (y = 0; y < 4; y++) {
00960             uint8_t *topright = ptr + 4 - s->linesize;
00961             for (x = 0; x < 4; x++) {
00962                 int copy = 0, linesize = s->linesize;
00963                 uint8_t *dst = ptr+4*x;
00964                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
00965 
00966                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
00967                     topright = tr_top;
00968                 } else if (x == 3)
00969                     topright = tr_right;
00970 
00971                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
00972                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
00973                     if (copy) {
00974                         dst = copy_dst + 12;
00975                         linesize = 8;
00976                         if (!(mb_y + y)) {
00977                             copy_dst[3] = 127U;
00978                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
00979                         } else {
00980                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
00981                             if (!(mb_x + x)) {
00982                                 copy_dst[3] = 129U;
00983                             } else {
00984                                 copy_dst[3] = ptr[4*x-s->linesize-1];
00985                             }
00986                         }
00987                         if (!(mb_x + x)) {
00988                             copy_dst[11] =
00989                             copy_dst[19] =
00990                             copy_dst[27] =
00991                             copy_dst[35] = 129U;
00992                         } else {
00993                             copy_dst[11] = ptr[4*x              -1];
00994                             copy_dst[19] = ptr[4*x+s->linesize  -1];
00995                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
00996                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
00997                         }
00998                     }
00999                 } else {
01000                     mode = intra4x4[x];
01001                 }
01002                 s->hpc.pred4x4[mode](dst, topright, linesize);
01003                 if (copy) {
01004                     AV_COPY32(ptr+4*x              , copy_dst+12);
01005                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01006                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01007                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01008                 }
01009 
01010                 nnz = s->non_zero_count_cache[y][x];
01011                 if (nnz) {
01012                     if (nnz == 1)
01013                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01014                     else
01015                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01016                 }
01017                 topright += 4;
01018             }
01019 
01020             ptr   += 4*s->linesize;
01021             intra4x4 += 4;
01022         }
01023     }
01024 
01025     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01026         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01027     } else {
01028         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01029     }
01030     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01031     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01032 
01033     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01034         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01035                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01036                        s->filter.simple, 0);
01037 }
01038 
/* Per-subpel-position lookup, indexed as [row][fractional position 0..7]. */
static const uint8_t subpel_idx[3][8] = {
    /* [0] nr. of left extra pixels, also function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1] nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2] nr. of right extra pixels */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
01045 
01062 static av_always_inline
01063 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01064                  int x_off, int y_off, int block_w, int block_h,
01065                  int width, int height, int linesize,
01066                  vp8_mc_func mc_func[3][3])
01067 {
01068     uint8_t *src = ref->data[0];
01069 
01070     if (AV_RN32A(mv)) {
01071 
01072         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01073         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01074 
01075         x_off += mv->x >> 2;
01076         y_off += mv->y >> 2;
01077 
01078         // edge emulation
01079         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01080         src += y_off * linesize + x_off;
01081         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01082             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01083             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01084                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01085                                     x_off - mx_idx, y_off - my_idx, width, height);
01086             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01087         }
01088         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01089     } else {
01090         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01091         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01092     }
01093 }
01094 
01112 static av_always_inline
01113 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01114                    const VP56mv *mv, int x_off, int y_off,
01115                    int block_w, int block_h, int width, int height, int linesize,
01116                    vp8_mc_func mc_func[3][3])
01117 {
01118     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01119 
01120     if (AV_RN32A(mv)) {
01121         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01122         int my = mv->y&7, my_idx = subpel_idx[0][my];
01123 
01124         x_off += mv->x >> 3;
01125         y_off += mv->y >> 3;
01126 
01127         // edge emulation
01128         src1 += y_off * linesize + x_off;
01129         src2 += y_off * linesize + x_off;
01130         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01131         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01132             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01133             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01134                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01135                                     x_off - mx_idx, y_off - my_idx, width, height);
01136             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01137             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01138 
01139             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01140                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01141                                     x_off - mx_idx, y_off - my_idx, width, height);
01142             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01143             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01144         } else {
01145             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01146             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01147         }
01148     } else {
01149         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01150         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01151         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01152     }
01153 }
01154 
01155 static av_always_inline
01156 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01157                  AVFrame *ref_frame, int x_off, int y_off,
01158                  int bx_off, int by_off,
01159                  int block_w, int block_h,
01160                  int width, int height, VP56mv *mv)
01161 {
01162     VP56mv uvmv = *mv;
01163 
01164     /* Y */
01165     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01166                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01167                 block_w, block_h, width, height, s->linesize,
01168                 s->put_pixels_tab[block_w == 8]);
01169 
01170     /* U/V */
01171     if (s->profile == 3) {
01172         uvmv.x &= ~7;
01173         uvmv.y &= ~7;
01174     }
01175     x_off   >>= 1; y_off   >>= 1;
01176     bx_off  >>= 1; by_off  >>= 1;
01177     width   >>= 1; height  >>= 1;
01178     block_w >>= 1; block_h >>= 1;
01179     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01180                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01181                   &uvmv, x_off + bx_off, y_off + by_off,
01182                   block_w, block_h, width, height, s->uvlinesize,
01183                   s->put_pixels_tab[1 + (block_w == 4)]);
01184 }
01185 
01186 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01187  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01188 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01189 {
01190     /* Don't prefetch refs that haven't been used very often this frame. */
01191     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01192         int x_off = mb_x << 4, y_off = mb_y << 4;
01193         int mx = (mb->mv.x>>2) + x_off + 8;
01194         int my = (mb->mv.y>>2) + y_off;
01195         uint8_t **src= s->framep[ref]->data;
01196         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01197         /* For threading, a ff_thread_await_progress here might be useful, but
01198          * it actually slows down the decoder. Since a bad prefetch doesn't
01199          * generate bad decoder output, we don't run it here. */
01200         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01201         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01202         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01203     }
01204 }
01205 
01209 static av_always_inline
01210 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01211                    int mb_x, int mb_y)
01212 {
01213     int x_off = mb_x << 4, y_off = mb_y << 4;
01214     int width = 16*s->mb_width, height = 16*s->mb_height;
01215     AVFrame *ref = s->framep[mb->ref_frame];
01216     VP56mv *bmv = mb->bmv;
01217 
01218     switch (mb->partitioning) {
01219     case VP8_SPLITMVMODE_NONE:
01220         vp8_mc_part(s, dst, ref, x_off, y_off,
01221                     0, 0, 16, 16, width, height, &mb->mv);
01222         break;
01223     case VP8_SPLITMVMODE_4x4: {
01224         int x, y;
01225         VP56mv uvmv;
01226 
01227         /* Y */
01228         for (y = 0; y < 4; y++) {
01229             for (x = 0; x < 4; x++) {
01230                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01231                             ref, &bmv[4*y + x],
01232                             4*x + x_off, 4*y + y_off, 4, 4,
01233                             width, height, s->linesize,
01234                             s->put_pixels_tab[2]);
01235             }
01236         }
01237 
01238         /* U/V */
01239         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01240         for (y = 0; y < 2; y++) {
01241             for (x = 0; x < 2; x++) {
01242                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01243                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01244                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01245                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01246                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01247                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01248                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01249                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
01250                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01251                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01252                 if (s->profile == 3) {
01253                     uvmv.x &= ~7;
01254                     uvmv.y &= ~7;
01255                 }
01256                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01257                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01258                               4*x + x_off, 4*y + y_off, 4, 4,
01259                               width, height, s->uvlinesize,
01260                               s->put_pixels_tab[2]);
01261             }
01262         }
01263         break;
01264     }
01265     case VP8_SPLITMVMODE_16x8:
01266         vp8_mc_part(s, dst, ref, x_off, y_off,
01267                     0, 0, 16, 8, width, height, &bmv[0]);
01268         vp8_mc_part(s, dst, ref, x_off, y_off,
01269                     0, 8, 16, 8, width, height, &bmv[1]);
01270         break;
01271     case VP8_SPLITMVMODE_8x16:
01272         vp8_mc_part(s, dst, ref, x_off, y_off,
01273                     0, 0, 8, 16, width, height, &bmv[0]);
01274         vp8_mc_part(s, dst, ref, x_off, y_off,
01275                     8, 0, 8, 16, width, height, &bmv[1]);
01276         break;
01277     case VP8_SPLITMVMODE_8x8:
01278         vp8_mc_part(s, dst, ref, x_off, y_off,
01279                     0, 0, 8, 8, width, height, &bmv[0]);
01280         vp8_mc_part(s, dst, ref, x_off, y_off,
01281                     8, 0, 8, 8, width, height, &bmv[1]);
01282         vp8_mc_part(s, dst, ref, x_off, y_off,
01283                     0, 8, 8, 8, width, height, &bmv[2]);
01284         vp8_mc_part(s, dst, ref, x_off, y_off,
01285                     8, 8, 8, 8, width, height, &bmv[3]);
01286         break;
01287     }
01288 }
01289 
01290 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01291 {
01292     int x, y, ch;
01293 
01294     if (mb->mode != MODE_I4x4) {
01295         uint8_t *y_dst = dst[0];
01296         for (y = 0; y < 4; y++) {
01297             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01298             if (nnz4) {
01299                 if (nnz4&~0x01010101) {
01300                     for (x = 0; x < 4; x++) {
01301                         if ((uint8_t)nnz4 == 1)
01302                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01303                         else if((uint8_t)nnz4 > 1)
01304                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
01305                         nnz4 >>= 8;
01306                         if (!nnz4)
01307                             break;
01308                     }
01309                 } else {
01310                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01311                 }
01312             }
01313             y_dst += 4*s->linesize;
01314         }
01315     }
01316 
01317     for (ch = 0; ch < 2; ch++) {
01318         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01319         if (nnz4) {
01320             uint8_t *ch_dst = dst[1+ch];
01321             if (nnz4&~0x01010101) {
01322                 for (y = 0; y < 2; y++) {
01323                     for (x = 0; x < 2; x++) {
01324                         if ((uint8_t)nnz4 == 1)
01325                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01326                         else if((uint8_t)nnz4 > 1)
01327                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01328                         nnz4 >>= 8;
01329                         if (!nnz4)
01330                             goto chroma_idct_end;
01331                     }
01332                     ch_dst += 4*s->uvlinesize;
01333                 }
01334             } else {
01335                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01336             }
01337         }
01338 chroma_idct_end: ;
01339     }
01340 }
01341 
01342 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01343 {
01344     int interior_limit, filter_level;
01345 
01346     if (s->segmentation.enabled) {
01347         filter_level = s->segmentation.filter_level[s->segment];
01348         if (!s->segmentation.absolute_vals)
01349             filter_level += s->filter.level;
01350     } else
01351         filter_level = s->filter.level;
01352 
01353     if (s->lf_delta.enabled) {
01354         filter_level += s->lf_delta.ref[mb->ref_frame];
01355         filter_level += s->lf_delta.mode[mb->mode];
01356     }
01357 
01358     filter_level = av_clip_uintp2(filter_level, 6);
01359 
01360     interior_limit = filter_level;
01361     if (s->filter.sharpness) {
01362         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01363         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01364     }
01365     interior_limit = FFMAX(interior_limit, 1);
01366 
01367     f->filter_level = filter_level;
01368     f->inner_limit = interior_limit;
01369     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01370 }
01371 
01372 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01373 {
01374     int mbedge_lim, bedge_lim, hev_thresh;
01375     int filter_level = f->filter_level;
01376     int inner_limit = f->inner_limit;
01377     int inner_filter = f->inner_filter;
01378     int linesize = s->linesize;
01379     int uvlinesize = s->uvlinesize;
01380     static const uint8_t hev_thresh_lut[2][64] = {
01381         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01382           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01383           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01384           3, 3, 3, 3 },
01385         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01386           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01387           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01388           2, 2, 2, 2 }
01389     };
01390 
01391     if (!filter_level)
01392         return;
01393 
01394      bedge_lim = 2*filter_level + inner_limit;
01395     mbedge_lim = bedge_lim + 4;
01396 
01397     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01398 
01399     if (mb_x) {
01400         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01401                                        mbedge_lim, inner_limit, hev_thresh);
01402         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01403                                        mbedge_lim, inner_limit, hev_thresh);
01404     }
01405 
01406     if (inner_filter) {
01407         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01408                                              inner_limit, hev_thresh);
01409         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01410                                              inner_limit, hev_thresh);
01411         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01412                                              inner_limit, hev_thresh);
01413         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01414                                              uvlinesize,  bedge_lim,
01415                                              inner_limit, hev_thresh);
01416     }
01417 
01418     if (mb_y) {
01419         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01420                                        mbedge_lim, inner_limit, hev_thresh);
01421         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01422                                        mbedge_lim, inner_limit, hev_thresh);
01423     }
01424 
01425     if (inner_filter) {
01426         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01427                                              linesize,    bedge_lim,
01428                                              inner_limit, hev_thresh);
01429         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01430                                              linesize,    bedge_lim,
01431                                              inner_limit, hev_thresh);
01432         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01433                                              linesize,    bedge_lim,
01434                                              inner_limit, hev_thresh);
01435         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01436                                              dst[2] + 4 * uvlinesize,
01437                                              uvlinesize,  bedge_lim,
01438                                              inner_limit, hev_thresh);
01439     }
01440 }
01441 
01442 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01443 {
01444     int mbedge_lim, bedge_lim;
01445     int filter_level = f->filter_level;
01446     int inner_limit = f->inner_limit;
01447     int inner_filter = f->inner_filter;
01448     int linesize = s->linesize;
01449 
01450     if (!filter_level)
01451         return;
01452 
01453      bedge_lim = 2*filter_level + inner_limit;
01454     mbedge_lim = bedge_lim + 4;
01455 
01456     if (mb_x)
01457         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01458     if (inner_filter) {
01459         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01460         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01461         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01462     }
01463 
01464     if (mb_y)
01465         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01466     if (inner_filter) {
01467         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01468         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01469         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01470     }
01471 }
01472 
01473 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01474 {
01475     VP8FilterStrength *f = s->filter_strength;
01476     uint8_t *dst[3] = {
01477         curframe->data[0] + 16*mb_y*s->linesize,
01478         curframe->data[1] +  8*mb_y*s->uvlinesize,
01479         curframe->data[2] +  8*mb_y*s->uvlinesize
01480     };
01481     int mb_x;
01482 
01483     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01484         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01485         filter_mb(s, dst, f++, mb_x, mb_y);
01486         dst[0] += 16;
01487         dst[1] += 8;
01488         dst[2] += 8;
01489     }
01490 }
01491 
01492 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01493 {
01494     VP8FilterStrength *f = s->filter_strength;
01495     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01496     int mb_x;
01497 
01498     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01499         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01500         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01501         dst += 16;
01502     }
01503 }
01504 
01505 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01506                             AVPacket *avpkt)
01507 {
01508     VP8Context *s = avctx->priv_data;
01509     int ret, mb_x, mb_y, i, y, referenced;
01510     enum AVDiscard skip_thresh;
01511     AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
01512 
01513     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01514         return ret;
01515 
01516     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01517                                 || s->update_altref == VP56_FRAME_CURRENT;
01518 
01519     skip_thresh = !referenced ? AVDISCARD_NONREF :
01520                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01521 
01522     if (avctx->skip_frame >= skip_thresh) {
01523         s->invisible = 1;
01524         goto skip_decode;
01525     }
01526     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01527 
01528     // release no longer referenced frames
01529     for (i = 0; i < 5; i++)
01530         if (s->frames[i].data[0] &&
01531             &s->frames[i] != prev_frame &&
01532             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01533             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01534             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01535             ff_thread_release_buffer(avctx, &s->frames[i]);
01536 
01537     // find a free buffer
01538     for (i = 0; i < 5; i++)
01539         if (&s->frames[i] != prev_frame &&
01540             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01541             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01542             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01543             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01544             break;
01545         }
01546     if (i == 5) {
01547         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01548         abort();
01549     }
01550     if (curframe->data[0])
01551         ff_thread_release_buffer(avctx, curframe);
01552 
01553     curframe->key_frame = s->keyframe;
01554     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01555     curframe->reference = referenced ? 3 : 0;
01556     curframe->ref_index[0] = s->segmentation_map;
01557     if ((ret = ff_thread_get_buffer(avctx, curframe))) {
01558         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01559         return ret;
01560     }
01561 
01562     // check if golden and altref are swapped
01563     if (s->update_altref != VP56_FRAME_NONE) {
01564         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01565     } else {
01566         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01567     }
01568     if (s->update_golden != VP56_FRAME_NONE) {
01569         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01570     } else {
01571         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01572     }
01573     if (s->update_last) {
01574         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01575     } else {
01576         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01577     }
01578     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01579 
01580     ff_thread_finish_setup(avctx);
01581 
01582     // Given that arithmetic probabilities are updated every frame, it's quite likely
01583     // that the values we have on a random interframe are complete junk if we didn't
01584     // start decode on a keyframe. So just don't display anything rather than junk.
01585     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01586                          !s->framep[VP56_FRAME_GOLDEN] ||
01587                          !s->framep[VP56_FRAME_GOLDEN2])) {
01588         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01589         return AVERROR_INVALIDDATA;
01590     }
01591 
01592     s->linesize   = curframe->linesize[0];
01593     s->uvlinesize = curframe->linesize[1];
01594 
01595     if (!s->edge_emu_buffer)
01596         s->edge_emu_buffer = av_malloc(21*s->linesize);
01597 
01598     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01599 
01600     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01601     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01602 
01603     // top edge of 127 for intra prediction
01604     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01605         s->top_border[0][15] = s->top_border[0][23] = 127;
01606         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01607     }
01608     memset(s->ref_count, 0, sizeof(s->ref_count));
01609     if (s->keyframe)
01610         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01611 
01612 #define MARGIN (16 << 2)
01613     s->mv_min.y = -MARGIN;
01614     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01615 
01616     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01617         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01618         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01619         int mb_xy = mb_y*s->mb_width;
01620         uint8_t *dst[3] = {
01621             curframe->data[0] + 16*mb_y*s->linesize,
01622             curframe->data[1] +  8*mb_y*s->uvlinesize,
01623             curframe->data[2] +  8*mb_y*s->uvlinesize
01624         };
01625 
01626         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01627         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01628         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01629 
01630         // left edge of 129 for intra prediction
01631         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01632             for (i = 0; i < 3; i++)
01633                 for (y = 0; y < 16>>!!i; y++)
01634                     dst[i][y*curframe->linesize[i]-1] = 129;
01635             if (mb_y == 1) // top left edge is also 129
01636                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01637         }
01638 
01639         s->mv_min.x = -MARGIN;
01640         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01641         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01642             ff_thread_await_progress(prev_frame, mb_y, 0);
01643 
01644         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01645             /* Prefetch the current frame, 4 MBs ahead */
01646             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01647             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01648 
01649             decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
01650                            prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
01651 
01652             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01653 
01654             if (!mb->skip)
01655                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01656 
01657             if (mb->mode <= MODE_I4x4)
01658                 intra_predict(s, dst, mb, mb_x, mb_y);
01659             else
01660                 inter_predict(s, dst, mb, mb_x, mb_y);
01661 
01662             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01663 
01664             if (!mb->skip) {
01665                 idct_mb(s, dst, mb);
01666             } else {
01667                 AV_ZERO64(s->left_nnz);
01668                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01669 
01670                 // Reset DC block predictors if they would exist if the mb had coefficients
01671                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01672                     s->left_nnz[8]      = 0;
01673                     s->top_nnz[mb_x][8] = 0;
01674                 }
01675             }
01676 
01677             if (s->deblock_filter)
01678                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01679 
01680             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01681 
01682             dst[0] += 16;
01683             dst[1] += 8;
01684             dst[2] += 8;
01685             s->mv_min.x -= 64;
01686             s->mv_max.x -= 64;
01687         }
01688         if (s->deblock_filter) {
01689             if (s->filter.simple)
01690                 filter_mb_row_simple(s, curframe, mb_y);
01691             else
01692                 filter_mb_row(s, curframe, mb_y);
01693         }
01694         s->mv_min.y -= 64;
01695         s->mv_max.y -= 64;
01696 
01697         ff_thread_report_progress(curframe, mb_y, 0);
01698     }
01699 
01700     ff_thread_report_progress(curframe, INT_MAX, 0);
01701 skip_decode:
01702     // if future frames don't use the updated probabilities,
01703     // reset them to the values we saved
01704     if (!s->update_probabilities)
01705         s->prob[0] = s->prob[1];
01706 
01707     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01708 
01709     if (!s->invisible) {
01710         *(AVFrame*)data = *curframe;
01711         *data_size = sizeof(AVFrame);
01712     }
01713 
01714     return avpkt->size;
01715 }
01716 
01717 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01718 {
01719     VP8Context *s = avctx->priv_data;
01720 
01721     s->avctx = avctx;
01722     avctx->pix_fmt = PIX_FMT_YUV420P;
01723 
01724     dsputil_init(&s->dsp, avctx);
01725     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
01726     ff_vp8dsp_init(&s->vp8dsp);
01727 
01728     return 0;
01729 }
01730 
01731 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01732 {
01733     vp8_decode_flush(avctx);
01734     return 0;
01735 }
01736 
01737 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01738 {
01739     VP8Context *s = avctx->priv_data;
01740 
01741     s->avctx = avctx;
01742 
01743     return 0;
01744 }
01745 
/*
 * Map a pointer into s_src->frames[] onto the element with the same index
 * in s->frames[]; a NULL pointer is passed through as NULL.
 *
 * Relies on variables named `s` and `s_src` (each with a `frames` array)
 * being in scope at the point of use. The argument and the whole expansion
 * are parenthesized so the macro is safe inside larger expressions; the
 * argument is evaluated twice and must therefore be free of side effects.
 */
#define REBASE(pic) \
    ((pic) ? &s->frames[(pic) - &s_src->frames[0]] : NULL)
01748 
01749 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01750 {
01751     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01752 
01753     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01754     s->segmentation = s_src->segmentation;
01755     s->lf_delta = s_src->lf_delta;
01756     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01757 
01758     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01759     s->framep[0] = REBASE(s_src->next_framep[0]);
01760     s->framep[1] = REBASE(s_src->next_framep[1]);
01761     s->framep[2] = REBASE(s_src->next_framep[2]);
01762     s->framep[3] = REBASE(s_src->next_framep[3]);
01763 
01764     return 0;
01765 }
01766 
01767 AVCodec ff_vp8_decoder = {
01768     "vp8",
01769     AVMEDIA_TYPE_VIDEO,
01770     CODEC_ID_VP8,
01771     sizeof(VP8Context),
01772     vp8_decode_init,
01773     NULL,
01774     vp8_decode_free,
01775     vp8_decode_frame,
01776     CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01777     .flush = vp8_decode_flush,
01778     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
01779     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01780     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01781 };