• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libswscale/rgb2rgb_template.c

Go to the documentation of this file.
00001 /*
00002  * software RGB to RGB converter
00003  * pluralize by software PAL8 to RGB converter
00004  *              software YUV to YUV converter
00005  *              software YUV to RGB converter
00006  * Written by Nick Kurshev.
00007  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
00008  * lot of big-endian byte order fixes by Alex Beregszaszi
00009  *
00010  * This file is part of FFmpeg.
00011  *
00012  * FFmpeg is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU General Public License as published by
00014  * the Free Software Foundation; either version 2 of the License, or
00015  * (at your option) any later version.
00016  *
00017  * FFmpeg is distributed in the hope that it will be useful,
00018  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  * GNU General Public License for more details.
00021  *
00022  * You should have received a copy of the GNU General Public License
00023  * along with FFmpeg; if not, write to the Free Software
00024  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00025  *
00026  * The C code (not assembly, MMX, ...) of this file can be used
00027  * under the LGPL license.
00028  */
00029 
00030 #include <stddef.h>
00031 
/* Clear any earlier definitions of the helper macros so the selections below
 * start from a clean slate (presumably because this template is compiled
 * more than once with different HAVE_* settings -- confirm in the includer). */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

/* MMREG_SIZE is 16 when HAVE_SSE2 is set and 8 otherwise. */
#if HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/* Instruction mnemonics pasted into the inline-asm strings of this file.
 * With neither 3DNow! nor MMX2 the two prefetch macros expand to an
 * assembler comment (" # nop") and PAVGB is left undefined. */
#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif

#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

/* MOVNTQ is the 64-bit store used by the conversion loops: "movntq" with
 * MMX2, in which case SFENCE is a real "sfence"; otherwise a plain "movq"
 * and SFENCE degrades to an assembler comment. */
#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
00073 
00074 static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
00075 {
00076     uint8_t *dest = dst;
00077     const uint8_t *s = src;
00078     const uint8_t *end;
00079     #if HAVE_MMX
00080         const uint8_t *mm_end;
00081     #endif
00082     end = s + src_size;
00083     #if HAVE_MMX
00084         __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
00085         mm_end = end - 23;
00086         __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
00087         while (s < mm_end)
00088         {
00089             __asm__ volatile(
00090             PREFETCH"    32%1           \n\t"
00091             "movd          %1, %%mm0    \n\t"
00092             "punpckldq    3%1, %%mm0    \n\t"
00093             "movd         6%1, %%mm1    \n\t"
00094             "punpckldq    9%1, %%mm1    \n\t"
00095             "movd        12%1, %%mm2    \n\t"
00096             "punpckldq   15%1, %%mm2    \n\t"
00097             "movd        18%1, %%mm3    \n\t"
00098             "punpckldq   21%1, %%mm3    \n\t"
00099             "por        %%mm7, %%mm0    \n\t"
00100             "por        %%mm7, %%mm1    \n\t"
00101             "por        %%mm7, %%mm2    \n\t"
00102             "por        %%mm7, %%mm3    \n\t"
00103             MOVNTQ"     %%mm0,   %0     \n\t"
00104             MOVNTQ"     %%mm1,  8%0     \n\t"
00105             MOVNTQ"     %%mm2, 16%0     \n\t"
00106             MOVNTQ"     %%mm3, 24%0"
00107             :"=m"(*dest)
00108             :"m"(*s)
00109             :"memory");
00110             dest += 32;
00111             s += 24;
00112         }
00113         __asm__ volatile(SFENCE:::"memory");
00114         __asm__ volatile(EMMS:::"memory");
00115     #endif
00116     while (s < end)
00117     {
00118     #ifdef WORDS_BIGENDIAN
00119         /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
00120         *dest++ = 255;
00121         *dest++ = s[2];
00122         *dest++ = s[1];
00123         *dest++ = s[0];
00124         s+=3;
00125     #else
00126         *dest++ = *s++;
00127         *dest++ = *s++;
00128         *dest++ = *s++;
00129         *dest++ = 255;
00130     #endif
00131     }
00132 }
00133 
00134 static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
00135 {
00136     uint8_t *dest = dst;
00137     const uint8_t *s = src;
00138     const uint8_t *end;
00139 #if HAVE_MMX
00140     const uint8_t *mm_end;
00141 #endif
00142     end = s + src_size;
00143 #if HAVE_MMX
00144     __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
00145     mm_end = end - 31;
00146     while (s < mm_end)
00147     {
00148         __asm__ volatile(
00149         PREFETCH"    32%1           \n\t"
00150         "movq          %1, %%mm0    \n\t"
00151         "movq         8%1, %%mm1    \n\t"
00152         "movq        16%1, %%mm4    \n\t"
00153         "movq        24%1, %%mm5    \n\t"
00154         "movq       %%mm0, %%mm2    \n\t"
00155         "movq       %%mm1, %%mm3    \n\t"
00156         "movq       %%mm4, %%mm6    \n\t"
00157         "movq       %%mm5, %%mm7    \n\t"
00158         "psrlq         $8, %%mm2    \n\t"
00159         "psrlq         $8, %%mm3    \n\t"
00160         "psrlq         $8, %%mm6    \n\t"
00161         "psrlq         $8, %%mm7    \n\t"
00162         "pand          %2, %%mm0    \n\t"
00163         "pand          %2, %%mm1    \n\t"
00164         "pand          %2, %%mm4    \n\t"
00165         "pand          %2, %%mm5    \n\t"
00166         "pand          %3, %%mm2    \n\t"
00167         "pand          %3, %%mm3    \n\t"
00168         "pand          %3, %%mm6    \n\t"
00169         "pand          %3, %%mm7    \n\t"
00170         "por        %%mm2, %%mm0    \n\t"
00171         "por        %%mm3, %%mm1    \n\t"
00172         "por        %%mm6, %%mm4    \n\t"
00173         "por        %%mm7, %%mm5    \n\t"
00174 
00175         "movq       %%mm1, %%mm2    \n\t"
00176         "movq       %%mm4, %%mm3    \n\t"
00177         "psllq        $48, %%mm2    \n\t"
00178         "psllq        $32, %%mm3    \n\t"
00179         "pand          %4, %%mm2    \n\t"
00180         "pand          %5, %%mm3    \n\t"
00181         "por        %%mm2, %%mm0    \n\t"
00182         "psrlq        $16, %%mm1    \n\t"
00183         "psrlq        $32, %%mm4    \n\t"
00184         "psllq        $16, %%mm5    \n\t"
00185         "por        %%mm3, %%mm1    \n\t"
00186         "pand          %6, %%mm5    \n\t"
00187         "por        %%mm5, %%mm4    \n\t"
00188 
00189         MOVNTQ"     %%mm0,   %0     \n\t"
00190         MOVNTQ"     %%mm1,  8%0     \n\t"
00191         MOVNTQ"     %%mm4, 16%0"
00192         :"=m"(*dest)
00193         :"m"(*s),"m"(mask24l),
00194          "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
00195         :"memory");
00196         dest += 24;
00197         s += 32;
00198     }
00199     __asm__ volatile(SFENCE:::"memory");
00200     __asm__ volatile(EMMS:::"memory");
00201 #endif
00202     while (s < end)
00203     {
00204 #ifdef WORDS_BIGENDIAN
00205         /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
00206         s++;
00207         dest[2] = *s++;
00208         dest[1] = *s++;
00209         dest[0] = *s++;
00210         dest += 3;
00211 #else
00212         *dest++ = *s++;
00213         *dest++ = *s++;
00214         *dest++ = *s++;
00215         s++;
00216 #endif
00217     }
00218 }
00219 
00220 /*
00221  original by Strepto/Astral
00222  ported to gcc & bugfixed: A'rpi
00223  MMX2, 3DNOW optimization by Nick Kurshev
00224  32-bit C version, and and&add trick by Michael Niedermayer
00225 */
00226 static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
00227 {
00228     register const uint8_t* s=src;
00229     register uint8_t* d=dst;
00230     register const uint8_t *end;
00231     const uint8_t *mm_end;
00232     end = s + src_size;
00233 #if HAVE_MMX
00234     __asm__ volatile(PREFETCH"    %0"::"m"(*s));
00235     __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
00236     mm_end = end - 15;
00237     while (s<mm_end)
00238     {
00239         __asm__ volatile(
00240         PREFETCH"  32%1         \n\t"
00241         "movq        %1, %%mm0  \n\t"
00242         "movq       8%1, %%mm2  \n\t"
00243         "movq     %%mm0, %%mm1  \n\t"
00244         "movq     %%mm2, %%mm3  \n\t"
00245         "pand     %%mm4, %%mm0  \n\t"
00246         "pand     %%mm4, %%mm2  \n\t"
00247         "paddw    %%mm1, %%mm0  \n\t"
00248         "paddw    %%mm3, %%mm2  \n\t"
00249         MOVNTQ"   %%mm0,  %0    \n\t"
00250         MOVNTQ"   %%mm2, 8%0"
00251         :"=m"(*d)
00252         :"m"(*s)
00253         );
00254         d+=16;
00255         s+=16;
00256     }
00257     __asm__ volatile(SFENCE:::"memory");
00258     __asm__ volatile(EMMS:::"memory");
00259 #endif
00260     mm_end = end - 3;
00261     while (s < mm_end)
00262     {
00263         register unsigned x= *((const uint32_t *)s);
00264         *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
00265         d+=4;
00266         s+=4;
00267     }
00268     if (s < end)
00269     {
00270         register unsigned short x= *((const uint16_t *)s);
00271         *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
00272     }
00273 }
00274 
00275 static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
00276 {
00277     register const uint8_t* s=src;
00278     register uint8_t* d=dst;
00279     register const uint8_t *end;
00280     const uint8_t *mm_end;
00281     end = s + src_size;
00282 #if HAVE_MMX
00283     __asm__ volatile(PREFETCH"    %0"::"m"(*s));
00284     __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
00285     __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
00286     mm_end = end - 15;
00287     while (s<mm_end)
00288     {
00289         __asm__ volatile(
00290         PREFETCH"  32%1         \n\t"
00291         "movq        %1, %%mm0  \n\t"
00292         "movq       8%1, %%mm2  \n\t"
00293         "movq     %%mm0, %%mm1  \n\t"
00294         "movq     %%mm2, %%mm3  \n\t"
00295         "psrlq       $1, %%mm0  \n\t"
00296         "psrlq       $1, %%mm2  \n\t"
00297         "pand     %%mm7, %%mm0  \n\t"
00298         "pand     %%mm7, %%mm2  \n\t"
00299         "pand     %%mm6, %%mm1  \n\t"
00300         "pand     %%mm6, %%mm3  \n\t"
00301         "por      %%mm1, %%mm0  \n\t"
00302         "por      %%mm3, %%mm2  \n\t"
00303         MOVNTQ"   %%mm0,  %0    \n\t"
00304         MOVNTQ"   %%mm2, 8%0"
00305         :"=m"(*d)
00306         :"m"(*s)
00307         );
00308         d+=16;
00309         s+=16;
00310     }
00311     __asm__ volatile(SFENCE:::"memory");
00312     __asm__ volatile(EMMS:::"memory");
00313 #endif
00314     mm_end = end - 3;
00315     while (s < mm_end)
00316     {
00317         register uint32_t x= *((const uint32_t*)s);
00318         *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
00319         s+=4;
00320         d+=4;
00321     }
00322     if (s < end)
00323     {
00324         register uint16_t x= *((const uint16_t*)s);
00325         *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
00326         s+=2;
00327         d+=2;
00328     }
00329 }
00330 
00331 static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
00332 {
00333     const uint8_t *s = src;
00334     const uint8_t *end;
00335 #if HAVE_MMX
00336     const uint8_t *mm_end;
00337 #endif
00338     uint16_t *d = (uint16_t *)dst;
00339     end = s + src_size;
00340 #if HAVE_MMX
00341     mm_end = end - 15;
00342 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
00343     __asm__ volatile(
00344     "movq           %3, %%mm5   \n\t"
00345     "movq           %4, %%mm6   \n\t"
00346     "movq           %5, %%mm7   \n\t"
00347     "jmp 2f                     \n\t"
00348     ASMALIGN(4)
00349     "1:                         \n\t"
00350     PREFETCH"   32(%1)          \n\t"
00351     "movd         (%1), %%mm0   \n\t"
00352     "movd        4(%1), %%mm3   \n\t"
00353     "punpckldq   8(%1), %%mm0   \n\t"
00354     "punpckldq  12(%1), %%mm3   \n\t"
00355     "movq        %%mm0, %%mm1   \n\t"
00356     "movq        %%mm3, %%mm4   \n\t"
00357     "pand        %%mm6, %%mm0   \n\t"
00358     "pand        %%mm6, %%mm3   \n\t"
00359     "pmaddwd     %%mm7, %%mm0   \n\t"
00360     "pmaddwd     %%mm7, %%mm3   \n\t"
00361     "pand        %%mm5, %%mm1   \n\t"
00362     "pand        %%mm5, %%mm4   \n\t"
00363     "por         %%mm1, %%mm0   \n\t"
00364     "por         %%mm4, %%mm3   \n\t"
00365     "psrld          $5, %%mm0   \n\t"
00366     "pslld         $11, %%mm3   \n\t"
00367     "por         %%mm3, %%mm0   \n\t"
00368     MOVNTQ"      %%mm0, (%0)    \n\t"
00369     "add           $16,  %1     \n\t"
00370     "add            $8,  %0     \n\t"
00371     "2:                         \n\t"
00372     "cmp            %2,  %1     \n\t"
00373     " jb            1b          \n\t"
00374     : "+r" (d), "+r"(s)
00375     : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
00376     );
00377 #else
00378     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00379     __asm__ volatile(
00380         "movq    %0, %%mm7    \n\t"
00381         "movq    %1, %%mm6    \n\t"
00382         ::"m"(red_16mask),"m"(green_16mask));
00383     while (s < mm_end)
00384     {
00385         __asm__ volatile(
00386         PREFETCH"    32%1           \n\t"
00387         "movd          %1, %%mm0    \n\t"
00388         "movd         4%1, %%mm3    \n\t"
00389         "punpckldq    8%1, %%mm0    \n\t"
00390         "punpckldq   12%1, %%mm3    \n\t"
00391         "movq       %%mm0, %%mm1    \n\t"
00392         "movq       %%mm0, %%mm2    \n\t"
00393         "movq       %%mm3, %%mm4    \n\t"
00394         "movq       %%mm3, %%mm5    \n\t"
00395         "psrlq         $3, %%mm0    \n\t"
00396         "psrlq         $3, %%mm3    \n\t"
00397         "pand          %2, %%mm0    \n\t"
00398         "pand          %2, %%mm3    \n\t"
00399         "psrlq         $5, %%mm1    \n\t"
00400         "psrlq         $5, %%mm4    \n\t"
00401         "pand       %%mm6, %%mm1    \n\t"
00402         "pand       %%mm6, %%mm4    \n\t"
00403         "psrlq         $8, %%mm2    \n\t"
00404         "psrlq         $8, %%mm5    \n\t"
00405         "pand       %%mm7, %%mm2    \n\t"
00406         "pand       %%mm7, %%mm5    \n\t"
00407         "por        %%mm1, %%mm0    \n\t"
00408         "por        %%mm4, %%mm3    \n\t"
00409         "por        %%mm2, %%mm0    \n\t"
00410         "por        %%mm5, %%mm3    \n\t"
00411         "psllq        $16, %%mm3    \n\t"
00412         "por        %%mm3, %%mm0    \n\t"
00413         MOVNTQ"     %%mm0, %0       \n\t"
00414         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00415         d += 4;
00416         s += 16;
00417     }
00418 #endif
00419     __asm__ volatile(SFENCE:::"memory");
00420     __asm__ volatile(EMMS:::"memory");
00421 #endif
00422     while (s < end)
00423     {
00424         register int rgb = *(const uint32_t*)s; s += 4;
00425         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
00426     }
00427 }
00428 
00429 static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
00430 {
00431     const uint8_t *s = src;
00432     const uint8_t *end;
00433 #if HAVE_MMX
00434     const uint8_t *mm_end;
00435 #endif
00436     uint16_t *d = (uint16_t *)dst;
00437     end = s + src_size;
00438 #if HAVE_MMX
00439     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00440     __asm__ volatile(
00441         "movq          %0, %%mm7    \n\t"
00442         "movq          %1, %%mm6    \n\t"
00443         ::"m"(red_16mask),"m"(green_16mask));
00444     mm_end = end - 15;
00445     while (s < mm_end)
00446     {
00447         __asm__ volatile(
00448         PREFETCH"    32%1           \n\t"
00449         "movd          %1, %%mm0    \n\t"
00450         "movd         4%1, %%mm3    \n\t"
00451         "punpckldq    8%1, %%mm0    \n\t"
00452         "punpckldq   12%1, %%mm3    \n\t"
00453         "movq       %%mm0, %%mm1    \n\t"
00454         "movq       %%mm0, %%mm2    \n\t"
00455         "movq       %%mm3, %%mm4    \n\t"
00456         "movq       %%mm3, %%mm5    \n\t"
00457         "psllq         $8, %%mm0    \n\t"
00458         "psllq         $8, %%mm3    \n\t"
00459         "pand       %%mm7, %%mm0    \n\t"
00460         "pand       %%mm7, %%mm3    \n\t"
00461         "psrlq         $5, %%mm1    \n\t"
00462         "psrlq         $5, %%mm4    \n\t"
00463         "pand       %%mm6, %%mm1    \n\t"
00464         "pand       %%mm6, %%mm4    \n\t"
00465         "psrlq        $19, %%mm2    \n\t"
00466         "psrlq        $19, %%mm5    \n\t"
00467         "pand          %2, %%mm2    \n\t"
00468         "pand          %2, %%mm5    \n\t"
00469         "por        %%mm1, %%mm0    \n\t"
00470         "por        %%mm4, %%mm3    \n\t"
00471         "por        %%mm2, %%mm0    \n\t"
00472         "por        %%mm5, %%mm3    \n\t"
00473         "psllq        $16, %%mm3    \n\t"
00474         "por        %%mm3, %%mm0    \n\t"
00475         MOVNTQ"     %%mm0, %0       \n\t"
00476         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00477         d += 4;
00478         s += 16;
00479     }
00480     __asm__ volatile(SFENCE:::"memory");
00481     __asm__ volatile(EMMS:::"memory");
00482 #endif
00483     while (s < end)
00484     {
00485         register int rgb = *(const uint32_t*)s; s += 4;
00486         *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
00487     }
00488 }
00489 
00490 static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
00491 {
00492     const uint8_t *s = src;
00493     const uint8_t *end;
00494 #if HAVE_MMX
00495     const uint8_t *mm_end;
00496 #endif
00497     uint16_t *d = (uint16_t *)dst;
00498     end = s + src_size;
00499 #if HAVE_MMX
00500     mm_end = end - 15;
00501 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
00502     __asm__ volatile(
00503     "movq           %3, %%mm5   \n\t"
00504     "movq           %4, %%mm6   \n\t"
00505     "movq           %5, %%mm7   \n\t"
00506     "jmp            2f          \n\t"
00507     ASMALIGN(4)
00508     "1:                         \n\t"
00509     PREFETCH"   32(%1)          \n\t"
00510     "movd         (%1), %%mm0   \n\t"
00511     "movd        4(%1), %%mm3   \n\t"
00512     "punpckldq   8(%1), %%mm0   \n\t"
00513     "punpckldq  12(%1), %%mm3   \n\t"
00514     "movq        %%mm0, %%mm1   \n\t"
00515     "movq        %%mm3, %%mm4   \n\t"
00516     "pand        %%mm6, %%mm0   \n\t"
00517     "pand        %%mm6, %%mm3   \n\t"
00518     "pmaddwd     %%mm7, %%mm0   \n\t"
00519     "pmaddwd     %%mm7, %%mm3   \n\t"
00520     "pand        %%mm5, %%mm1   \n\t"
00521     "pand        %%mm5, %%mm4   \n\t"
00522     "por         %%mm1, %%mm0   \n\t"
00523     "por         %%mm4, %%mm3   \n\t"
00524     "psrld          $6, %%mm0   \n\t"
00525     "pslld         $10, %%mm3   \n\t"
00526     "por         %%mm3, %%mm0   \n\t"
00527     MOVNTQ"      %%mm0, (%0)    \n\t"
00528     "add           $16,  %1     \n\t"
00529     "add            $8,  %0     \n\t"
00530     "2:                         \n\t"
00531     "cmp            %2,  %1     \n\t"
00532     " jb            1b          \n\t"
00533     : "+r" (d), "+r"(s)
00534     : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
00535     );
00536 #else
00537     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00538     __asm__ volatile(
00539         "movq          %0, %%mm7    \n\t"
00540         "movq          %1, %%mm6    \n\t"
00541         ::"m"(red_15mask),"m"(green_15mask));
00542     while (s < mm_end)
00543     {
00544         __asm__ volatile(
00545         PREFETCH"    32%1           \n\t"
00546         "movd          %1, %%mm0    \n\t"
00547         "movd         4%1, %%mm3    \n\t"
00548         "punpckldq    8%1, %%mm0    \n\t"
00549         "punpckldq   12%1, %%mm3    \n\t"
00550         "movq       %%mm0, %%mm1    \n\t"
00551         "movq       %%mm0, %%mm2    \n\t"
00552         "movq       %%mm3, %%mm4    \n\t"
00553         "movq       %%mm3, %%mm5    \n\t"
00554         "psrlq         $3, %%mm0    \n\t"
00555         "psrlq         $3, %%mm3    \n\t"
00556         "pand          %2, %%mm0    \n\t"
00557         "pand          %2, %%mm3    \n\t"
00558         "psrlq         $6, %%mm1    \n\t"
00559         "psrlq         $6, %%mm4    \n\t"
00560         "pand       %%mm6, %%mm1    \n\t"
00561         "pand       %%mm6, %%mm4    \n\t"
00562         "psrlq         $9, %%mm2    \n\t"
00563         "psrlq         $9, %%mm5    \n\t"
00564         "pand       %%mm7, %%mm2    \n\t"
00565         "pand       %%mm7, %%mm5    \n\t"
00566         "por        %%mm1, %%mm0    \n\t"
00567         "por        %%mm4, %%mm3    \n\t"
00568         "por        %%mm2, %%mm0    \n\t"
00569         "por        %%mm5, %%mm3    \n\t"
00570         "psllq        $16, %%mm3    \n\t"
00571         "por        %%mm3, %%mm0    \n\t"
00572         MOVNTQ"     %%mm0, %0       \n\t"
00573         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00574         d += 4;
00575         s += 16;
00576     }
00577 #endif
00578     __asm__ volatile(SFENCE:::"memory");
00579     __asm__ volatile(EMMS:::"memory");
00580 #endif
00581     while (s < end)
00582     {
00583         register int rgb = *(const uint32_t*)s; s += 4;
00584         *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
00585     }
00586 }
00587 
00588 static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
00589 {
00590     const uint8_t *s = src;
00591     const uint8_t *end;
00592 #if HAVE_MMX
00593     const uint8_t *mm_end;
00594 #endif
00595     uint16_t *d = (uint16_t *)dst;
00596     end = s + src_size;
00597 #if HAVE_MMX
00598     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00599     __asm__ volatile(
00600         "movq          %0, %%mm7    \n\t"
00601         "movq          %1, %%mm6    \n\t"
00602         ::"m"(red_15mask),"m"(green_15mask));
00603     mm_end = end - 15;
00604     while (s < mm_end)
00605     {
00606         __asm__ volatile(
00607         PREFETCH"    32%1           \n\t"
00608         "movd          %1, %%mm0    \n\t"
00609         "movd         4%1, %%mm3    \n\t"
00610         "punpckldq    8%1, %%mm0    \n\t"
00611         "punpckldq   12%1, %%mm3    \n\t"
00612         "movq       %%mm0, %%mm1    \n\t"
00613         "movq       %%mm0, %%mm2    \n\t"
00614         "movq       %%mm3, %%mm4    \n\t"
00615         "movq       %%mm3, %%mm5    \n\t"
00616         "psllq         $7, %%mm0    \n\t"
00617         "psllq         $7, %%mm3    \n\t"
00618         "pand       %%mm7, %%mm0    \n\t"
00619         "pand       %%mm7, %%mm3    \n\t"
00620         "psrlq         $6, %%mm1    \n\t"
00621         "psrlq         $6, %%mm4    \n\t"
00622         "pand       %%mm6, %%mm1    \n\t"
00623         "pand       %%mm6, %%mm4    \n\t"
00624         "psrlq        $19, %%mm2    \n\t"
00625         "psrlq        $19, %%mm5    \n\t"
00626         "pand          %2, %%mm2    \n\t"
00627         "pand          %2, %%mm5    \n\t"
00628         "por        %%mm1, %%mm0    \n\t"
00629         "por        %%mm4, %%mm3    \n\t"
00630         "por        %%mm2, %%mm0    \n\t"
00631         "por        %%mm5, %%mm3    \n\t"
00632         "psllq        $16, %%mm3    \n\t"
00633         "por        %%mm3, %%mm0    \n\t"
00634         MOVNTQ"     %%mm0, %0       \n\t"
00635         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00636         d += 4;
00637         s += 16;
00638     }
00639     __asm__ volatile(SFENCE:::"memory");
00640     __asm__ volatile(EMMS:::"memory");
00641 #endif
00642     while (s < end)
00643     {
00644         register int rgb = *(const uint32_t*)s; s += 4;
00645         *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
00646     }
00647 }
00648 
00649 static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
00650 {
00651     const uint8_t *s = src;
00652     const uint8_t *end;
00653 #if HAVE_MMX
00654     const uint8_t *mm_end;
00655 #endif
00656     uint16_t *d = (uint16_t *)dst;
00657     end = s + src_size;
00658 #if HAVE_MMX
00659     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00660     __asm__ volatile(
00661         "movq         %0, %%mm7     \n\t"
00662         "movq         %1, %%mm6     \n\t"
00663         ::"m"(red_16mask),"m"(green_16mask));
00664     mm_end = end - 11;
00665     while (s < mm_end)
00666     {
00667         __asm__ volatile(
00668         PREFETCH"    32%1           \n\t"
00669         "movd          %1, %%mm0    \n\t"
00670         "movd         3%1, %%mm3    \n\t"
00671         "punpckldq    6%1, %%mm0    \n\t"
00672         "punpckldq    9%1, %%mm3    \n\t"
00673         "movq       %%mm0, %%mm1    \n\t"
00674         "movq       %%mm0, %%mm2    \n\t"
00675         "movq       %%mm3, %%mm4    \n\t"
00676         "movq       %%mm3, %%mm5    \n\t"
00677         "psrlq         $3, %%mm0    \n\t"
00678         "psrlq         $3, %%mm3    \n\t"
00679         "pand          %2, %%mm0    \n\t"
00680         "pand          %2, %%mm3    \n\t"
00681         "psrlq         $5, %%mm1    \n\t"
00682         "psrlq         $5, %%mm4    \n\t"
00683         "pand       %%mm6, %%mm1    \n\t"
00684         "pand       %%mm6, %%mm4    \n\t"
00685         "psrlq         $8, %%mm2    \n\t"
00686         "psrlq         $8, %%mm5    \n\t"
00687         "pand       %%mm7, %%mm2    \n\t"
00688         "pand       %%mm7, %%mm5    \n\t"
00689         "por        %%mm1, %%mm0    \n\t"
00690         "por        %%mm4, %%mm3    \n\t"
00691         "por        %%mm2, %%mm0    \n\t"
00692         "por        %%mm5, %%mm3    \n\t"
00693         "psllq        $16, %%mm3    \n\t"
00694         "por        %%mm3, %%mm0    \n\t"
00695         MOVNTQ"     %%mm0, %0       \n\t"
00696         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00697         d += 4;
00698         s += 12;
00699     }
00700     __asm__ volatile(SFENCE:::"memory");
00701     __asm__ volatile(EMMS:::"memory");
00702 #endif
00703     while (s < end)
00704     {
00705         const int b = *s++;
00706         const int g = *s++;
00707         const int r = *s++;
00708         *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00709     }
00710 }
00711 
00712 static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
00713 {
00714     const uint8_t *s = src;
00715     const uint8_t *end;
00716 #if HAVE_MMX
00717     const uint8_t *mm_end;
00718 #endif
00719     uint16_t *d = (uint16_t *)dst;
00720     end = s + src_size;
00721 #if HAVE_MMX
00722     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00723     __asm__ volatile(
00724         "movq         %0, %%mm7     \n\t"
00725         "movq         %1, %%mm6     \n\t"
00726         ::"m"(red_16mask),"m"(green_16mask));
00727     mm_end = end - 15;
00728     while (s < mm_end)
00729     {
00730         __asm__ volatile(
00731         PREFETCH"    32%1           \n\t"
00732         "movd          %1, %%mm0    \n\t"
00733         "movd         3%1, %%mm3    \n\t"
00734         "punpckldq    6%1, %%mm0    \n\t"
00735         "punpckldq    9%1, %%mm3    \n\t"
00736         "movq       %%mm0, %%mm1    \n\t"
00737         "movq       %%mm0, %%mm2    \n\t"
00738         "movq       %%mm3, %%mm4    \n\t"
00739         "movq       %%mm3, %%mm5    \n\t"
00740         "psllq         $8, %%mm0    \n\t"
00741         "psllq         $8, %%mm3    \n\t"
00742         "pand       %%mm7, %%mm0    \n\t"
00743         "pand       %%mm7, %%mm3    \n\t"
00744         "psrlq         $5, %%mm1    \n\t"
00745         "psrlq         $5, %%mm4    \n\t"
00746         "pand       %%mm6, %%mm1    \n\t"
00747         "pand       %%mm6, %%mm4    \n\t"
00748         "psrlq        $19, %%mm2    \n\t"
00749         "psrlq        $19, %%mm5    \n\t"
00750         "pand          %2, %%mm2    \n\t"
00751         "pand          %2, %%mm5    \n\t"
00752         "por        %%mm1, %%mm0    \n\t"
00753         "por        %%mm4, %%mm3    \n\t"
00754         "por        %%mm2, %%mm0    \n\t"
00755         "por        %%mm5, %%mm3    \n\t"
00756         "psllq        $16, %%mm3    \n\t"
00757         "por        %%mm3, %%mm0    \n\t"
00758         MOVNTQ"     %%mm0, %0       \n\t"
00759         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
00760         d += 4;
00761         s += 12;
00762     }
00763     __asm__ volatile(SFENCE:::"memory");
00764     __asm__ volatile(EMMS:::"memory");
00765 #endif
00766     while (s < end)
00767     {
00768         const int r = *s++;
00769         const int g = *s++;
00770         const int b = *s++;
00771         *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
00772     }
00773 }
00774 
00775 static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
00776 {
00777     const uint8_t *s = src;
00778     const uint8_t *end;
00779 #if HAVE_MMX
00780     const uint8_t *mm_end;
00781 #endif
00782     uint16_t *d = (uint16_t *)dst;
00783     end = s + src_size;
00784 #if HAVE_MMX
00785     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00786     __asm__ volatile(
00787         "movq          %0, %%mm7    \n\t"
00788         "movq          %1, %%mm6    \n\t"
00789         ::"m"(red_15mask),"m"(green_15mask));
00790     mm_end = end - 11;
00791     while (s < mm_end)
00792     {
00793         __asm__ volatile(
00794         PREFETCH"    32%1           \n\t"
00795         "movd          %1, %%mm0    \n\t"
00796         "movd         3%1, %%mm3    \n\t"
00797         "punpckldq    6%1, %%mm0    \n\t"
00798         "punpckldq    9%1, %%mm3    \n\t"
00799         "movq       %%mm0, %%mm1    \n\t"
00800         "movq       %%mm0, %%mm2    \n\t"
00801         "movq       %%mm3, %%mm4    \n\t"
00802         "movq       %%mm3, %%mm5    \n\t"
00803         "psrlq         $3, %%mm0    \n\t"
00804         "psrlq         $3, %%mm3    \n\t"
00805         "pand          %2, %%mm0    \n\t"
00806         "pand          %2, %%mm3    \n\t"
00807         "psrlq         $6, %%mm1    \n\t"
00808         "psrlq         $6, %%mm4    \n\t"
00809         "pand       %%mm6, %%mm1    \n\t"
00810         "pand       %%mm6, %%mm4    \n\t"
00811         "psrlq         $9, %%mm2    \n\t"
00812         "psrlq         $9, %%mm5    \n\t"
00813         "pand       %%mm7, %%mm2    \n\t"
00814         "pand       %%mm7, %%mm5    \n\t"
00815         "por        %%mm1, %%mm0    \n\t"
00816         "por        %%mm4, %%mm3    \n\t"
00817         "por        %%mm2, %%mm0    \n\t"
00818         "por        %%mm5, %%mm3    \n\t"
00819         "psllq        $16, %%mm3    \n\t"
00820         "por        %%mm3, %%mm0    \n\t"
00821         MOVNTQ"     %%mm0, %0       \n\t"
00822         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00823         d += 4;
00824         s += 12;
00825     }
00826     __asm__ volatile(SFENCE:::"memory");
00827     __asm__ volatile(EMMS:::"memory");
00828 #endif
00829     while (s < end)
00830     {
00831         const int b = *s++;
00832         const int g = *s++;
00833         const int r = *s++;
00834         *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00835     }
00836 }
00837 
00838 static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
00839 {
00840     const uint8_t *s = src;
00841     const uint8_t *end;
00842 #if HAVE_MMX
00843     const uint8_t *mm_end;
00844 #endif
00845     uint16_t *d = (uint16_t *)dst;
00846     end = s + src_size;
00847 #if HAVE_MMX
00848     __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
00849     __asm__ volatile(
00850         "movq         %0, %%mm7     \n\t"
00851         "movq         %1, %%mm6     \n\t"
00852         ::"m"(red_15mask),"m"(green_15mask));
00853     mm_end = end - 15;
00854     while (s < mm_end)
00855     {
00856         __asm__ volatile(
00857         PREFETCH"   32%1            \n\t"
00858         "movd         %1, %%mm0     \n\t"
00859         "movd        3%1, %%mm3     \n\t"
00860         "punpckldq   6%1, %%mm0     \n\t"
00861         "punpckldq   9%1, %%mm3     \n\t"
00862         "movq      %%mm0, %%mm1     \n\t"
00863         "movq      %%mm0, %%mm2     \n\t"
00864         "movq      %%mm3, %%mm4     \n\t"
00865         "movq      %%mm3, %%mm5     \n\t"
00866         "psllq        $7, %%mm0     \n\t"
00867         "psllq        $7, %%mm3     \n\t"
00868         "pand      %%mm7, %%mm0     \n\t"
00869         "pand      %%mm7, %%mm3     \n\t"
00870         "psrlq        $6, %%mm1     \n\t"
00871         "psrlq        $6, %%mm4     \n\t"
00872         "pand      %%mm6, %%mm1     \n\t"
00873         "pand      %%mm6, %%mm4     \n\t"
00874         "psrlq       $19, %%mm2     \n\t"
00875         "psrlq       $19, %%mm5     \n\t"
00876         "pand         %2, %%mm2     \n\t"
00877         "pand         %2, %%mm5     \n\t"
00878         "por       %%mm1, %%mm0     \n\t"
00879         "por       %%mm4, %%mm3     \n\t"
00880         "por       %%mm2, %%mm0     \n\t"
00881         "por       %%mm5, %%mm3     \n\t"
00882         "psllq       $16, %%mm3     \n\t"
00883         "por       %%mm3, %%mm0     \n\t"
00884         MOVNTQ"    %%mm0, %0        \n\t"
00885         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
00886         d += 4;
00887         s += 12;
00888     }
00889     __asm__ volatile(SFENCE:::"memory");
00890     __asm__ volatile(EMMS:::"memory");
00891 #endif
00892     while (s < end)
00893     {
00894         const int r = *s++;
00895         const int g = *s++;
00896         const int b = *s++;
00897         *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
00898     }
00899 }
00900 
00901 /*
00902   A less accurate approximation is used here: the input value is simply
00903   left-shifted and the low-order bits are filled with zeroes. This method
00904   improves PNG compression, but it cannot reproduce white exactly because it
00905   never generates an all-ones maximum value; the net effect is to darken the
00906   image slightly.
00907 
00908   The better method should be "left bit replication":
00909 
00910    4 3 2 1 0
00911    ---------
00912    1 1 0 1 1
00913 
00914    7 6 5 4 3  2 1 0
00915    ----------------
00916    1 1 0 1 1  1 1 0
00917    |=======|  |===|
00918        |      leftmost bits repeated to fill open bits
00919        |
00920    original bits
00921 */
00922 static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
00923 {
00924     const uint16_t *end;
00925 #if HAVE_MMX
00926     const uint16_t *mm_end;
00927 #endif
00928     uint8_t *d = dst;
00929     const uint16_t *s = (const uint16_t*)src;
00930     end = s + src_size/2;
00931 #if HAVE_MMX
00932     __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
00933     mm_end = end - 7;
00934     while (s < mm_end)
00935     {
00936         __asm__ volatile(
00937         PREFETCH"    32%1           \n\t"
00938         "movq          %1, %%mm0    \n\t"
00939         "movq          %1, %%mm1    \n\t"
00940         "movq          %1, %%mm2    \n\t"
00941         "pand          %2, %%mm0    \n\t"
00942         "pand          %3, %%mm1    \n\t"
00943         "pand          %4, %%mm2    \n\t"
00944         "psllq         $3, %%mm0    \n\t"
00945         "psrlq         $2, %%mm1    \n\t"
00946         "psrlq         $7, %%mm2    \n\t"
00947         "movq       %%mm0, %%mm3    \n\t"
00948         "movq       %%mm1, %%mm4    \n\t"
00949         "movq       %%mm2, %%mm5    \n\t"
00950         "punpcklwd     %5, %%mm0    \n\t"
00951         "punpcklwd     %5, %%mm1    \n\t"
00952         "punpcklwd     %5, %%mm2    \n\t"
00953         "punpckhwd     %5, %%mm3    \n\t"
00954         "punpckhwd     %5, %%mm4    \n\t"
00955         "punpckhwd     %5, %%mm5    \n\t"
00956         "psllq         $8, %%mm1    \n\t"
00957         "psllq        $16, %%mm2    \n\t"
00958         "por        %%mm1, %%mm0    \n\t"
00959         "por        %%mm2, %%mm0    \n\t"
00960         "psllq         $8, %%mm4    \n\t"
00961         "psllq        $16, %%mm5    \n\t"
00962         "por        %%mm4, %%mm3    \n\t"
00963         "por        %%mm5, %%mm3    \n\t"
00964 
00965         "movq       %%mm0, %%mm6    \n\t"
00966         "movq       %%mm3, %%mm7    \n\t"
00967 
00968         "movq         8%1, %%mm0    \n\t"
00969         "movq         8%1, %%mm1    \n\t"
00970         "movq         8%1, %%mm2    \n\t"
00971         "pand          %2, %%mm0    \n\t"
00972         "pand          %3, %%mm1    \n\t"
00973         "pand          %4, %%mm2    \n\t"
00974         "psllq         $3, %%mm0    \n\t"
00975         "psrlq         $2, %%mm1    \n\t"
00976         "psrlq         $7, %%mm2    \n\t"
00977         "movq       %%mm0, %%mm3    \n\t"
00978         "movq       %%mm1, %%mm4    \n\t"
00979         "movq       %%mm2, %%mm5    \n\t"
00980         "punpcklwd     %5, %%mm0    \n\t"
00981         "punpcklwd     %5, %%mm1    \n\t"
00982         "punpcklwd     %5, %%mm2    \n\t"
00983         "punpckhwd     %5, %%mm3    \n\t"
00984         "punpckhwd     %5, %%mm4    \n\t"
00985         "punpckhwd     %5, %%mm5    \n\t"
00986         "psllq         $8, %%mm1    \n\t"
00987         "psllq        $16, %%mm2    \n\t"
00988         "por        %%mm1, %%mm0    \n\t"
00989         "por        %%mm2, %%mm0    \n\t"
00990         "psllq         $8, %%mm4    \n\t"
00991         "psllq        $16, %%mm5    \n\t"
00992         "por        %%mm4, %%mm3    \n\t"
00993         "por        %%mm5, %%mm3    \n\t"
00994 
00995         :"=m"(*d)
00996         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
00997         :"memory");
00998         /* borrowed 32 to 24 */
00999         __asm__ volatile(
01000         "movq       %%mm0, %%mm4    \n\t"
01001         "movq       %%mm3, %%mm5    \n\t"
01002         "movq       %%mm6, %%mm0    \n\t"
01003         "movq       %%mm7, %%mm1    \n\t"
01004 
01005         "movq       %%mm4, %%mm6    \n\t"
01006         "movq       %%mm5, %%mm7    \n\t"
01007         "movq       %%mm0, %%mm2    \n\t"
01008         "movq       %%mm1, %%mm3    \n\t"
01009 
01010         "psrlq         $8, %%mm2    \n\t"
01011         "psrlq         $8, %%mm3    \n\t"
01012         "psrlq         $8, %%mm6    \n\t"
01013         "psrlq         $8, %%mm7    \n\t"
01014         "pand          %2, %%mm0    \n\t"
01015         "pand          %2, %%mm1    \n\t"
01016         "pand          %2, %%mm4    \n\t"
01017         "pand          %2, %%mm5    \n\t"
01018         "pand          %3, %%mm2    \n\t"
01019         "pand          %3, %%mm3    \n\t"
01020         "pand          %3, %%mm6    \n\t"
01021         "pand          %3, %%mm7    \n\t"
01022         "por        %%mm2, %%mm0    \n\t"
01023         "por        %%mm3, %%mm1    \n\t"
01024         "por        %%mm6, %%mm4    \n\t"
01025         "por        %%mm7, %%mm5    \n\t"
01026 
01027         "movq       %%mm1, %%mm2    \n\t"
01028         "movq       %%mm4, %%mm3    \n\t"
01029         "psllq        $48, %%mm2    \n\t"
01030         "psllq        $32, %%mm3    \n\t"
01031         "pand          %4, %%mm2    \n\t"
01032         "pand          %5, %%mm3    \n\t"
01033         "por        %%mm2, %%mm0    \n\t"
01034         "psrlq        $16, %%mm1    \n\t"
01035         "psrlq        $32, %%mm4    \n\t"
01036         "psllq        $16, %%mm5    \n\t"
01037         "por        %%mm3, %%mm1    \n\t"
01038         "pand          %6, %%mm5    \n\t"
01039         "por        %%mm5, %%mm4    \n\t"
01040 
01041         MOVNTQ"     %%mm0,   %0     \n\t"
01042         MOVNTQ"     %%mm1,  8%0     \n\t"
01043         MOVNTQ"     %%mm4, 16%0"
01044 
01045         :"=m"(*d)
01046         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01047         :"memory");
01048         d += 24;
01049         s += 8;
01050     }
01051     __asm__ volatile(SFENCE:::"memory");
01052     __asm__ volatile(EMMS:::"memory");
01053 #endif
01054     while (s < end)
01055     {
01056         register uint16_t bgr;
01057         bgr = *s++;
01058         *d++ = (bgr&0x1F)<<3;
01059         *d++ = (bgr&0x3E0)>>2;
01060         *d++ = (bgr&0x7C00)>>7;
01061     }
01062 }
01063 
01064 static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01065 {
01066     const uint16_t *end;
01067 #if HAVE_MMX
01068     const uint16_t *mm_end;
01069 #endif
01070     uint8_t *d = (uint8_t *)dst;
01071     const uint16_t *s = (const uint16_t *)src;
01072     end = s + src_size/2;
01073 #if HAVE_MMX
01074     __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
01075     mm_end = end - 7;
01076     while (s < mm_end)
01077     {
01078         __asm__ volatile(
01079         PREFETCH"    32%1           \n\t"
01080         "movq          %1, %%mm0    \n\t"
01081         "movq          %1, %%mm1    \n\t"
01082         "movq          %1, %%mm2    \n\t"
01083         "pand          %2, %%mm0    \n\t"
01084         "pand          %3, %%mm1    \n\t"
01085         "pand          %4, %%mm2    \n\t"
01086         "psllq         $3, %%mm0    \n\t"
01087         "psrlq         $3, %%mm1    \n\t"
01088         "psrlq         $8, %%mm2    \n\t"
01089         "movq       %%mm0, %%mm3    \n\t"
01090         "movq       %%mm1, %%mm4    \n\t"
01091         "movq       %%mm2, %%mm5    \n\t"
01092         "punpcklwd     %5, %%mm0    \n\t"
01093         "punpcklwd     %5, %%mm1    \n\t"
01094         "punpcklwd     %5, %%mm2    \n\t"
01095         "punpckhwd     %5, %%mm3    \n\t"
01096         "punpckhwd     %5, %%mm4    \n\t"
01097         "punpckhwd     %5, %%mm5    \n\t"
01098         "psllq         $8, %%mm1    \n\t"
01099         "psllq        $16, %%mm2    \n\t"
01100         "por        %%mm1, %%mm0    \n\t"
01101         "por        %%mm2, %%mm0    \n\t"
01102         "psllq         $8, %%mm4    \n\t"
01103         "psllq        $16, %%mm5    \n\t"
01104         "por        %%mm4, %%mm3    \n\t"
01105         "por        %%mm5, %%mm3    \n\t"
01106 
01107         "movq       %%mm0, %%mm6    \n\t"
01108         "movq       %%mm3, %%mm7    \n\t"
01109 
01110         "movq         8%1, %%mm0    \n\t"
01111         "movq         8%1, %%mm1    \n\t"
01112         "movq         8%1, %%mm2    \n\t"
01113         "pand          %2, %%mm0    \n\t"
01114         "pand          %3, %%mm1    \n\t"
01115         "pand          %4, %%mm2    \n\t"
01116         "psllq         $3, %%mm0    \n\t"
01117         "psrlq         $3, %%mm1    \n\t"
01118         "psrlq         $8, %%mm2    \n\t"
01119         "movq       %%mm0, %%mm3    \n\t"
01120         "movq       %%mm1, %%mm4    \n\t"
01121         "movq       %%mm2, %%mm5    \n\t"
01122         "punpcklwd     %5, %%mm0    \n\t"
01123         "punpcklwd     %5, %%mm1    \n\t"
01124         "punpcklwd     %5, %%mm2    \n\t"
01125         "punpckhwd     %5, %%mm3    \n\t"
01126         "punpckhwd     %5, %%mm4    \n\t"
01127         "punpckhwd     %5, %%mm5    \n\t"
01128         "psllq         $8, %%mm1    \n\t"
01129         "psllq        $16, %%mm2    \n\t"
01130         "por        %%mm1, %%mm0    \n\t"
01131         "por        %%mm2, %%mm0    \n\t"
01132         "psllq         $8, %%mm4    \n\t"
01133         "psllq        $16, %%mm5    \n\t"
01134         "por        %%mm4, %%mm3    \n\t"
01135         "por        %%mm5, %%mm3    \n\t"
01136         :"=m"(*d)
01137         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
01138         :"memory");
01139         /* borrowed 32 to 24 */
01140         __asm__ volatile(
01141         "movq       %%mm0, %%mm4    \n\t"
01142         "movq       %%mm3, %%mm5    \n\t"
01143         "movq       %%mm6, %%mm0    \n\t"
01144         "movq       %%mm7, %%mm1    \n\t"
01145 
01146         "movq       %%mm4, %%mm6    \n\t"
01147         "movq       %%mm5, %%mm7    \n\t"
01148         "movq       %%mm0, %%mm2    \n\t"
01149         "movq       %%mm1, %%mm3    \n\t"
01150 
01151         "psrlq         $8, %%mm2    \n\t"
01152         "psrlq         $8, %%mm3    \n\t"
01153         "psrlq         $8, %%mm6    \n\t"
01154         "psrlq         $8, %%mm7    \n\t"
01155         "pand          %2, %%mm0    \n\t"
01156         "pand          %2, %%mm1    \n\t"
01157         "pand          %2, %%mm4    \n\t"
01158         "pand          %2, %%mm5    \n\t"
01159         "pand          %3, %%mm2    \n\t"
01160         "pand          %3, %%mm3    \n\t"
01161         "pand          %3, %%mm6    \n\t"
01162         "pand          %3, %%mm7    \n\t"
01163         "por        %%mm2, %%mm0    \n\t"
01164         "por        %%mm3, %%mm1    \n\t"
01165         "por        %%mm6, %%mm4    \n\t"
01166         "por        %%mm7, %%mm5    \n\t"
01167 
01168         "movq       %%mm1, %%mm2    \n\t"
01169         "movq       %%mm4, %%mm3    \n\t"
01170         "psllq        $48, %%mm2    \n\t"
01171         "psllq        $32, %%mm3    \n\t"
01172         "pand          %4, %%mm2    \n\t"
01173         "pand          %5, %%mm3    \n\t"
01174         "por        %%mm2, %%mm0    \n\t"
01175         "psrlq        $16, %%mm1    \n\t"
01176         "psrlq        $32, %%mm4    \n\t"
01177         "psllq        $16, %%mm5    \n\t"
01178         "por        %%mm3, %%mm1    \n\t"
01179         "pand          %6, %%mm5    \n\t"
01180         "por        %%mm5, %%mm4    \n\t"
01181 
01182         MOVNTQ"     %%mm0,   %0     \n\t"
01183         MOVNTQ"     %%mm1,  8%0     \n\t"
01184         MOVNTQ"     %%mm4, 16%0"
01185 
01186         :"=m"(*d)
01187         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
01188         :"memory");
01189         d += 24;
01190         s += 8;
01191     }
01192     __asm__ volatile(SFENCE:::"memory");
01193     __asm__ volatile(EMMS:::"memory");
01194 #endif
01195     while (s < end)
01196     {
01197         register uint16_t bgr;
01198         bgr = *s++;
01199         *d++ = (bgr&0x1F)<<3;
01200         *d++ = (bgr&0x7E0)>>3;
01201         *d++ = (bgr&0xF800)>>8;
01202     }
01203 }
01204 
01205 static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
01206 {
01207     const uint16_t *end;
01208 #if HAVE_MMX
01209     const uint16_t *mm_end;
01210 #endif
01211     uint8_t *d = dst;
01212     const uint16_t *s = (const uint16_t *)src;
01213     end = s + src_size/2;
01214 #if HAVE_MMX
01215     __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
01216     __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
01217     mm_end = end - 3;
01218     while (s < mm_end)
01219     {
01220         __asm__ volatile(
01221         PREFETCH"    32%1           \n\t"
01222         "movq          %1, %%mm0    \n\t"
01223         "movq          %1, %%mm1    \n\t"
01224         "movq          %1, %%mm2    \n\t"
01225         "pand          %2, %%mm0    \n\t"
01226         "pand          %3, %%mm1    \n\t"
01227         "pand          %4, %%mm2    \n\t"
01228         "psllq         $3, %%mm0    \n\t"
01229         "psrlq         $2, %%mm1    \n\t"
01230         "psrlq         $7, %%mm2    \n\t"
01231         "movq       %%mm0, %%mm3    \n\t"
01232         "movq       %%mm1, %%mm4    \n\t"
01233         "movq       %%mm2, %%mm5    \n\t"
01234         "punpcklwd  %%mm7, %%mm0    \n\t"
01235         "punpcklwd  %%mm7, %%mm1    \n\t"
01236         "punpcklwd  %%mm7, %%mm2    \n\t"
01237         "punpckhwd  %%mm7, %%mm3    \n\t"
01238         "punpckhwd  %%mm7, %%mm4    \n\t"
01239         "punpckhwd  %%mm7, %%mm5    \n\t"
01240         "psllq         $8, %%mm1    \n\t"
01241         "psllq        $16, %%mm2    \n\t"
01242         "por        %%mm1, %%mm0    \n\t"
01243         "por        %%mm2, %%mm0    \n\t"
01244         "psllq         $8, %%mm4    \n\t"
01245         "psllq        $16, %%mm5    \n\t"
01246         "por        %%mm4, %%mm3    \n\t"
01247         "por        %%mm5, %%mm3    \n\t"
01248         MOVNTQ"     %%mm0,  %0      \n\t"
01249         MOVNTQ"     %%mm3, 8%0      \n\t"
01250         :"=m"(*d)
01251         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
01252         :"memory");
01253         d += 16;
01254         s += 4;
01255     }
01256     __asm__ volatile(SFENCE:::"memory");
01257     __asm__ volatile(EMMS:::"memory");
01258 #endif
01259     while (s < end)
01260     {
01261 #if 0 //slightly slower on Athlon
01262         int bgr= *s++;
01263         *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
01264 #else
01265         register uint16_t bgr;
01266         bgr = *s++;
01267 #ifdef WORDS_BIGENDIAN
01268         *d++ = 255;
01269         *d++ = (bgr&0x7C00)>>7;
01270         *d++ = (bgr&0x3E0)>>2;
01271         *d++ = (bgr&0x1F)<<3;
01272 #else
01273         *d++ = (bgr&0x1F)<<3;
01274         *d++ = (bgr&0x3E0)>>2;
01275         *d++ = (bgr&0x7C00)>>7;
01276         *d++ = 255;
01277 #endif
01278 
01279 #endif
01280     }
01281 }
01282 
01283 static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
01284 {
01285     const uint16_t *end;
01286 #if HAVE_MMX
01287     const uint16_t *mm_end;
01288 #endif
01289     uint8_t *d = dst;
01290     const uint16_t *s = (const uint16_t*)src;
01291     end = s + src_size/2;
01292 #if HAVE_MMX
01293     __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
01294     __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
01295     mm_end = end - 3;
01296     while (s < mm_end)
01297     {
01298         __asm__ volatile(
01299         PREFETCH"    32%1           \n\t"
01300         "movq          %1, %%mm0    \n\t"
01301         "movq          %1, %%mm1    \n\t"
01302         "movq          %1, %%mm2    \n\t"
01303         "pand          %2, %%mm0    \n\t"
01304         "pand          %3, %%mm1    \n\t"
01305         "pand          %4, %%mm2    \n\t"
01306         "psllq         $3, %%mm0    \n\t"
01307         "psrlq         $3, %%mm1    \n\t"
01308         "psrlq         $8, %%mm2    \n\t"
01309         "movq       %%mm0, %%mm3    \n\t"
01310         "movq       %%mm1, %%mm4    \n\t"
01311         "movq       %%mm2, %%mm5    \n\t"
01312         "punpcklwd  %%mm7, %%mm0    \n\t"
01313         "punpcklwd  %%mm7, %%mm1    \n\t"
01314         "punpcklwd  %%mm7, %%mm2    \n\t"
01315         "punpckhwd  %%mm7, %%mm3    \n\t"
01316         "punpckhwd  %%mm7, %%mm4    \n\t"
01317         "punpckhwd  %%mm7, %%mm5    \n\t"
01318         "psllq         $8, %%mm1    \n\t"
01319         "psllq        $16, %%mm2    \n\t"
01320         "por        %%mm1, %%mm0    \n\t"
01321         "por        %%mm2, %%mm0    \n\t"
01322         "psllq         $8, %%mm4    \n\t"
01323         "psllq        $16, %%mm5    \n\t"
01324         "por        %%mm4, %%mm3    \n\t"
01325         "por        %%mm5, %%mm3    \n\t"
01326         MOVNTQ"     %%mm0, %0       \n\t"
01327         MOVNTQ"     %%mm3, 8%0      \n\t"
01328         :"=m"(*d)
01329         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
01330         :"memory");
01331         d += 16;
01332         s += 4;
01333     }
01334     __asm__ volatile(SFENCE:::"memory");
01335     __asm__ volatile(EMMS:::"memory");
01336 #endif
01337     while (s < end)
01338     {
01339         register uint16_t bgr;
01340         bgr = *s++;
01341 #ifdef WORDS_BIGENDIAN
01342         *d++ = 255;
01343         *d++ = (bgr&0xF800)>>8;
01344         *d++ = (bgr&0x7E0)>>3;
01345         *d++ = (bgr&0x1F)<<3;
01346 #else
01347         *d++ = (bgr&0x1F)<<3;
01348         *d++ = (bgr&0x7E0)>>3;
01349         *d++ = (bgr&0xF800)>>8;
01350         *d++ = 255;
01351 #endif
01352     }
01353 }
01354 
01355 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
01356 {
01357     long idx = 15 - src_size;
01358     const uint8_t *s = src-idx;
01359     uint8_t *d = dst-idx;
01360 #if HAVE_MMX
01361     __asm__ volatile(
01362     "test          %0, %0           \n\t"
01363     "jns           2f               \n\t"
01364     PREFETCH"       (%1, %0)        \n\t"
01365     "movq          %3, %%mm7        \n\t"
01366     "pxor          %4, %%mm7        \n\t"
01367     "movq       %%mm7, %%mm6        \n\t"
01368     "pxor          %5, %%mm7        \n\t"
01369     ASMALIGN(4)
01370     "1:                             \n\t"
01371     PREFETCH"     32(%1, %0)        \n\t"
01372     "movq           (%1, %0), %%mm0 \n\t"
01373     "movq          8(%1, %0), %%mm1 \n\t"
01374 # if HAVE_MMX2
01375     "pshufw      $177, %%mm0, %%mm3 \n\t"
01376     "pshufw      $177, %%mm1, %%mm5 \n\t"
01377     "pand       %%mm7, %%mm0        \n\t"
01378     "pand       %%mm6, %%mm3        \n\t"
01379     "pand       %%mm7, %%mm1        \n\t"
01380     "pand       %%mm6, %%mm5        \n\t"
01381     "por        %%mm3, %%mm0        \n\t"
01382     "por        %%mm5, %%mm1        \n\t"
01383 # else
01384     "movq       %%mm0, %%mm2        \n\t"
01385     "movq       %%mm1, %%mm4        \n\t"
01386     "pand       %%mm7, %%mm0        \n\t"
01387     "pand       %%mm6, %%mm2        \n\t"
01388     "pand       %%mm7, %%mm1        \n\t"
01389     "pand       %%mm6, %%mm4        \n\t"
01390     "movq       %%mm2, %%mm3        \n\t"
01391     "movq       %%mm4, %%mm5        \n\t"
01392     "pslld        $16, %%mm2        \n\t"
01393     "psrld        $16, %%mm3        \n\t"
01394     "pslld        $16, %%mm4        \n\t"
01395     "psrld        $16, %%mm5        \n\t"
01396     "por        %%mm2, %%mm0        \n\t"
01397     "por        %%mm4, %%mm1        \n\t"
01398     "por        %%mm3, %%mm0        \n\t"
01399     "por        %%mm5, %%mm1        \n\t"
01400 # endif
01401     MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
01402     MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
01403     "add          $16, %0           \n\t"
01404     "js            1b               \n\t"
01405     SFENCE"                         \n\t"
01406     EMMS"                           \n\t"
01407     "2:                             \n\t"
01408     : "+&r"(idx)
01409     : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
01410     : "memory");
01411 #endif
01412     for (; idx<15; idx+=4) {
01413         register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
01414         v &= 0xff00ff;
01415         *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
01416     }
01417 }
01418 
01419 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
01420 {
01421     unsigned i;
01422 #if HAVE_MMX
01423     long mmx_size= 23 - src_size;
01424     __asm__ volatile (
01425     "test             %%"REG_a", %%"REG_a"          \n\t"
01426     "jns                     2f                     \n\t"
01427     "movq     "MANGLE(mask24r)", %%mm5              \n\t"
01428     "movq     "MANGLE(mask24g)", %%mm6              \n\t"
01429     "movq     "MANGLE(mask24b)", %%mm7              \n\t"
01430     ASMALIGN(4)
01431     "1:                                             \n\t"
01432     PREFETCH" 32(%1, %%"REG_a")                     \n\t"
01433     "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
01434     "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
01435     "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
01436     "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
01437     "pand                 %%mm5, %%mm0              \n\t"
01438     "pand                 %%mm6, %%mm1              \n\t"
01439     "pand                 %%mm7, %%mm2              \n\t"
01440     "por                  %%mm0, %%mm1              \n\t"
01441     "por                  %%mm2, %%mm1              \n\t"
01442     "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
01443     MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
01444     "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
01445     "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
01446     "pand                 %%mm7, %%mm0              \n\t"
01447     "pand                 %%mm5, %%mm1              \n\t"
01448     "pand                 %%mm6, %%mm2              \n\t"
01449     "por                  %%mm0, %%mm1              \n\t"
01450     "por                  %%mm2, %%mm1              \n\t"
01451     "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
01452     MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
01453     "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
01454     "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
01455     "pand                 %%mm6, %%mm0              \n\t"
01456     "pand                 %%mm7, %%mm1              \n\t"
01457     "pand                 %%mm5, %%mm2              \n\t"
01458     "por                  %%mm0, %%mm1              \n\t"
01459     "por                  %%mm2, %%mm1              \n\t"
01460     MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
01461     "add                    $24, %%"REG_a"          \n\t"
01462     " js                     1b                     \n\t"
01463     "2:                                             \n\t"
01464     : "+a" (mmx_size)
01465     : "r" (src-mmx_size), "r"(dst-mmx_size)
01466     );
01467 
01468     __asm__ volatile(SFENCE:::"memory");
01469     __asm__ volatile(EMMS:::"memory");
01470 
01471     if (mmx_size==23) return; //finished, was multiple of 8
01472 
01473     src+= src_size;
01474     dst+= src_size;
01475     src_size= 23-mmx_size;
01476     src-= src_size;
01477     dst-= src_size;
01478 #endif
01479     for (i=0; i<src_size; i+=3)
01480     {
01481         register uint8_t x;
01482         x          = src[i + 2];
01483         dst[i + 1] = src[i + 1];
01484         dst[i + 2] = src[i + 0];
01485         dst[i + 0] = x;
01486     }
01487 }
01488 
01489 static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01490                                            long width, long height,
01491                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01492 {
01493     long y;
01494     const long chromWidth= width>>1;
01495     for (y=0; y<height; y++)
01496     {
01497 #if HAVE_MMX
01498 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
01499         __asm__ volatile(
01500         "xor                 %%"REG_a", %%"REG_a"   \n\t"
01501         ASMALIGN(4)
01502         "1:                                         \n\t"
01503         PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
01504         PREFETCH"    32(%2, %%"REG_a")              \n\t"
01505         PREFETCH"    32(%3, %%"REG_a")              \n\t"
01506         "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
01507         "movq                    %%mm0, %%mm2       \n\t" // U(0)
01508         "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
01509         "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
01510         "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
01511 
01512         "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
01513         "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
01514         "movq                    %%mm3, %%mm4       \n\t" // Y(0)
01515         "movq                    %%mm5, %%mm6       \n\t" // Y(8)
01516         "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
01517         "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
01518         "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
01519         "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
01520 
01521         MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
01522         MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
01523         MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
01524         MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
01525 
01526         "add                        $8, %%"REG_a"   \n\t"
01527         "cmp                        %4, %%"REG_a"   \n\t"
01528         " jb                        1b              \n\t"
01529         ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01530         : "%"REG_a
01531         );
01532 #else
01533 
01534 #if ARCH_ALPHA && HAVE_MVI
01535 #define pl2yuy2(n)                  \
01536     y1 = yc[n];                     \
01537     y2 = yc2[n];                    \
01538     u = uc[n];                      \
01539     v = vc[n];                      \
01540     __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
01541     __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
01542     __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
01543     __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
01544     yuv1 = (u << 8) + (v << 24);                \
01545     yuv2 = yuv1 + y2;               \
01546     yuv1 += y1;                     \
01547     qdst[n]  = yuv1;                \
01548     qdst2[n] = yuv2;
01549 
01550         int i;
01551         uint64_t *qdst = (uint64_t *) dst;
01552         uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
01553         const uint32_t *yc = (uint32_t *) ysrc;
01554         const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
01555         const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
01556         for (i = 0; i < chromWidth; i += 8){
01557             uint64_t y1, y2, yuv1, yuv2;
01558             uint64_t u, v;
01559             /* Prefetch */
01560             __asm__("ldq $31,64(%0)" :: "r"(yc));
01561             __asm__("ldq $31,64(%0)" :: "r"(yc2));
01562             __asm__("ldq $31,64(%0)" :: "r"(uc));
01563             __asm__("ldq $31,64(%0)" :: "r"(vc));
01564 
01565             pl2yuy2(0);
01566             pl2yuy2(1);
01567             pl2yuy2(2);
01568             pl2yuy2(3);
01569 
01570             yc    += 4;
01571             yc2   += 4;
01572             uc    += 4;
01573             vc    += 4;
01574             qdst  += 4;
01575             qdst2 += 4;
01576         }
01577         y++;
01578         ysrc += lumStride;
01579         dst += dstStride;
01580 
01581 #elif HAVE_FAST_64BIT
01582         int i;
01583         uint64_t *ldst = (uint64_t *) dst;
01584         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01585         for (i = 0; i < chromWidth; i += 2){
01586             uint64_t k, l;
01587             k = yc[0] + (uc[0] << 8) +
01588                 (yc[1] << 16) + (vc[0] << 24);
01589             l = yc[2] + (uc[1] << 8) +
01590                 (yc[3] << 16) + (vc[1] << 24);
01591             *ldst++ = k + (l << 32);
01592             yc += 4;
01593             uc += 2;
01594             vc += 2;
01595         }
01596 
01597 #else
01598         int i, *idst = (int32_t *) dst;
01599         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01600         for (i = 0; i < chromWidth; i++){
01601 #ifdef WORDS_BIGENDIAN
01602             *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
01603                 (yc[1] << 8) + (vc[0] << 0);
01604 #else
01605             *idst++ = yc[0] + (uc[0] << 8) +
01606                 (yc[1] << 16) + (vc[0] << 24);
01607 #endif
01608             yc += 2;
01609             uc++;
01610             vc++;
01611         }
01612 #endif
01613 #endif
01614         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01615         {
01616             usrc += chromStride;
01617             vsrc += chromStride;
01618         }
01619         ysrc += lumStride;
01620         dst  += dstStride;
01621     }
01622 #if HAVE_MMX
01623 __asm__(    EMMS"       \n\t"
01624         SFENCE"     \n\t"
01625         :::"memory");
01626 #endif
01627 }
01628 
01633 static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01634                                       long width, long height,
01635                                       long lumStride, long chromStride, long dstStride)
01636 {
01637     //FIXME interpolate chroma
01638     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01639 }
01640 
01641 static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01642                                            long width, long height,
01643                                            long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
01644 {
01645     long y;
01646     const long chromWidth= width>>1;
01647     for (y=0; y<height; y++)
01648     {
01649 #if HAVE_MMX
01650 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
01651         __asm__ volatile(
01652         "xor                %%"REG_a", %%"REG_a"    \n\t"
01653         ASMALIGN(4)
01654         "1:                                         \n\t"
01655         PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
01656         PREFETCH"   32(%2, %%"REG_a")               \n\t"
01657         PREFETCH"   32(%3, %%"REG_a")               \n\t"
01658         "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
01659         "movq                   %%mm0, %%mm2        \n\t" // U(0)
01660         "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
01661         "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
01662         "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
01663 
01664         "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
01665         "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
01666         "movq                   %%mm0, %%mm4        \n\t" // Y(0)
01667         "movq                   %%mm2, %%mm6        \n\t" // Y(8)
01668         "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
01669         "punpckhbw              %%mm3, %%mm4        \n\t" // YUYV YUYV(4)
01670         "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
01671         "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
01672 
01673         MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
01674         MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
01675         MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
01676         MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
01677 
01678         "add                       $8, %%"REG_a"    \n\t"
01679         "cmp                       %4, %%"REG_a"    \n\t"
01680         " jb                       1b               \n\t"
01681         ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
01682         : "%"REG_a
01683         );
01684 #else
01685 //FIXME adapt the Alpha ASM code from yv12->yuy2
01686 
01687 #if HAVE_FAST_64BIT
01688         int i;
01689         uint64_t *ldst = (uint64_t *) dst;
01690         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01691         for (i = 0; i < chromWidth; i += 2){
01692             uint64_t k, l;
01693             k = uc[0] + (yc[0] << 8) +
01694                 (vc[0] << 16) + (yc[1] << 24);
01695             l = uc[1] + (yc[2] << 8) +
01696                 (vc[1] << 16) + (yc[3] << 24);
01697             *ldst++ = k + (l << 32);
01698             yc += 4;
01699             uc += 2;
01700             vc += 2;
01701         }
01702 
01703 #else
01704         int i, *idst = (int32_t *) dst;
01705         const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
01706         for (i = 0; i < chromWidth; i++){
01707 #ifdef WORDS_BIGENDIAN
01708             *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
01709                 (vc[0] << 8) + (yc[1] << 0);
01710 #else
01711             *idst++ = uc[0] + (yc[0] << 8) +
01712                (vc[0] << 16) + (yc[1] << 24);
01713 #endif
01714             yc += 2;
01715             uc++;
01716             vc++;
01717         }
01718 #endif
01719 #endif
01720         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
01721         {
01722             usrc += chromStride;
01723             vsrc += chromStride;
01724         }
01725         ysrc += lumStride;
01726         dst += dstStride;
01727     }
01728 #if HAVE_MMX
01729 __asm__(    EMMS"       \n\t"
01730         SFENCE"     \n\t"
01731         :::"memory");
01732 #endif
01733 }
01734 
01739 static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01740                                       long width, long height,
01741                                       long lumStride, long chromStride, long dstStride)
01742 {
01743     //FIXME interpolate chroma
01744     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
01745 }
01746 
01750 static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01751                                          long width, long height,
01752                                          long lumStride, long chromStride, long dstStride)
01753 {
01754     RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01755 }
01756 
01760 static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
01761                                          long width, long height,
01762                                          long lumStride, long chromStride, long dstStride)
01763 {
01764     RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
01765 }
01766 
/**
 * Split packed YUY2 (byte order Y0 U Y1 V) into separate Y, U and V planes.
 *
 * Source rows are consumed in pairs. Luma is copied from both rows of a
 * pair; U and V are taken from the first row only (the chroma of the second
 * row is dropped, not averaged). Per pair, ydst advances by two lumStride,
 * src by two srcStride, and udst/vdst by one chromStride.
 *
 * The outer loop always handles two rows per iteration, so with an odd
 * height it also processes the row following the last one.
 * The MMX loops handle 8 chroma samples (16 pixels) per iteration and run
 * their body at least once, so they overshoot when width>>1 is not a
 * multiple of 8.
 *
 * @param src         packed YUY2 input, rows srcStride bytes apart
 * @param ydst        luma output plane, rows lumStride bytes apart
 * @param udst        U output plane, rows chromStride bytes apart
 * @param vdst        V output plane, rows chromStride bytes apart
 * @param width       width in pixels; width>>1 chroma samples per row
 * @param height      number of source rows
 */
01771 static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01772                                       long width, long height,
01773                                       long lumStride, long chromStride, long srcStride)
01774 {
01775     long y;
01776     const long chromWidth= width>>1;
01777     for (y=0; y<height; y+=2)
01778     {
01779 #if HAVE_MMX
        /* First row of the pair: extract Y, U and V.
         * REG_a counts chroma samples; mm7 holds the 0x00FF word mask. */
01780         __asm__ volatile(
01781         "xor                 %%"REG_a", %%"REG_a"   \n\t"
01782         "pcmpeqw                 %%mm7, %%mm7       \n\t"
01783         "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
01784         ASMALIGN(4)
01785         "1:                \n\t"
01786         PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
01787         "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
01788         "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
01789         "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
01790         "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
01791         "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
01792         "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
01793         "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
01794         "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
01795         "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
01796         "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
01797 
01798         MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
01799 
01800         "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
01801         "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
01802         "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
01803         "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
01804         "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
01805         "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
01806         "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
01807         "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
01808         "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
01809         "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
01810 
01811         MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
01812 
01813         "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
01814         "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
01815         "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
01816         "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
01817         "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
01818         "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
01819         "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
01820         "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
01821 
01822         MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
01823         MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
01824 
01825         "add                        $8, %%"REG_a"   \n\t"
01826         "cmp                        %4, %%"REG_a"   \n\t"
01827         " jb                        1b              \n\t"
01828         ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01829         : "memory", "%"REG_a
01830         );
01831 
01832         ydst += lumStride;
01833         src  += srcStride;
01834 
        /* Second row of the pair: luma only. This statement does not set up
         * mm7 itself; it relies on the 0x00FF mask left there by the
         * previous asm statement. */
01835         __asm__ volatile(
01836         "xor                 %%"REG_a", %%"REG_a"   \n\t"
01837         ASMALIGN(4)
01838         "1:                                         \n\t"
01839         PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
01840         "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
01841         "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
01842         "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
01843         "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
01844         "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
01845         "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
01846         "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
01847         "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
01848         "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
01849         "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
01850 
01851         MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
01852         MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
01853 
01854         "add                        $8, %%"REG_a"   \n\t"
01855         "cmp                        %4, %%"REG_a"   \n\t"
01856         " jb                        1b              \n\t"
01857 
01858         ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
01859         : "memory", "%"REG_a
01860         );
01861 #else
01862         long i;
        /* first row of the pair: luma and chroma */
01863         for (i=0; i<chromWidth; i++)
01864         {
01865             ydst[2*i+0]     = src[4*i+0];
01866             udst[i]     = src[4*i+1];
01867             ydst[2*i+1]     = src[4*i+2];
01868             vdst[i]     = src[4*i+3];
01869         }
01870         ydst += lumStride;
01871         src  += srcStride;
01872 
        /* second row of the pair: luma only, chroma bytes are skipped */
01873         for (i=0; i<chromWidth; i++)
01874         {
01875             ydst[2*i+0]     = src[4*i+0];
01876             ydst[2*i+1]     = src[4*i+2];
01877         }
01878 #endif
01879         udst += chromStride;
01880         vdst += chromStride;
01881         ydst += lumStride;
01882         src  += srcStride;
01883     }
01884 #if HAVE_MMX
01885 __asm__ volatile(   EMMS"       \n\t"
01886                 SFENCE"     \n\t"
01887                 :::"memory");
01888 #endif
01889 }
01890 
01891 static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
01892                                       uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
01893                                       long width, long height, long lumStride, long chromStride)
01894 {
01895     /* Y Plane */
01896     memcpy(ydst, ysrc, width*height);
01897 
01898     /* XXX: implement upscaling for U,V */
01899 }
01900 
01901 static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
01902 {
01903     long x,y;
01904 
01905     dst[0]= src[0];
01906 
01907     // first line
01908     for (x=0; x<srcWidth-1; x++){
01909         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
01910         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
01911     }
01912     dst[2*srcWidth-1]= src[srcWidth-1];
01913 
01914         dst+= dstStride;
01915 
01916     for (y=1; y<srcHeight; y++){
01917 #if HAVE_MMX2 || HAVE_AMD3DNOW
01918         const long mmxSize= srcWidth&~15;
01919         __asm__ volatile(
01920         "mov           %4, %%"REG_a"            \n\t"
01921         "1:                                     \n\t"
01922         "movq         (%0, %%"REG_a"), %%mm0    \n\t"
01923         "movq         (%1, %%"REG_a"), %%mm1    \n\t"
01924         "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
01925         "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
01926         "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
01927         "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
01928         PAVGB"                  %%mm0, %%mm5    \n\t"
01929         PAVGB"                  %%mm0, %%mm3    \n\t"
01930         PAVGB"                  %%mm0, %%mm5    \n\t"
01931         PAVGB"                  %%mm0, %%mm3    \n\t"
01932         PAVGB"                  %%mm1, %%mm4    \n\t"
01933         PAVGB"                  %%mm1, %%mm2    \n\t"
01934         PAVGB"                  %%mm1, %%mm4    \n\t"
01935         PAVGB"                  %%mm1, %%mm2    \n\t"
01936         "movq                   %%mm5, %%mm7    \n\t"
01937         "movq                   %%mm4, %%mm6    \n\t"
01938         "punpcklbw              %%mm3, %%mm5    \n\t"
01939         "punpckhbw              %%mm3, %%mm7    \n\t"
01940         "punpcklbw              %%mm2, %%mm4    \n\t"
01941         "punpckhbw              %%mm2, %%mm6    \n\t"
01942 #if 1
01943         MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
01944         MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
01945         MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
01946         MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
01947 #else
01948         "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
01949         "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
01950         "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
01951         "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
01952 #endif
01953         "add                       $8, %%"REG_a"            \n\t"
01954         " js                       1b                       \n\t"
01955         :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
01956            "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
01957            "g" (-mmxSize)
01958         : "%"REG_a
01959 
01960         );
01961 #else
01962         const long mmxSize=1;
01963 #endif
01964         dst[0        ]= (3*src[0] +   src[srcStride])>>2;
01965         dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
01966 
01967         for (x=mmxSize-1; x<srcWidth-1; x++){
01968             dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
01969             dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
01970             dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
01971             dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
01972         }
01973         dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
01974         dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
01975 
01976         dst+=dstStride*2;
01977         src+=srcStride;
01978     }
01979 
01980     // last line
01981 #if 1
01982     dst[0]= src[0];
01983 
01984     for (x=0; x<srcWidth-1; x++){
01985         dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
01986         dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
01987     }
01988     dst[2*srcWidth-1]= src[srcWidth-1];
01989 #else
01990     for (x=0; x<srcWidth; x++){
01991         dst[2*x+0]=
01992         dst[2*x+1]= src[x];
01993     }
01994 #endif
01995 
01996 #if HAVE_MMX
01997 __asm__ volatile(   EMMS"       \n\t"
01998                 SFENCE"     \n\t"
01999                 :::"memory");
02000 #endif
02001 }
02002 
/**
 * Split packed UYVY (byte order U Y0 V Y1) into separate Y, U and V planes.
 *
 * Source rows are consumed in pairs. Luma is copied from both rows of a
 * pair; U and V are taken from the first row only (the chroma of the second
 * row is dropped, not averaged). Per pair, ydst advances by two lumStride,
 * src by two srcStride, and udst/vdst by one chromStride.
 *
 * The outer loop always handles two rows per iteration, so with an odd
 * height it also processes the row following the last one.
 * The MMX loops handle 8 chroma samples (16 pixels) per iteration and run
 * their body at least once, so they overshoot when width>>1 is not a
 * multiple of 8.
 *
 * @param src         packed UYVY input, rows srcStride bytes apart
 * @param ydst        luma output plane, rows lumStride bytes apart
 * @param udst        U output plane, rows chromStride bytes apart
 * @param vdst        V output plane, rows chromStride bytes apart
 * @param width       width in pixels; width>>1 chroma samples per row
 * @param height      number of source rows
 */
02009 static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02010                                       long width, long height,
02011                                       long lumStride, long chromStride, long srcStride)
02012 {
02013     long y;
02014     const long chromWidth= width>>1;
02015     for (y=0; y<height; y+=2)
02016     {
02017 #if HAVE_MMX
        /* First row of the pair: extract Y, U and V.
         * REG_a counts chroma samples; mm7 holds the 0x00FF word mask.
         * Chroma sits in the low byte of each word, luma in the high byte. */
02018         __asm__ volatile(
02019         "xor                 %%"REG_a", %%"REG_a"   \n\t"
02020         "pcmpeqw             %%mm7, %%mm7   \n\t"
02021         "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
02022         ASMALIGN(4)
02023         "1:                                 \n\t"
02024         PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
02025         "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
02026         "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
02027         "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
02028         "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
02029         "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
02030         "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
02031         "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
02032         "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
02033         "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
02034         "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
02035 
02036         MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
02037 
02038         "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
02039         "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
02040         "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
02041         "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
02042         "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
02043         "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
02044         "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
02045         "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
02046         "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
02047         "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
02048 
02049         MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
02050 
02051         "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
02052         "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
02053         "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
02054         "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
02055         "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
02056         "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
02057         "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
02058         "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
02059 
02060         MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
02061         MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
02062 
02063         "add                    $8, %%"REG_a"   \n\t"
02064         "cmp                    %4, %%"REG_a"   \n\t"
02065         " jb                    1b          \n\t"
02066         ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02067         : "memory", "%"REG_a
02068         );
02069 
02070         ydst += lumStride;
02071         src  += srcStride;
02072 
        /* Second row of the pair: luma only, taken from the high byte of
         * each word by shifting; no mask register is needed here. */
02073         __asm__ volatile(
02074         "xor                 %%"REG_a", %%"REG_a"   \n\t"
02075         ASMALIGN(4)
02076         "1:                                 \n\t"
02077         PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
02078         "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
02079         "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
02080         "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(8)
02081         "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // UYVY UYVY(12)
02082         "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
02083         "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
02084         "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
02085         "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
02086         "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
02087         "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
02088 
02089         MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
02090         MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
02091 
02092         "add                    $8, %%"REG_a"   \n\t"
02093         "cmp                    %4, %%"REG_a"   \n\t"
02094         " jb                    1b          \n\t"
02095 
02096         ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
02097         : "memory", "%"REG_a
02098         );
02099 #else
02100         long i;
        /* first row of the pair: luma and chroma */
02101         for (i=0; i<chromWidth; i++)
02102         {
02103             udst[i]     = src[4*i+0];
02104             ydst[2*i+0] = src[4*i+1];
02105             vdst[i]     = src[4*i+2];
02106             ydst[2*i+1] = src[4*i+3];
02107         }
02108         ydst += lumStride;
02109         src  += srcStride;
02110 
        /* second row of the pair: luma only, chroma bytes are skipped */
02111         for (i=0; i<chromWidth; i++)
02112         {
02113             ydst[2*i+0] = src[4*i+1];
02114             ydst[2*i+1] = src[4*i+3];
02115         }
02116 #endif
02117         udst += chromStride;
02118         vdst += chromStride;
02119         ydst += lumStride;
02120         src  += srcStride;
02121     }
02122 #if HAVE_MMX
02123 __asm__ volatile(   EMMS"       \n\t"
02124                 SFENCE"     \n\t"
02125                 :::"memory");
02126 #endif
02127 }
02128 
02136 static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
02137                                        long width, long height,
02138                                        long lumStride, long chromStride, long srcStride)
02139 {
02140     long y;
02141     const long chromWidth= width>>1;
02142 #if HAVE_MMX
02143     for (y=0; y<height-2; y+=2)
02144     {
02145         long i;
02146         for (i=0; i<2; i++)
02147         {
02148             __asm__ volatile(
02149             "mov                        %2, %%"REG_a"   \n\t"
02150             "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
02151             "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
02152             "pxor                    %%mm7, %%mm7       \n\t"
02153             "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
02154             ASMALIGN(4)
02155             "1:                                         \n\t"
02156             PREFETCH"    64(%0, %%"REG_d")              \n\t"
02157             "movd          (%0, %%"REG_d"), %%mm0       \n\t"
02158             "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
02159             "punpcklbw               %%mm7, %%mm0       \n\t"
02160             "punpcklbw               %%mm7, %%mm1       \n\t"
02161             "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
02162             "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
02163             "punpcklbw               %%mm7, %%mm2       \n\t"
02164             "punpcklbw               %%mm7, %%mm3       \n\t"
02165             "pmaddwd                 %%mm6, %%mm0       \n\t"
02166             "pmaddwd                 %%mm6, %%mm1       \n\t"
02167             "pmaddwd                 %%mm6, %%mm2       \n\t"
02168             "pmaddwd                 %%mm6, %%mm3       \n\t"
02169 #ifndef FAST_BGR2YV12
02170             "psrad                      $8, %%mm0       \n\t"
02171             "psrad                      $8, %%mm1       \n\t"
02172             "psrad                      $8, %%mm2       \n\t"
02173             "psrad                      $8, %%mm3       \n\t"
02174 #endif
02175             "packssdw                %%mm1, %%mm0       \n\t"
02176             "packssdw                %%mm3, %%mm2       \n\t"
02177             "pmaddwd                 %%mm5, %%mm0       \n\t"
02178             "pmaddwd                 %%mm5, %%mm2       \n\t"
02179             "packssdw                %%mm2, %%mm0       \n\t"
02180             "psraw                      $7, %%mm0       \n\t"
02181 
02182             "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
02183             "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
02184             "punpcklbw               %%mm7, %%mm4       \n\t"
02185             "punpcklbw               %%mm7, %%mm1       \n\t"
02186             "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
02187             "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
02188             "punpcklbw               %%mm7, %%mm2       \n\t"
02189             "punpcklbw               %%mm7, %%mm3       \n\t"
02190             "pmaddwd                 %%mm6, %%mm4       \n\t"
02191             "pmaddwd                 %%mm6, %%mm1       \n\t"
02192             "pmaddwd                 %%mm6, %%mm2       \n\t"
02193             "pmaddwd                 %%mm6, %%mm3       \n\t"
02194 #ifndef FAST_BGR2YV12
02195             "psrad                      $8, %%mm4       \n\t"
02196             "psrad                      $8, %%mm1       \n\t"
02197             "psrad                      $8, %%mm2       \n\t"
02198             "psrad                      $8, %%mm3       \n\t"
02199 #endif
02200             "packssdw                %%mm1, %%mm4       \n\t"
02201             "packssdw                %%mm3, %%mm2       \n\t"
02202             "pmaddwd                 %%mm5, %%mm4       \n\t"
02203             "pmaddwd                 %%mm5, %%mm2       \n\t"
02204             "add                       $24, %%"REG_d"   \n\t"
02205             "packssdw                %%mm2, %%mm4       \n\t"
02206             "psraw                      $7, %%mm4       \n\t"
02207 
02208             "packuswb                %%mm4, %%mm0       \n\t"
02209             "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
02210 
02211             MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
02212             "add                        $8,      %%"REG_a"  \n\t"
02213             " js                        1b                  \n\t"
02214             : : "r" (src+width*3), "r" (ydst+width), "g" (-width)
02215             : "%"REG_a, "%"REG_d
02216             );
02217             ydst += lumStride;
02218             src  += srcStride;
02219         }
02220         src -= srcStride*2;
02221         __asm__ volatile(
02222         "mov                        %4, %%"REG_a"   \n\t"
02223         "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
02224         "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
02225         "pxor                    %%mm7, %%mm7       \n\t"
02226         "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
02227         "add                 %%"REG_d", %%"REG_d"   \n\t"
02228         ASMALIGN(4)
02229         "1:                                         \n\t"
02230         PREFETCH"    64(%0, %%"REG_d")              \n\t"
02231         PREFETCH"    64(%1, %%"REG_d")              \n\t"
02232 #if HAVE_MMX2 || HAVE_AMD3DNOW
02233         "movq          (%0, %%"REG_d"), %%mm0       \n\t"
02234         "movq          (%1, %%"REG_d"), %%mm1       \n\t"
02235         "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
02236         "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
02237         PAVGB"                   %%mm1, %%mm0       \n\t"
02238         PAVGB"                   %%mm3, %%mm2       \n\t"
02239         "movq                    %%mm0, %%mm1       \n\t"
02240         "movq                    %%mm2, %%mm3       \n\t"
02241         "psrlq                     $24, %%mm0       \n\t"
02242         "psrlq                     $24, %%mm2       \n\t"
02243         PAVGB"                   %%mm1, %%mm0       \n\t"
02244         PAVGB"                   %%mm3, %%mm2       \n\t"
02245         "punpcklbw               %%mm7, %%mm0       \n\t"
02246         "punpcklbw               %%mm7, %%mm2       \n\t"
02247 #else
02248         "movd          (%0, %%"REG_d"), %%mm0       \n\t"
02249         "movd          (%1, %%"REG_d"), %%mm1       \n\t"
02250         "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
02251         "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
02252         "punpcklbw               %%mm7, %%mm0       \n\t"
02253         "punpcklbw               %%mm7, %%mm1       \n\t"
02254         "punpcklbw               %%mm7, %%mm2       \n\t"
02255         "punpcklbw               %%mm7, %%mm3       \n\t"
02256         "paddw                   %%mm1, %%mm0       \n\t"
02257         "paddw                   %%mm3, %%mm2       \n\t"
02258         "paddw                   %%mm2, %%mm0       \n\t"
02259         "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
02260         "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
02261         "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
02262         "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
02263         "punpcklbw               %%mm7, %%mm4       \n\t"
02264         "punpcklbw               %%mm7, %%mm1       \n\t"
02265         "punpcklbw               %%mm7, %%mm2       \n\t"
02266         "punpcklbw               %%mm7, %%mm3       \n\t"
02267         "paddw                   %%mm1, %%mm4       \n\t"
02268         "paddw                   %%mm3, %%mm2       \n\t"
02269         "paddw                   %%mm4, %%mm2       \n\t"
02270         "psrlw                      $2, %%mm0       \n\t"
02271         "psrlw                      $2, %%mm2       \n\t"
02272 #endif
02273         "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
02274         "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
02275 
02276         "pmaddwd                 %%mm0, %%mm1       \n\t"
02277         "pmaddwd                 %%mm2, %%mm3       \n\t"
02278         "pmaddwd                 %%mm6, %%mm0       \n\t"
02279         "pmaddwd                 %%mm6, %%mm2       \n\t"
02280 #ifndef FAST_BGR2YV12
02281         "psrad                      $8, %%mm0       \n\t"
02282         "psrad                      $8, %%mm1       \n\t"
02283         "psrad                      $8, %%mm2       \n\t"
02284         "psrad                      $8, %%mm3       \n\t"
02285 #endif
02286         "packssdw                %%mm2, %%mm0       \n\t"
02287         "packssdw                %%mm3, %%mm1       \n\t"
02288         "pmaddwd                 %%mm5, %%mm0       \n\t"
02289         "pmaddwd                 %%mm5, %%mm1       \n\t"
02290         "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
02291         "psraw                      $7, %%mm0       \n\t"
02292 
02293 #if HAVE_MMX2 || HAVE_AMD3DNOW
02294         "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
02295         "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
02296         "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
02297         "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
02298         PAVGB"                   %%mm1, %%mm4       \n\t"
02299         PAVGB"                   %%mm3, %%mm2       \n\t"
02300         "movq                    %%mm4, %%mm1       \n\t"
02301         "movq                    %%mm2, %%mm3       \n\t"
02302         "psrlq                     $24, %%mm4       \n\t"
02303         "psrlq                     $24, %%mm2       \n\t"
02304         PAVGB"                   %%mm1, %%mm4       \n\t"
02305         PAVGB"                   %%mm3, %%mm2       \n\t"
02306         "punpcklbw               %%mm7, %%mm4       \n\t"
02307         "punpcklbw               %%mm7, %%mm2       \n\t"
02308 #else
02309         "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
02310         "movd        12(%1, %%"REG_d"), %%mm1       \n\t"
02311         "movd        15(%0, %%"REG_d"), %%mm2       \n\t"
02312         "movd        15(%1, %%"REG_d"), %%mm3       \n\t"
02313         "punpcklbw               %%mm7, %%mm4       \n\t"
02314         "punpcklbw               %%mm7, %%mm1       \n\t"
02315         "punpcklbw               %%mm7, %%mm2       \n\t"
02316         "punpcklbw               %%mm7, %%mm3       \n\t"
02317         "paddw                   %%mm1, %%mm4       \n\t"
02318         "paddw                   %%mm3, %%mm2       \n\t"
02319         "paddw                   %%mm2, %%mm4       \n\t"
02320         "movd        18(%0, %%"REG_d"), %%mm5       \n\t"
02321         "movd        18(%1, %%"REG_d"), %%mm1       \n\t"
02322         "movd        21(%0, %%"REG_d"), %%mm2       \n\t"
02323         "movd        21(%1, %%"REG_d"), %%mm3       \n\t"
02324         "punpcklbw               %%mm7, %%mm5       \n\t"
02325         "punpcklbw               %%mm7, %%mm1       \n\t"
02326         "punpcklbw               %%mm7, %%mm2       \n\t"
02327         "punpcklbw               %%mm7, %%mm3       \n\t"
02328         "paddw                   %%mm1, %%mm5       \n\t"
02329         "paddw                   %%mm3, %%mm2       \n\t"
02330         "paddw                   %%mm5, %%mm2       \n\t"
02331         "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
02332         "psrlw                      $2, %%mm4       \n\t"
02333         "psrlw                      $2, %%mm2       \n\t"
02334 #endif
02335         "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
02336         "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
02337 
02338         "pmaddwd                 %%mm4, %%mm1       \n\t"
02339         "pmaddwd                 %%mm2, %%mm3       \n\t"
02340         "pmaddwd                 %%mm6, %%mm4       \n\t"
02341         "pmaddwd                 %%mm6, %%mm2       \n\t"
02342 #ifndef FAST_BGR2YV12
02343         "psrad                      $8, %%mm4       \n\t"
02344         "psrad                      $8, %%mm1       \n\t"
02345         "psrad                      $8, %%mm2       \n\t"
02346         "psrad                      $8, %%mm3       \n\t"
02347 #endif
02348         "packssdw                %%mm2, %%mm4       \n\t"
02349         "packssdw                %%mm3, %%mm1       \n\t"
02350         "pmaddwd                 %%mm5, %%mm4       \n\t"
02351         "pmaddwd                 %%mm5, %%mm1       \n\t"
02352         "add                       $24, %%"REG_d"   \n\t"
02353         "packssdw                %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
02354         "psraw                      $7, %%mm4       \n\t"
02355 
02356         "movq                    %%mm0, %%mm1           \n\t"
02357         "punpckldq               %%mm4, %%mm0           \n\t"
02358         "punpckhdq               %%mm4, %%mm1           \n\t"
02359         "packsswb                %%mm1, %%mm0           \n\t"
02360         "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0         \n\t"
02361         "movd                    %%mm0, (%2, %%"REG_a") \n\t"
02362         "punpckhdq               %%mm0, %%mm0           \n\t"
02363         "movd                    %%mm0, (%3, %%"REG_a") \n\t"
02364         "add                        $4, %%"REG_a"       \n\t"
02365         " js                        1b                  \n\t"
02366         : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
02367         : "%"REG_a, "%"REG_d
02368         );
02369 
02370         udst += chromStride;
02371         vdst += chromStride;
02372         src  += srcStride*2;
02373     }
02374 
02375     __asm__ volatile(   EMMS"       \n\t"
02376                     SFENCE"     \n\t"
02377                     :::"memory");
02378 #else
02379     y=0;
02380 #endif
02381     for (; y<height; y+=2)
02382     {
02383         long i;
02384         for (i=0; i<chromWidth; i++)
02385         {
02386             unsigned int b = src[6*i+0];
02387             unsigned int g = src[6*i+1];
02388             unsigned int r = src[6*i+2];
02389 
02390             unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02391             unsigned int V  =  ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128;
02392             unsigned int U  =  ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128;
02393 
02394             udst[i]     = U;
02395             vdst[i]     = V;
02396             ydst[2*i]   = Y;
02397 
02398             b = src[6*i+3];
02399             g = src[6*i+4];
02400             r = src[6*i+5];
02401 
02402             Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02403             ydst[2*i+1]     = Y;
02404         }
02405         ydst += lumStride;
02406         src  += srcStride;
02407 
02408         for (i=0; i<chromWidth; i++)
02409         {
02410             unsigned int b = src[6*i+0];
02411             unsigned int g = src[6*i+1];
02412             unsigned int r = src[6*i+2];
02413 
02414             unsigned int Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02415 
02416             ydst[2*i]     = Y;
02417 
02418             b = src[6*i+3];
02419             g = src[6*i+4];
02420             r = src[6*i+5];
02421 
02422             Y  =  ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
02423             ydst[2*i+1]     = Y;
02424         }
02425         udst += chromStride;
02426         vdst += chromStride;
02427         ydst += lumStride;
02428         src  += srcStride;
02429     }
02430 }
02431 
/**
 * Interleave two byte planes into one:
 * dest[2*i] = src1[i] and dest[2*i+1] = src2[i] for every row.
 *
 * @param src1       first source plane (supplies the even destination bytes)
 * @param src2       second source plane (supplies the odd destination bytes)
 * @param dest       destination plane; 2*width bytes are written per row
 * @param width      number of bytes taken from each source plane per row
 * @param height     number of rows to process
 * @param src1Stride byte distance between consecutive rows of src1
 * @param src2Stride byte distance between consecutive rows of src2
 * @param dstStride  byte distance between consecutive rows of dest
 *
 * The SIMD loops consume 16 source bytes per iteration, always execute at
 * least once and compare the running offset against width-15 with an
 * unsigned branch (jb). They are therefore entered only when width >= 16;
 * otherwise width-15 would be zero or negative and the loop would run past
 * the end of the row. The SSE2 loop uses movdqa/movntdq, which need 16-byte
 * aligned addresses, so it is used only when all three row pointers are
 * aligned; unaligned rows take the MMX loop instead. The trailing
 * width % 16 bytes (or the whole row when no SIMD loop ran) are handled
 * by plain C.
 */
static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
                             long width, long height, long src1Stride,
                             long src2Stride, long dstStride){
    long h;

    for (h=0; h < height; h++)
    {
        long w = 0; /* first column not yet written by a SIMD loop */

#if HAVE_MMX
        if (width >= 16)
        {
#if HAVE_SSE2
            if (!(((uintptr_t)src1 | (uintptr_t)src2 | (uintptr_t)dest) & 15))
            {
                __asm__(
                "xor              %%"REG_a", %%"REG_a"  \n\t"
                "1:                                     \n\t"
                PREFETCH" 64(%1, %%"REG_a")             \n\t"
                PREFETCH" 64(%2, %%"REG_a")             \n\t"
                "movdqa     (%1, %%"REG_a"), %%xmm0     \n\t"
                "movdqa     (%1, %%"REG_a"), %%xmm1     \n\t"
                "movdqa     (%2, %%"REG_a"), %%xmm2     \n\t"
                "punpcklbw           %%xmm2, %%xmm0     \n\t"
                "punpckhbw           %%xmm2, %%xmm1     \n\t"
                "movntdq             %%xmm0,   (%0, %%"REG_a", 2)   \n\t"
                "movntdq             %%xmm1, 16(%0, %%"REG_a", 2)   \n\t"
                "add                    $16, %%"REG_a"  \n\t"
                "cmp                     %3, %%"REG_a"  \n\t"
                " jb                     1b             \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                : "memory", "%"REG_a""
                );
            }
            else
#endif
            {
                __asm__(
                "xor %%"REG_a", %%"REG_a"               \n\t"
                "1:                                     \n\t"
                PREFETCH" 64(%1, %%"REG_a")             \n\t"
                PREFETCH" 64(%2, %%"REG_a")             \n\t"
                "movq       (%1, %%"REG_a"), %%mm0      \n\t"
                "movq      8(%1, %%"REG_a"), %%mm2      \n\t"
                "movq                 %%mm0, %%mm1      \n\t"
                "movq                 %%mm2, %%mm3      \n\t"
                "movq       (%2, %%"REG_a"), %%mm4      \n\t"
                "movq      8(%2, %%"REG_a"), %%mm5      \n\t"
                "punpcklbw            %%mm4, %%mm0      \n\t"
                "punpckhbw            %%mm4, %%mm1      \n\t"
                "punpcklbw            %%mm5, %%mm2      \n\t"
                "punpckhbw            %%mm5, %%mm3      \n\t"
                MOVNTQ"               %%mm0,   (%0, %%"REG_a", 2)   \n\t"
                MOVNTQ"               %%mm1,  8(%0, %%"REG_a", 2)   \n\t"
                MOVNTQ"               %%mm2, 16(%0, %%"REG_a", 2)   \n\t"
                MOVNTQ"               %%mm3, 24(%0, %%"REG_a", 2)   \n\t"
                "add                    $16, %%"REG_a"  \n\t"
                "cmp                     %3, %%"REG_a"  \n\t"
                " jb                     1b             \n\t"
                ::"r"(dest), "r"(src1), "r"(src2), "r" (width-15)
                : "memory", "%"REG_a
                );
            }
            /* the SIMD loop covered every complete group of 16 bytes */
            w = width & ~15L;
        }
#endif
        for (; w < width; w++)
        {
            dest[2*w+0] = src1[w];
            dest[2*w+1] = src2[w];
        }
        dest += dstStride;
        src1 += src1Stride;
        src2 += src2Stride;
    }
#if HAVE_MMX
    /* leave MMX state and order the non-temporal stores issued above */
    __asm__(
        EMMS"       \n\t"
        SFENCE"     \n\t"
        ::: "memory"
        );
#endif
}
02512 
/**
 * Expand one plane for vu9_to_vu12(): every source byte is written twice
 * horizontally, and source row (y>>1) feeds destination row y.
 *
 * @param src       source plane
 * @param dst       destination plane; 2*w bytes are written per row
 * @param w         number of source bytes consumed per row
 * @param h         number of destination rows produced
 * @param srcStride byte distance between consecutive source rows
 * @param dstStride byte distance between consecutive destination rows
 */
static inline void RENAME(vu9_to_vu12_plane)(const uint8_t *src, uint8_t *dst,
                                             long w, long h,
                                             long srcStride, long dstStride)
{
    long x, y;

    for (y=0; y<h; y++) {
        const uint8_t *s = src + srcStride*(y>>1);
        uint8_t *d = dst + dstStride*y;
        x = 0;
#if HAVE_MMX
        /* 32 source bytes -> 64 destination bytes per iteration.
         * Addresses are formed from explicit base and index registers so
         * that the constant displacements are always assembled correctly,
         * independent of how the compiler would print a memory operand. */
        for (; x<w-31; x+=32) {
            __asm__ volatile(
            PREFETCH"   32(%1, %2)          \n\t"
            "movq         (%1, %2), %%mm0   \n\t"
            "movq        8(%1, %2), %%mm2   \n\t"
            "movq       16(%1, %2), %%mm4   \n\t"
            "movq       24(%1, %2), %%mm6   \n\t"
            "movq            %%mm0, %%mm1   \n\t"
            "movq            %%mm2, %%mm3   \n\t"
            "movq            %%mm4, %%mm5   \n\t"
            "movq            %%mm6, %%mm7   \n\t"
            "punpcklbw       %%mm0, %%mm0   \n\t"
            "punpckhbw       %%mm1, %%mm1   \n\t"
            "punpcklbw       %%mm2, %%mm2   \n\t"
            "punpckhbw       %%mm3, %%mm3   \n\t"
            "punpcklbw       %%mm4, %%mm4   \n\t"
            "punpckhbw       %%mm5, %%mm5   \n\t"
            "punpcklbw       %%mm6, %%mm6   \n\t"
            "punpckhbw       %%mm7, %%mm7   \n\t"
            MOVNTQ"    %%mm0,   (%0, %2, 2) \n\t"
            MOVNTQ"    %%mm1,  8(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm2, 16(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm3, 24(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm4, 32(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm5, 40(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm6, 48(%0, %2, 2) \n\t"
            MOVNTQ"    %%mm7, 56(%0, %2, 2)"
            :: "r"(d), "r"(s), "r"(x)
            : "memory");
        }
#endif
        /* remaining columns (or the whole row without MMX) */
        for (; x<w; x++) d[2*x] = d[2*x+1] = s[x];
    }
}

/**
 * Upsample two planes by a factor of two in both directions by sample
 * repetition: src1 is expanded into dst1 and src2 into dst2.
 *
 * Per plane, width/2 source bytes are read per row and height/2
 * destination rows are written; destination row y is produced from
 * source row y/2 and each source byte is stored twice.
 *
 * @param src1       first source plane
 * @param src2       second source plane
 * @param dst1       destination for the expanded first plane
 * @param dst2       destination for the expanded second plane
 * @param width      twice the number of source bytes consumed per row
 * @param height     twice the number of destination rows written
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param dstStride1 byte distance between rows of dst1
 * @param dstStride2 byte distance between rows of dst2
 */
static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
                                       uint8_t *dst1, uint8_t *dst2,
                                       long width, long height,
                                       long srcStride1, long srcStride2,
                                       long dstStride1, long dstStride2)
{
    const long w = width/2;
    const long h = height/2;

#if HAVE_MMX
    /* warm the cache with the second row of each source plane */
    __asm__ volatile(
    PREFETCH" %0    \n\t"
    PREFETCH" %1    \n\t"
    ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif

    RENAME(vu9_to_vu12_plane)(src1, dst1, w, h, srcStride1, dstStride1);
    RENAME(vu9_to_vu12_plane)(src2, dst2, w, h, srcStride2, dstStride2);

#if HAVE_MMX
    /* leave MMX state and order the non-temporal stores issued above */
    __asm__(
        EMMS"       \n\t"
        SFENCE"     \n\t"
        ::: "memory"
        );
#endif
}
02615 
/**
 * Pack three planes into one interleaved plane.
 *
 * For every output row, each group index g (0 <= g < width/2) consumes four
 * bytes of src1 and one byte each of src2 and src3, and emits the eight
 * bytes  Y0 U Y1 V Y2 U Y3 V.  Rows of src2 and src3 are shared by four
 * consecutive output rows (row >> 2).
 *
 * @param src1       plane supplying four bytes per group (luma)
 * @param src2       plane supplying the byte stored at offsets 1 and 5 (U)
 * @param src3       plane supplying the byte stored at offsets 3 and 7 (V)
 * @param dst        packed destination, 8 bytes per group
 * @param width      twice the number of groups processed per row
 * @param height     number of rows
 * @param srcStride1 byte distance between rows of src1
 * @param srcStride2 byte distance between rows of src2
 * @param srcStride3 byte distance between rows of src3
 * @param dstStride  byte distance between rows of dst
 */
static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                                        uint8_t *dst,
                                        long width, long height,
                                        long srcStride1, long srcStride2,
                                        long srcStride3, long dstStride)
{
    const long groups = width/2;
    long row;

    for (row = 0; row < height; row++) {
        const uint8_t *luma = src1 + srcStride1*row;
        const uint8_t *cb   = src2 + srcStride2*(row>>2);
        const uint8_t *cr   = src3 + srcStride3*(row>>2);
        uint8_t *out        = dst  + dstStride*row;
        long g = 0;

#if HAVE_MMX
        /* eight groups (32 src1 bytes, 8+8 chroma bytes -> 64 output bytes)
         * per iteration */
        for (; g < groups-7; g += 8) {
            __asm__ volatile(
            PREFETCH"   32(%1, %0)          \n\t"
            PREFETCH"   32(%2, %0)          \n\t"
            PREFETCH"   32(%3, %0)          \n\t"
            "movq      (%1, %0, 4), %%mm0   \n\t" /* Y0 .. Y7 */
            "movq         (%2, %0), %%mm1   \n\t" /* U0 .. U7 */
            "movq         (%3, %0), %%mm2   \n\t" /* V0 .. V7 */
            "movq            %%mm0, %%mm3   \n\t" /* copy of Y0 .. Y7 */
            "movq            %%mm1, %%mm4   \n\t" /* copy of U0 .. U7 */
            "movq            %%mm2, %%mm5   \n\t" /* copy of V0 .. V7 */
            "punpcklbw       %%mm1, %%mm1   \n\t" /* U0U0 U1U1 U2U2 U3U3 */
            "punpcklbw       %%mm2, %%mm2   \n\t" /* V0V0 V1V1 V2V2 V3V3 */
            "punpckhbw       %%mm4, %%mm4   \n\t" /* U4U4 U5U5 U6U6 U7U7 */
            "punpckhbw       %%mm5, %%mm5   \n\t" /* V4V4 V5V5 V6V6 V7V7 */

            "movq            %%mm1, %%mm6   \n\t"
            "punpcklbw       %%mm2, %%mm1   \n\t" /* U0V0 U0V0 U1V1 U1V1 */
            "punpcklbw       %%mm1, %%mm0   \n\t" /* Y0U0 Y1V0 Y2U0 Y3V0 */
            "punpckhbw       %%mm1, %%mm3   \n\t" /* Y4U1 Y5V1 Y6U1 Y7V1 */
            MOVNTQ"          %%mm0,  (%4, %0, 8)    \n\t"
            MOVNTQ"          %%mm3, 8(%4, %0, 8)    \n\t"

            "punpckhbw       %%mm2, %%mm6   \n\t" /* U2V2 U2V2 U3V3 U3V3 */
            "movq     8(%1, %0, 4), %%mm0   \n\t"
            "movq            %%mm0, %%mm3   \n\t"
            "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U2 Y V2 Y U2 Y V2 */
            "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U3 Y V3 Y U3 Y V3 */
            MOVNTQ"          %%mm0, 16(%4, %0, 8)   \n\t"
            MOVNTQ"          %%mm3, 24(%4, %0, 8)   \n\t"

            "movq            %%mm4, %%mm6   \n\t"
            "movq    16(%1, %0, 4), %%mm0   \n\t"
            "movq            %%mm0, %%mm3   \n\t"
            "punpcklbw       %%mm5, %%mm4   \n\t"
            "punpcklbw       %%mm4, %%mm0   \n\t" /* Y U4 Y V4 Y U4 Y V4 */
            "punpckhbw       %%mm4, %%mm3   \n\t" /* Y U5 Y V5 Y U5 Y V5 */
            MOVNTQ"          %%mm0, 32(%4, %0, 8)   \n\t"
            MOVNTQ"          %%mm3, 40(%4, %0, 8)   \n\t"

            "punpckhbw       %%mm5, %%mm6   \n\t"
            "movq    24(%1, %0, 4), %%mm0   \n\t"
            "movq            %%mm0, %%mm3   \n\t"
            "punpcklbw       %%mm6, %%mm0   \n\t" /* Y U6 Y V6 Y U6 Y V6 */
            "punpckhbw       %%mm6, %%mm3   \n\t" /* Y U7 Y V7 Y U7 Y V7 */
            MOVNTQ"          %%mm0, 48(%4, %0, 8)   \n\t"
            MOVNTQ"          %%mm3, 56(%4, %0, 8)   \n\t"

            : "+r" (g)
            : "r"(luma), "r" (cb), "r"(cr), "r"(out)
            :"memory");
        }
#endif
        /* remaining groups (or all of them without MMX), one at a time */
        for (; g < groups; g++) {
            const uint8_t *y4 = luma + (g<<2);
            uint8_t *o        = out + 8*g;
            int k;

            for (k = 0; k < 4; k++) {
                o[2*k]   = y4[k];
                o[2*k+1] = (k & 1) ? cr[g] : cb[g];
            }
        }
    }
#if HAVE_MMX
    /* leave MMX state and order the non-temporal stores issued above */
    __asm__(
        EMMS"       \n\t"
        SFENCE"     \n\t"
        ::: "memory"
        );
#endif
}
02706 
02707 static inline void RENAME(rgb2rgb_init)(void){
02708     rgb15to16       = RENAME(rgb15to16);
02709     rgb15tobgr24    = RENAME(rgb15tobgr24);
02710     rgb15to32       = RENAME(rgb15to32);
02711     rgb16tobgr24    = RENAME(rgb16tobgr24);
02712     rgb16to32       = RENAME(rgb16to32);
02713     rgb16to15       = RENAME(rgb16to15);
02714     rgb24tobgr16    = RENAME(rgb24tobgr16);
02715     rgb24tobgr15    = RENAME(rgb24tobgr15);
02716     rgb24tobgr32    = RENAME(rgb24tobgr32);
02717     rgb32to16       = RENAME(rgb32to16);
02718     rgb32to15       = RENAME(rgb32to15);
02719     rgb32tobgr24    = RENAME(rgb32tobgr24);
02720     rgb24to15       = RENAME(rgb24to15);
02721     rgb24to16       = RENAME(rgb24to16);
02722     rgb24tobgr24    = RENAME(rgb24tobgr24);
02723     rgb32tobgr32    = RENAME(rgb32tobgr32);
02724     rgb32tobgr16    = RENAME(rgb32tobgr16);
02725     rgb32tobgr15    = RENAME(rgb32tobgr15);
02726     yv12toyuy2      = RENAME(yv12toyuy2);
02727     yv12touyvy      = RENAME(yv12touyvy);
02728     yuv422ptoyuy2   = RENAME(yuv422ptoyuy2);
02729     yuv422ptouyvy   = RENAME(yuv422ptouyvy);
02730     yuy2toyv12      = RENAME(yuy2toyv12);
02731 //    uyvytoyv12      = RENAME(uyvytoyv12);
02732 //    yvu9toyv12      = RENAME(yvu9toyv12);
02733     planar2x        = RENAME(planar2x);
02734     rgb24toyv12     = RENAME(rgb24toyv12);
02735     interleaveBytes = RENAME(interleaveBytes);
02736     vu9_to_vu12     = RENAME(vu9_to_vu12);
02737     yvu9_to_yuy2    = RENAME(yvu9_to_yuy2);
02738 }

Generated on Tue Nov 4 2014 12:59:24 for ffmpeg by  doxygen 1.7.1