annotate ffmpeg/libavutil/x86/cpu.c @ 13:844d341cf643 tip

Back up before ISMIR
author Yading Song <yading.song@eecs.qmul.ac.uk>
date Thu, 31 Oct 2013 13:17:06 +0000
parents f445c3017523
children
rev   line source
yading@11 1 /*
yading@11 2 * CPU detection code, extracted from mmx.h
yading@11 3 * (c)1997-99 by H. Dietz and R. Fisher
yading@11 4 * Converted to C and improved by Fabrice Bellard.
yading@11 5 *
yading@11 6 * This file is part of FFmpeg.
yading@11 7 *
yading@11 8 * FFmpeg is free software; you can redistribute it and/or
yading@11 9 * modify it under the terms of the GNU Lesser General Public
yading@11 10 * License as published by the Free Software Foundation; either
yading@11 11 * version 2.1 of the License, or (at your option) any later version.
yading@11 12 *
yading@11 13 * FFmpeg is distributed in the hope that it will be useful,
yading@11 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
yading@11 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
yading@11 16 * Lesser General Public License for more details.
yading@11 17 *
yading@11 18 * You should have received a copy of the GNU Lesser General Public
yading@11 19 * License along with FFmpeg; if not, write to the Free Software
yading@11 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
yading@11 21 */
yading@11 22
yading@11 23 #include <stdlib.h>
yading@11 24 #include <string.h>
yading@11 25
yading@11 26 #include "libavutil/x86/asm.h"
yading@11 27 #include "libavutil/x86/cpu.h"
yading@11 28 #include "libavutil/cpu.h"
yading@11 29
#if HAVE_YASM

/* With external assembly available, both queries are forwarded to the
 * ff_cpu_* routines, which receive the output variables by address. */
#define cpuid(leaf, ra, rb, rc, rd)                                     \
    ff_cpu_cpuid(leaf, &ra, &rb, &rc, &rd)

#define xgetbv(xcr, ra, rd)                                             \
    ff_cpu_xgetbv(xcr, &ra, &rd)

#elif HAVE_INLINE_ASM

/* The b register has to be preserved by hand for PIC builds; gcc does not
 * seem to handle it on its own. Its value is parked in the S register across
 * the CPUID instruction and swapped back afterwards, so the "ebx" result is
 * delivered through S. */
#define cpuid(leaf, ra, rb, rc, rd)                                     \
    __asm__ volatile (                                                  \
        "mov %%"REG_b", %%"REG_S" \n\t"                                 \
        "cpuid \n\t"                                                    \
        "xchg %%"REG_b", %%"REG_S                                       \
        : "=a" (ra), "=S" (rb), "=c" (rc), "=d" (rd)                    \
        : "0" (leaf))

/* The instruction is emitted as raw opcode bytes; the selector goes in
 * through the c register and the result comes back in a and d. */
#define xgetbv(xcr, ra, rd)                                             \
    __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(ra), "=d"(rd) : "c" (xcr))

/* Copy the flags register into val by way of the stack. */
#define get_eflags(val)                                                 \
    __asm__ volatile ("pushfl \n"                                       \
                      "pop %0 \n"                                       \
                      : "=r"(val))

/* Load the flags register from val by way of the stack. */
#define set_eflags(val)                                                 \
    __asm__ volatile ("push %0 \n"                                      \
                      "popfl \n"                                        \
                      :: "r"(val))

#endif /* HAVE_INLINE_ASM */
yading@11 63
#if ARCH_X86_64

/* On x86-64 no probe is performed: the test always reports success. */
#define cpuid_test() 1

#elif HAVE_YASM

/* Use the external assembly implementation of the probe. */
#define cpuid_test ff_cpu_cpuid_test

#elif HAVE_INLINE_ASM

/**
 * Probe whether the CPUID instruction is usable.
 *
 * The ID bit of the EFLAGS register is flipped and the register is read
 * back; if the value read back differs from the initial one, the bit could
 * be toggled. The original EFLAGS value is not written back afterwards.
 *
 * @return nonzero if the ID bit could be toggled, 0 otherwise
 */
static int cpuid_test(void)
{
    x86_reg before, after;

    get_eflags(before);
    set_eflags(before ^ (1 << 21));
    get_eflags(after);

    return after != before;
}
#endif
yading@11 87
/**
 * Detect which multimedia instruction-set extensions the x86 CPU offers.
 *
 * If no cpuid() implementation was selected at build time, or the CPUID
 * probe fails, no capability is reported.
 *
 * @return bitwise OR of the AV_CPU_FLAG_* values that were detected, or 0
 */
int ff_get_cpu_flags_x86(void)
{
    int flags = 0;

#ifdef cpuid

    int reg_a, reg_b, reg_c, reg_d;
    int std_max, ext_max;
    int std_edx = 0, ext_edx = 0;
    int cpu_family = 0, cpu_model = 0;
    union { int i[3]; char c[12]; } vendor_id;

    if (!cpuid_test())
        return 0; /* CPUID not supported */

    /* Leaf 0 yields the highest standard leaf and the 12-byte vendor
     * string, whose pieces arrive in ebx, edx, ecx order. */
    cpuid(0, std_max, vendor_id.i[0], vendor_id.i[2], vendor_id.i[1]);

    if (std_max >= 1) {
        cpuid(1, reg_a, reg_b, reg_c, std_edx);

        /* base family/model plus their extended fields */
        cpu_family = ((reg_a >> 8) & 0xf) + ((reg_a >> 20) & 0xff);
        cpu_model  = ((reg_a >> 4) & 0xf) + ((reg_a >> 12) & 0xf0);

        if (std_edx & (1 << 15))
            flags |= AV_CPU_FLAG_CMOV;
        if (std_edx & (1 << 23))
            flags |= AV_CPU_FLAG_MMX;
        if (std_edx & (1 << 25)) {
            /* bit 25 yields MMXEXT and, in SSE-enabled builds, SSE too */
            flags |= AV_CPU_FLAG_MMXEXT;
#if HAVE_SSE
            flags |= AV_CPU_FLAG_SSE;
#endif /* HAVE_SSE */
        }
#if HAVE_SSE
        if (std_edx & (1 << 26))
            flags |= AV_CPU_FLAG_SSE2;
        if (reg_c & (1 << 0))
            flags |= AV_CPU_FLAG_SSE3;
        if (reg_c & (1 << 9))
            flags |= AV_CPU_FLAG_SSSE3;
        if (reg_c & (1 << 19))
            flags |= AV_CPU_FLAG_SSE4;
        if (reg_c & (1 << 20))
            flags |= AV_CPU_FLAG_SSE42;
#if HAVE_AVX
        /* Both the OSXSAVE and the AVX feature bits must be present ... */
        if ((reg_c & 0x18000000) == 0x18000000) {
            /* ... and the OS must have enabled the matching state bits. */
            xgetbv(0, reg_a, reg_d);
            if ((reg_a & 0x6) == 0x6)
                flags |= AV_CPU_FLAG_AVX;
        }
#endif /* HAVE_AVX */
#endif /* HAVE_SSE */
    }

    cpuid(0x80000000, ext_max, reg_b, reg_c, reg_d);

    if (ext_max >= 0x80000001) {
        cpuid(0x80000001, reg_a, reg_b, reg_c, ext_edx);

        if (ext_edx & (1U << 31))
            flags |= AV_CPU_FLAG_3DNOW;
        if (ext_edx & (1 << 30))
            flags |= AV_CPU_FLAG_3DNOWEXT;
        if (ext_edx & (1 << 23))
            flags |= AV_CPU_FLAG_MMX;
        if (ext_edx & (1 << 22))
            flags |= AV_CPU_FLAG_MMXEXT;

        /* AMD processors that have SSE2 but lack SSE4a (Athlon64, some
         * Opteron, some Sempron) often run MMX, SSE or 3DNow! code faster
         * than SSE2 code. Both AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are
         * set for them, so SSE2 stays in use unless a caller opts out by
         * testing AV_CPU_FLAG_SSE2SLOW. */
        if ((flags & AV_CPU_FLAG_SSE2) && !(reg_c & (1 << 6)) &&
            !strncmp(vendor_id.c, "AuthenticAMD", 12))
            flags |= AV_CPU_FLAG_SSE2SLOW;

        /* XOP and FMA4 share the AVX instruction coding scheme and are
         * therefore only usable when the OS supports AVX. */
        if (flags & AV_CPU_FLAG_AVX) {
            if (reg_c & (1 << 11))
                flags |= AV_CPU_FLAG_XOP;
            if (reg_c & (1 << 16))
                flags |= AV_CPU_FLAG_FMA4;
        }
    }

    if (cpu_family == 6 && !strncmp(vendor_id.c, "GenuineIntel", 12)) {
        switch (cpu_model) {
        case 9:  /* pentium-m "banias" */
        case 13: /* pentium-m "dothan" */
        case 14: /* core1 "yonah" */
            /* These theoretically support sse2, but it is usually slower
             * than mmx, so pretend they don't: AV_CPU_FLAG_SSE2 is cleared
             * and AV_CPU_FLAG_SSE2SLOW set, so that SSE2 is only used by
             * code that explicitly checks AV_CPU_FLAG_SSE2SLOW. SSE3 and
             * AV_CPU_FLAG_SSE3SLOW are treated the same way. */
            if (flags & AV_CPU_FLAG_SSE2)
                flags ^= AV_CPU_FLAG_SSE2SLOW | AV_CPU_FLAG_SSE2;
            if (flags & AV_CPU_FLAG_SSE3)
                flags ^= AV_CPU_FLAG_SSE3SLOW | AV_CPU_FLAG_SSE3;
            break;
        case 28:
            /* The Atom supports SSSE3, but some SSSE3 functions are slower
             * there than their SSE2 equivalents while being faster on other
             * SSSE3 processors. This flag lets callers selectively disable
             * such functions on the Atom. */
            flags |= AV_CPU_FLAG_ATOM;
            break;
        default:
            break;
        }
    }

#endif /* cpuid */

    return flags;
}