diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bb815f5..fc07e0f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -859,14 +859,16 @@ static inline void *reg_address(struct pt_regs *regs, char w, u8 reg) static struct { unsigned long cmov; unsigned long nopl; unsigned long fcomi; unsigned long fucomi; unsigned long fcmov; + unsigned long sse; + unsigned long sse2; } emulated_ops_counter = { - 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0 }; static int emulated_ops_proc_show(struct seq_file *m, void *v) { #if defined(CONFIG_CPU_EMU686) seq_printf(m, "cmov: %lu\n", emulated_ops_counter.cmov); #endif @@ -876,32 +878,370 @@ static int emulated_ops_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_CPU_EMU_FUCOMI seq_printf(m, "fcomi: %lu\n" "fucomi: %lu\n" "fcmov: %lu\n", emulated_ops_counter.fcomi, emulated_ops_counter.fucomi, emulated_ops_counter.fcmov); #endif +#ifdef CONFIG_CPU_EMU_SSE2 + seq_printf(m, "sse: %lu\n" "sse2: %lu\n", + emulated_ops_counter.sse, + emulated_ops_counter.sse2); +#endif return 0; } static int emulated_ops_proc_open(struct inode *inode, struct file *file) { return single_open(file, emulated_ops_proc_show, NULL); } + +/* "echo 0 > /proc/emulated_ops" to clear the counter */ +static ssize_t emulated_ops_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +{ + + if (count) { + char c; + if (get_user(c, buf)) + return -EFAULT; + + if (c != '0') + return count; + + emulated_ops_counter.cmov = 0; + emulated_ops_counter.nopl = 0; + emulated_ops_counter.fcomi = 0; + emulated_ops_counter.fucomi = 0; + emulated_ops_counter.fcmov = 0; + emulated_ops_counter.sse = 0; + emulated_ops_counter.sse2 = 0; + } + return count; +} + static const struct file_operations emulated_ops_proc_fops = { .open = emulated_ops_proc_open, .read = seq_read, + .write = emulated_ops_proc_write, .llseek = seq_lseek, .release = single_release, }; static int __init proc_emulated_ops_init(void) { - proc_create("emulated_ops", 0, NULL, &emulated_ops_proc_fops); + proc_create("emulated_ops", 0644, NULL, &emulated_ops_proc_fops); return 0; } module_init(proc_emulated_ops_init); #endif /* CONFIG_CPU_PROC_EMULATED_OPS */ +#ifdef CONFIG_CPU_EMU_SSE2 +#define EMU_COUNT_SSE(eip) emulated_ops_counter.sse++ +#define EMU_COUNT_SSE2(eip) emulated_ops_counter.sse2++ +#else +#define EMU_COUNT_SSE(eip) +#define EMU_COUNT_SSE2(eip) +#endif + +#ifdef CONFIG_CPU_EMU_SSE2 /*{*/ +/* current->thread.fpu.state is still NULL, so cram XMM into struct fpu */ +#define XMMREG_ADDR(reg) (u32*)(¤t->thread.fpu.xmm[(reg)&7]) + +static int +do_mov128(struct pt_regs *regs, long error_code, int to_reg) + /* 0F 10|11 movups */ + /* 0F 28|29 movaps */ + /* 66 0F 10|11 movupd */ + /* 66 0F 28|29 movapd */ + /* 66 0F E7 movntdq */ + /* F3 0F 6F|7F movdqu */ +{ + /* movupd: move unaligned 128bit XMM/mem to XMM */ + u8 *eip = (u8*)regs->ip; + u32 *src, *dst; + u8 xdst, modrm; + if (eip[0]==0x66 || eip[0]==0xF3) eip++; + + modrm = eip[2]; + eip += 3; /* skips opcodes and modrm */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + if (!to_reg) { + /* do we ever have this? 
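+	 * A register-to-register store form is not something compilers are
+	 * expected to emit; returning 0 lets the caller fall through to its
+	 * invalid-opcode path rather than guess at the semantics.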
*/ + return 0; + } + src = XMMREG_ADDR(modrm & 7); + dst[0]=src[0]; + dst[1]=src[1]; + dst[2]=src[2]; + dst[3]=src[3]; + regs->ip = (u32)eip; + return 1; + } + src = modrm_address(regs, &eip, 1, modrm); + if (to_reg) { + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return 0; + } + dst[0]=src[0]; + dst[1]=src[1]; + dst[2]=src[2]; + dst[3]=src[3]; + } else { + if (!access_ok(VERIFY_WRITE, (void *)src, 16)) { + do_general_protection(regs, error_code); + return 0; + } + src[0]=dst[0]; + src[1]=dst[1]; + src[2]=dst[2]; + src[3]=dst[3]; + } + regs->ip = (u32)eip; + return 1; +} /* 66 0F 10|11 movupd */ + +static int +do_andps(struct pt_regs *regs, long error_code) + /* 0F 54 andps */ + /* 0F 55 andnps */ + /* 0F 56 orps */ + /* 0F 57 xorps */ +{ + u32 *src, *dst; + u8 *eip = (u8 *)regs->ip; + u8 xdst, modrm; + u8 eip1 = eip[1]; + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips opcodes and modrm */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return 0; + } + } + switch (eip1) { + case 0x54: + dst[0] &= src[0]; + dst[1] &= src[1]; + dst[2] &= src[2]; + dst[3] &= src[3]; break; + case 0x55: + dst[0] = (~dst[0]) & src[0]; + dst[1] = (~dst[1]) & src[1]; + dst[2] = (~dst[2]) & src[2]; + dst[3] = (~dst[3]) & src[3]; break; + case 0x56: + dst[0] |= src[0]; + dst[1] |= src[1]; + dst[2] |= src[2]; + dst[3] |= src[3]; break; + case 0x57: + dst[0] ^= src[0]; + dst[1] ^= src[1]; + dst[2] ^= src[2]; + dst[3] ^= src[3]; break; + default: return 0; + } + + regs->ip = (u32)eip; + return 1; +} /* 0F 54 andps */ /* 0F 55 andnps */ /* 0F 56 orps */ /* 0F 57 xorps */ + +static int +do_subps(struct pt_regs *regs, long error_code) + /* 0F 58 addps */ + /* 0F 59 mulps */ + /* 0F 5C subps */ + /* 0F 5D minps */ + /* 0F 5E divps */ + /* 0F 5F maxps */ +{ + u32 *src, *dst; + u8 *eip = (u8 *)regs->ip; + u8 xdst, modrm; + u8 eip1 = eip[1]; + int i; + char fsave[112]; + u16 fpsw; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips opcodes and modrm */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return 0; + } + } + + __asm__ __volatile__ ("fsave %0\n\t" : "=m" (fsave)); + + switch (eip1) { + case 0x58: /* addps */ + for (i=0;i<4;i++) { + /* dst[i] -= src[i] */ + __asm__ __volatile__( + "flds (%0)\n\t" + "fadds (%1)\n\t" + "fstps (%0)\n\t" + : + : "r" (&dst[i]), "r" (&src[i]) + ); + } + break; + case 0x59: /* mulps */ + for (i=0;i<4;i++) { + /* dst[i] -= src[i] */ + __asm__ __volatile__( + "flds (%0)\n\t" + "fmuls (%1)\n\t" + "fstps (%0)\n\t" + : + : "r" (&dst[i]), "r" (&src[i]) + ); + } + break; + case 0x5C: /* subps */ + for (i=0;i<4;i++) { + /* dst[i] -= src[i] */ + __asm__ __volatile__( + "flds (%0)\n\t" + "fsubs (%1)\n\t" + "fstps (%0)\n\t" + : + : "r" (&dst[i]), "r" (&src[i]) + ); + } + break; + case 0x5D: /* minps */ + for (i=0;i<4;i++) { + /* dst[i] = MIN(dst[i],src[i]) */ + __asm__ __volatile__( + "flds (%1)\n\t" + "flds (%2)\n\t" + 
"fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %0\n\t" + : "=r" (fpsw) + : "r" (&dst[i]), "r" (&src[i]) + : "ax" + ); +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + if (fpsw & FP_C2) /*unordered*/ { + dst[i] = src[i]; /* minps does so */ + } else if (fpsw & FP_C0) { + dst[i] = src[i]; /* src was smaller; rewrite */ + } + } + break; + case 0x5E: /* divps */ + for (i=0;i<4;i++) { + /* dst[i] -= src[i] */ + __asm__ __volatile__( + "flds (%0)\n\t" + "fdivs (%1)\n\t" + "fstps (%0)\n\t" + : + : "r" (&dst[i]), "r" (&src[i]) + ); + } + break; + case 0x5F: /* maxps */ + for (i=0;i<4;i++) { + /* dst[i] = MAX(dst[i],src[i]) */ + __asm__ __volatile__( + "flds (%1)\n\t" + "flds (%2)\n\t" + "fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %0\n\t" + : "=r" (fpsw) + : "r" (&src[i]), "r" (&dst[i]) + : "ax" + ); + if (fpsw & FP_C2) /*unordered*/ { + dst[i] = src[i]; /* maxps does so */ + } else if (fpsw & FP_C0) { + dst[i] = src[i]; /* src was larger; rewrite */ + } + } + break; + default: return 0; + } + __asm__ __volatile__ ("frstor %0\n\t" : "=m" (fsave)); + + regs->ip = (u32)eip; + return 1; +} /* 0F 5C|5D|5E|5F subps,minps,divps,maxps */ + +static int +do_movss(struct pt_regs *regs, long error_code) + /* F3 0F 10|11 movss: move 32bit float from XMM/mem to XMM */ +{ + u32 *src, *dst; + u8 *eip = (u8 *)regs->ip; + u8 xdst, modrm; + int to_reg = (eip[2]==0x10); + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips opcodes and modrm */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + if (!to_reg) { + /* do we ever have this? */ + return 0; + } + *dst = *src; /*32bit*/ + regs->ip = (u32)eip; + return 1; + } + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (to_reg) { + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return 0; + } + *dst = *src; /*32bit*/ + dst[1] = 0; /* other bits cleared */ + dst[2] = 0; + dst[3] = 0; + } else { + if (!access_ok(VERIFY_WRITE, (void *)src, 4)) { + do_general_protection(regs, error_code); + return 0; + } + *src = *dst; /*32bit*/ + } + regs->ip = (u32)eip; + return 1; +} /* F3 0F 10|11 movss */ + +#endif /* CONFIG_CPU_EMU_SSE2 } */ + /* [do_invalid_op] is called by exception 6 after an invalid opcode has been * encountered. It will decode the prefixes and the instruction code, to try * to emulate it, and will send a SIGILL or SIGSEGV to the process if not * possible. 
* REP/REPN prefixes are not supported anymore because it didn't make sense * to emulate instructions prefixed with such opcodes since no arch-specific @@ -923,12 +1263,15 @@ dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code) PREFIX_LOCK = 256, PREFIX_REPN = 512, PREFIX_REP = 1024 } prefixes = 0; u32 *src, *dst; u8 *eip = (u8 *)regs->ip; +#ifdef CONFIG_CPU_EMU_SSE2 + u32 sse_iter = 0; +#endif #ifdef CONFIG_GEODE_NOPL /*do_nopl_emu*/ { int res = is_nopl(eip); if (res) { @@ -1020,12 +1363,13 @@ dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code) #endif /* CONFIG_CPU_EMU_FUCOMI */ #ifdef BENCH_CPU_EXCEPTION_BUT_NOT_THE_CODE regs->ip += 3; return; #endif + /* we'll first read all known opcode prefixes, and discard obviously invalid combinations.*/ while (1) { /* prefix for CMOV, BSWAP, CMPXCHG, XADD */ if (*eip == 0x0F) { eip++; @@ -1298,18 +1642,3198 @@ dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code) } } /* if this opcode has not been processed, it's not a prefix. */ break; } /*while(1)*/ +sse_again: eip = (u8 *)regs->ip; /* reset */ +#if defined(CONFIG_CPU_EMU_SSE2) /*{*/ + + if (eip[0] == 0x0F && (eip[1] == 0x10 || eip[1] == 0x11)) { + /* 0F 10|11 movups */ + if (!do_mov128(regs, error_code, (eip[1]==0x10))) goto invalid_opcode; + EMU_COUNT_SSE(eip); + goto sse_return; + } + + if (eip[0] == 0x0F && (eip[1] == 0x12 || eip[1] == 0x13) && (eip[2]&0xC0) != 0xC0) { + /* movlps: move 64 bit mem/XMM from/to XMM */ + u8 xdst, modrm; + u8 to_reg = (eip[1] == 0x12); + + modrm = eip[2]; + eip += 3; /* skips all the opcodes */ + EMU_COUNT_SSE(eip); + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + src = modrm_address(regs, &eip, 1, modrm); + + /* we must verify that src is valid for this task */ + if (!access_ok((to_reg)?(VERIFY_READ):(VERIFY_WRITE), (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + + if (to_reg) { + dst[0] = src[0]; + dst[1] = src[1]; + } else { + src[0] = dst[0]; + src[1] = dst[1]; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 12|13 /r movlps */ + + if (eip[0] == 0x0F && eip[1] == 0x12 && (eip[2] & 0xC0) == 0xC0) { + /* movhlps: move high 64bit to low 64bit XMM */ + u8 xdst, modrm; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + src = XMMREG_ADDR(modrm & 7); + + dst[0] = src[2]; + dst[1] = src[3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 12 /r movhlps */ + + if (eip[0] == 0x0F && eip[1] == 0x14) { + /* unpcklps: interleave unpack lower 2 32bits */ + u8 xdst, modrm; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + dst[3] = src[1]; + dst[2] = dst[1]; + dst[1] = src[0]; + dst[0] = dst[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 14 unpcklps */ + + if (eip[0] == 0x0F && eip[1] == 0x16) { + /* movhps: move src mem to upper 64bit of xmm */ + u8 xdst, modrm; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + goto 
invalid_opcode; + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[2] = src[0]; + dst[3] = src[1]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 16 movhps from memory */ + + if (eip[0] == 0x0F && eip[1] == 0x17) { + /* movhps: move src xmm upper 64bit to mem */ + u8 xdst, modrm; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + goto invalid_opcode; + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_WRITE, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + src[0] = dst[2]; + src[1] = dst[3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 17 movhps to memory */ + + if (eip[0] == 0x0F && eip[1] == 0x18 && (eip[2] & 0340) == 0000) { + /* prefetch */ + EMU_COUNT_SSE(eip); + eip += 3; /* just skip over */ + regs->ip = (u32)eip; + goto sse_return; + } /* 0f 18 /1,/2,/3,/0 prefetcht0, prefetcht1, prefetcht2, prefetchnta */ + + if (eip[0] == 0x0F && (eip[1] == 0x28 || eip[1]==0x29)) { + if (!do_mov128(regs, error_code, (eip[1]==0x28))) goto invalid_opcode; + EMU_COUNT_SSE(eip); + goto sse_return; + } /* 0F 28|29 movaps */ + + if (eip[0] == 0x0F && (eip[1] == 0x2E || eip[1] == 0x2F)) { + /* ucomiss/comiss: compare 32bit float XMM/mem to XMM, set eflags */ + u8 xdst, modrm; + u8 eip1 = eip[1]; + u16 ret; + char fsave[112]; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + /* don't use FUCOMIP here, CPU may not support it */ + switch (eip1) { + case 0x2E: /* ucomiss */ + __asm__ __volatile__ ( \ + "fsave %0\n\t" \ + "flds (%3)\n\t" \ + "flds (%2)\n\t" \ + /* st0=dst st1=src */ \ + "fucompp\n\t" + "fstsw %%ax \n\t" \ + "mov %%ax,%1 \n\t" \ + "frstor %0\n\t" \ + : "+m" (fsave), "=r" (ret) \ + : "r" (dst), "r" (src) \ + : "ax" \ + ); + break; + case 0x2F: /* comiss */ + __asm__ __volatile__ ( \ + "fsave %0\n\t" \ + "flds (%3)\n\t" \ + "flds (%2)\n\t" \ + /* st0=dst st1=src */ \ + "fcompp\n\t" + "fstsw %%ax \n\t" \ + "mov %%ax,%1 \n\t" \ + "frstor %0\n\t" \ + : "+m" (fsave), "=r" (ret) \ + : "r" (dst), "r" (src) \ + : "ax" \ + ); + break; + default: goto invalid_opcode; + } + /* now ret has FPU status flags, copy them into ZF,PF,CF */ + regs->flags = flags_CtoS(ret, regs->flags); + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 2E ucomiss */ /* 0F 2F comiss */ + + if (eip[0] == 0x0F && (eip[1]&0xFC) == 0x54) { + /* 0F 54 andps */ + /* 0F 55 andnps */ + /* 0F 56 orps */ + /* 0F 57 xorps */ + if (!do_andps(regs, error_code)) goto invalid_opcode; + goto sse_return; + } + + if (eip[0] == 0x0F && eip[1] == 0x5A) { + /* cvtps2pd: expand 2 floats to 2 doubles */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { 
/* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__ ( \ + "fsave %0\n\t" \ + "flds 4(%2)\n\t" \ + "flds (%2)\n\t" \ + "fstpl (%1)\n\t" \ + "fstpl 8(%1)\n\t" \ + "frstor %0\n\t" \ + : "+m" (fsave) + : "r" (dst), "r" (src) \ + : "ax" \ + ); + regs->ip = (u32)eip; + goto sse_return; + } /* 0F 5A cvtps2pd */ + + if (eip[0] == 0x0F && (eip[1] == 0x58 || eip[1] == 0x59 || (eip[1]&0xFC) == 0x5C)) { + /* 0F 58 addps */ + /* 0F 59 mulps */ + /* 0F 5C subps */ + /* 0F 5D minps */ + /* 0F 5E divps */ + /* 0F 5F maxps */ + if (!do_subps(regs, error_code)) goto invalid_opcode; + goto sse_return; + } + + if ( eip[0] == 0x0F && eip[1] == 0xAE && eip[2] == 0xF0 ) { + /* mfence; do nothing */ + EMU_COUNT_SSE2(eip); + eip += 3; + regs->ip = (u32)eip; + goto sse_return; + } + /* 0F AE F0 mfence */ + + if (eip[0] == 0x0F && eip[1] == 0xC2) { + /* cmpps: compare as 32bit float XMM/mem against XMM and set bitmask */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + int cond; + int i; + u8 op; /* comparison operator, the immediate byte */ + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + __asm__ __volatile__("fsave %0\n\t" : "=m" (fsave)); + op = *eip++; + + for (i=0; i<4; i++) { + __asm__ __volatile__( + "flds (%1)\n\t" + "flds (%2)\n\t" + /* st0=dst st1=src */ + "fucompp\n\t" + "fstsw %%ax \n\t" + "mov %%ax,%0 \n\t" + : "=r" (fpsw) + : "r" (&src[i]), "r" (&dst[i]) + : "ax" + ); + + /* branch by immediate suffix byte. + * easy by circuit but a mess by program. + */ + /* bits in x87 Status Word fpsw */ +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + fpsw &= (FP_C0|FP_C2|FP_C3); + cond = 0; + switch(op) { + case 0: /*EQ*/ + if (fpsw == FP_C3) cond=1; break; + case 1: /*LT*/ + if (fpsw == FP_C0) cond=1; break; + case 2: /*LE*/ + if (fpsw == FP_C3 || + fpsw == FP_C0) cond=1; break; + case 3: /*UNORD*/ + if (fpsw ==(FP_C3|FP_C2|FP_C0)) cond=1; break; + case 4: /*NEQ*/ + if (!(fpsw == FP_C3)) cond=1; break; + case 5: /*NLT*/ + if (!(fpsw == FP_C0)) cond=1; break; + case 6: /*NLE*/ + if (!(fpsw == FP_C3 || + fpsw == FP_C0)) cond=1; break; + case 7: /*ORD*/ + if (!(fpsw ==(FP_C3|FP_C2|FP_C0))) cond=1; break; + default: + goto invalid_opcode; + } + /* set result bitnask */ + dst[i] = (cond) ? 
0xFFFFFFFFUL : 0; + } + __asm__ __volatile__("frstor %0\n\t" : "=m" (fsave)); + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F C2 cmpps */ + + if (eip[0] == 0x0F && eip[1] == 0xC6) { + /* shufps: shuffle 32bits */ + u8 xdst, modrm; + union { + u8 im8; + struct { unsigned sh0:2; unsigned sh1:2; unsigned sh2:2; unsigned sh3:2; } s __attribute__((packed)); + } im; + union xmm_t stmp, dtmp; + + modrm = eip[2]; + EMU_COUNT_SSE(eip); + eip += 3; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + im.im8 = *eip++; /* immediate op */ + + /* copy, in case src==dst */ + stmp = *(union xmm_t *)src; + dtmp = *(union xmm_t *)dst; + + dst[0]= dtmp.dw.d[im.s.sh0]; + dst[1]= dtmp.dw.d[im.s.sh1]; + dst[2]= stmp.dw.d[im.s.sh2]; + dst[3]= stmp.dw.d[im.s.sh3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 0F C6 shufps */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0x10 || eip[2]==0x11)) { + if (!do_mov128(regs, error_code, (eip[2]==0x10))) goto invalid_opcode; + EMU_COUNT_SSE2(eip); + goto sse_return; + } /* 66 0F 10|11 movupd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x14) { + /* unpcklpd: move lower 64bits of XMM/mem to XMM high 64bits*/ + u8 xdst, modrm; + modrm = eip[3]; + eip += 4; /* skips all the opcodes */ + EMU_COUNT_SSE2(eip); + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[2] = src[0]; + dst[3] = src[1]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 14 unpcklpd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x17) { + /* movhpd: move src upper 64bit XMM to mem */ + u8 xdst, modrm; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + goto invalid_opcode; + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_WRITE, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + src[0] = dst[2]; + src[1] = dst[3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 17 movhpd to memory */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0x28 || eip[2]==0x29)) { + /* movapd: move 128bit XMM/mem to XMM */ + if (!do_mov128(regs, error_code, (eip[2]==0x28))) goto invalid_opcode; + EMU_COUNT_SSE2(eip); + goto sse_return; + } + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0x2E || eip[2] == 0x2F)) { + /* comisd|ucomisd: compare XMM/mem as double to XMM */ + u8 xdst, modrm; + u16 ret; + u8 eip2 = eip[2]; + char fsave[112]; + + modrm = eip[3]; + eip += 4; /* skips all the opcodes */ + EMU_COUNT_SSE2(eip); + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if 
(!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + /* don't use FUCOMIP here, CPU may not support it */ + switch (eip2) { + case 0x2E: /* ucomisd */ + __asm__ __volatile__ ( \ + "fsave %0\n\t" \ + "fldl (%3)\n\t" \ + "fldl (%2)\n\t" \ + /* st0=dst st1=src */ \ + "fucompp\n\t" /*XXX clobbers FPU C3,C2,C0*/ \ + "fstsw %%ax \n\t" \ + "mov %%ax,%1 \n\t" \ + "frstor %0\n\t" \ + : "+m" (fsave), "=r" (ret) \ + : "r" (dst), "r" (src) \ + : "ax" \ + ); + break; + case 0x2F: /* comisd */ + __asm__ __volatile__ ( \ + "fsave %0\n\t" \ + "fldl (%3)\n\t" \ + "fldl (%2)\n\t" \ + /* st0=dst st1=src */ \ + "fcompp\n\t" /*XXX clobbers FPU C3,C2,C0*/ \ + "fstsw %%ax \n\t" \ + "mov %%ax,%1 \n\t" \ + "frstor %0\n\t" \ + : "+m" (fsave), "=r" (ret) \ + : "r" (dst), "r" (src) \ + : "ax" \ + ); + break; + } + /* now ret has FPU status flags, copy them into ZF,PF,CF */ + regs->flags = flags_CtoS(ret, regs->flags); + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 2E ucomisd */ /* 66 0F 2F comisd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x50 && (eip[3]&0xC0) == 0xC0) { + /* movmskpd move sign bits of XMM double to reg32 */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = reg_address(regs, 1, xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + /* only reg->reg allowed */ + goto invalid_opcode; + } + + /* pack MSB of each double to dest */ + *dst = ((src[3]>>(6+24))&2) | ((src[1]>>(7+24))&1); + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 50 /r movmskpd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2]&0xFC) == 0x54) { + /* 66 0F 54 andpd: logical AND of XMM */ + /* 66 0F 55 andnpd: XMM = !XMM & XMM/mem */ + /* 66 0F 56 orpd: XMM = XMM | XMM/mem */ + /* 66 0F 57 xorpd: logical XOR of XMM */ + u8 xdst, modrm; + u8 eip2 = eip[2]; /* 0x54 - 0x57 */ + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + switch (eip2) { + case 0x54: + dst[0] &= src[0]; + dst[1] &= src[1]; + dst[2] &= src[2]; + dst[3] &= src[3]; break; + case 0x55: + dst[0] = (~dst[0]) & src[0]; + dst[1] = (~dst[1]) & src[1]; + dst[2] = (~dst[2]) & src[2]; + dst[3] = (~dst[3]) & src[3]; break; + case 0x56: + dst[0] |= src[0]; + dst[1] |= src[1]; + dst[2] |= src[2]; + dst[3] |= src[3]; break; + case 0x57: + dst[0] ^= src[0]; + dst[1] ^= src[1]; + dst[2] ^= src[2]; + dst[3] ^= src[3]; break; + default: goto invalid_opcode; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 54|55|56|57 andpd andnpd orpd xorpd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x5A) { + /* cvtpd2ps: convert 2 double XMM/mem to 2 float into XMM */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src 
is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__ ( + "fsave %0\n\t" + "fldl 8(%2)\n\t" + "fldl (%2)\n\t" + "fstps (%1)\n\t" + "fstps 4(%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "+r" (dst) + : "r" (src) + ); + *(u64*)(dst+2) = 0ULL; /* high bits cleared */ + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 5A cvtpd2ps */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x5C) { + /* subpd: subtract 2 64bit double of XMM/mem from XMM */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + __asm__ __volatile__ ( + "fsave %0\n\t" + "fldl (%1)\n\t" + "fsubl (%2)\n\t" + "fstpl (%1)\n\t" + "fldl 8(%1)\n\t" + "fsubl 8(%2)\n\t" + "fstpl 8(%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "+r" (dst) + : "r" (src) + ); + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 5C subpd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x60) { + /* punpcklbw interleave lower bytes of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? */ + u8 *bsrc, *bdst; + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (u8*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (u8*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst[15] = bsrc[7]; + bdst[14] = bdst[7]; + bdst[13] = bsrc[6]; + bdst[12] = bdst[6]; + bdst[11] = bsrc[5]; + bdst[10] = bdst[5]; + bdst[ 9] = bsrc[4]; + bdst[ 8] = bdst[4]; + bdst[ 7] = bsrc[3]; + bdst[ 6] = bdst[3]; + bdst[ 5] = bsrc[2]; + bdst[ 4] = bdst[2]; + bdst[ 3] = bsrc[1]; + bdst[ 2] = bdst[1]; + bdst[ 1] = bsrc[0]; + bdst[ 0] = bdst[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 60 punpcklbw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x61) { + /* punpcklwd interleave lower words of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? */ + u16 *wsrc, *wdst; + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (u16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (u16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + wdst[ 7] = wsrc[3]; + wdst[ 6] = wdst[3]; + wdst[ 5] = wsrc[2]; + wdst[ 4] = wdst[2]; + wdst[ 3] = wsrc[1]; + wdst[ 2] = wdst[1]; + wdst[ 1] = wsrc[0]; + wdst[ 0] = wdst[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 61 punpcklwd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x62) { + /* punpckldq interleave lower 64bits of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? 
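+	 * (with dst = {d0,d1,d2,d3} and src = {s0,s1,s2,s3} the result is {d0,s0,d1,s1})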
*/ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[3] = src[1]; + dst[2] = dst[1]; + dst[1] = src[0]; + dst[0] = dst[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 62 punpckldq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x63) { + /* packsswb: pack mem/XMM:XMM signed shorts into XMM bytes with saturation */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm, xdst; + s16 *wsrc, *wdst; + s8 *bdst; + union xmm_t xtmp; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + wdst = (s16*)dst; + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst = (u8*)&xtmp; /* in case src==dst */ + for (i=0;i<8;i++) { + signed int x = wdst[i]; + bdst[i] = (x<-128)?-128:((x>127)?127:x); + } + for (i=0;i<8;i++) { + signed int x = wsrc[i]; + bdst[i+8] = (x<-128)?-128:((x>127)?127:x); + } + *(union xmm_t*)dst = xtmp; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 63 packsswb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x65) { + /* pcmpgtw compare packed words XMM/mem to XMM and fill flags */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + s16 *wsrc, *wdst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (s16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = (s16*)modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + for (i=0;i<8;i++) { + wdst[i] = (wdst[i] > wsrc[i]) ? 0xFFFF:0; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 65 pcmpgtw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x67) { + /* packuswb: pack mem/XMM:XMM words into XMM bytes with saturation */ + /* !!! MMX enabled CPUs never comes here !! 
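+	 * (each signed 16-bit word is clamped to the unsigned range 0..255;
+	 *  dst supplies the low eight result bytes, src the high eight)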
*/ + u8 modrm, xdst; + u8 *bdst; + s16 *wsrc, *wdst; + union xmm_t xtmp; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + wdst = (s16*)dst; + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst = (u8*)&xtmp; /* in case src==dst */ + for (i=0;i<8;i++) { + int x = wdst[i]; + bdst[i] = (x<0)?0:((x>255)?255:x); + } + for (i=0;i<8;i++) { + int x = wsrc[i]; + bdst[i+8] = (x<0)?0:((x>255)?255:x); + } + *(union xmm_t*)dst = xtmp; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 67 packuswb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x68) { + /* punpckhbw interleave high bytes of XMM/mem to XMM */ + u8 *bsrc, *bdst; + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (u8*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (u8*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst[ 0] = bdst[ 8]; + bdst[ 1] = bsrc[ 8]; + bdst[ 2] = bdst[ 9]; + bdst[ 3] = bsrc[ 9]; + bdst[ 4] = bdst[10]; + bdst[ 5] = bsrc[10]; + bdst[ 6] = bdst[11]; + bdst[ 7] = bsrc[11]; + bdst[ 8] = bdst[12]; + bdst[ 9] = bsrc[12]; + bdst[10] = bdst[13]; + bdst[11] = bsrc[13]; + bdst[12] = bdst[14]; + bdst[13] = bsrc[14]; + bdst[14] = bdst[15]; + bdst[15] = bsrc[15]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 68 punpckhbw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x69) { + /* punpckhwd interleave high words of XMM/mem to XMM */ + u16 *wsrc, *wdst; + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (u16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (u16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + wdst[0] = wdst[4]; + wdst[1] = wsrc[4]; + wdst[2] = wdst[5]; + wdst[3] = wsrc[5]; + wdst[4] = wdst[6]; + wdst[5] = wsrc[6]; + wdst[6] = wdst[7]; + wdst[7] = wsrc[7]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 69 punpckhwd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x6A) { + /* punpckhdq interleave upper 64bits of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? 
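+	 * (with dst = {d0,d1,d2,d3} and src = {s0,s1,s2,s3} the result is {d2,s2,d3,s3})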
*/ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[0] = dst[2]; + dst[1] = src[2]; + dst[2] = dst[3]; + dst[3] = src[3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 6A punpckhdq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x63) { + /* packsswb: pack mem/XMM:XMM signed shorts into XMM bytes with saturation */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm, xdst; + s32 *dsrc, *ddst; + s16 *wdst; + union xmm_t xtmp; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + ddst = (s32*)dst; + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + dsrc = XMMREG_ADDR(modrm & 7); + } else { + dsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)dsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + wdst = (s16*)&xtmp; /* in case src==dst */ + for (i=0;i<4;i++) { + signed long x = ddst[i]; + wdst[i] = (x<-32768)?-32768:((x>32767)?32767:x); + } + for (i=0;i<4;i++) { + signed long x = dsrc[i]; + wdst[i+4] = (x<-32768)?-32768:((x>32767)?32767:x); + } + *(union xmm_t*)dst = xtmp; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 63 packsswb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x6B) { + /* packssdw: pack mem/XMM:XMM signed long into XMM short with saturation */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm, xdst; + s32 *dsrc, *ddst; + s16 *wdst; + union xmm_t xtmp; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + ddst = (s32*)dst; + wdst = (s16*)&xtmp; + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + dsrc = XMMREG_ADDR(modrm & 7); + } else { + dsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)dsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + for (i=0;i<4;i++) { + signed long x = ddst[i]; + wdst[i] = (x<-32768)?-32768:((x>32767)?32767:x); + } + for (i=0;i<4;i++) { + signed long x = dsrc[i]; + wdst[i+4] = (x<-32768)?-32768:((x>32767)?32767:x); + } + *(union xmm_t*)dst = xtmp; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 6B packssdw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x6C) { + /* punpcklqdq interleave 64bits of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? 
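+	 * (the low 64 bits of dst stay in place and the low 64 bits of src
+	 *  become the new high half)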
*/ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[3] = src[1]; + dst[2] = src[0]; + dst[1] = dst[1]; + dst[0] = dst[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 6C punpcklqdq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x6D) { + /* punpckhqdq interleave 64/64bits of XMM/mem to XMM/128 */ + /* ?? MMX enabled CPUs doesn't come here ?? */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[0] = dst[2]; + dst[1] = dst[3]; + dst[2] = src[2]; + dst[3] = src[3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 6D punpckhqdq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x6E) { + /* movd reg32/mem to XMM/128 */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = reg_address(regs, 1, modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[0] = src[0]; + dst[1] = 0; + dst[2] = 0; + dst[3] = 0; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 6E movd reg32/mem->xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x70) { + /* pshufd: shuffle 32bits */ + u8 xdst, modrm; + union { + u8 im8; + struct { unsigned sh0:2; unsigned sh1:2; unsigned sh2:2; unsigned sh3:2; } s __attribute__((packed)); + } im; + union xmm_t tmp; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + im.im8 = *eip++; /* immediate op */ + + tmp = *(union xmm_t *)src; /* in case src==dst */ + + dst[0]= tmp.dw.d[im.s.sh0]; + dst[1]= tmp.dw.d[im.s.sh1]; + dst[2]= tmp.dw.d[im.s.sh2]; + dst[3]= tmp.dw.d[im.s.sh3]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 70 pshufd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0x6F || eip[2]==0x7F)) { + /* movdqa: move aligned 128bit XMM/mem to XMM */ + /* !!! MMX enabled CPUs never comes here !! 
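+	 * (reuses do_mov128(), which copies four 32-bit words and does not
+	 *  enforce the 16-byte alignment the real instruction would require)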
*/ + if (!do_mov128(regs, error_code, (eip[2]==0x6F))) goto invalid_opcode; + EMU_COUNT_SSE2(eip); + goto sse_return; + } /* 66 0F 6F|7F movdqa */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x71 && (eip[3]&0370) == 0320) { + /* psrlw $imm, %xmm: shift packed right logical */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + u16 *wdst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + wdst = (u16*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; + for (i=0;i<8;i++) { + wdst[i] = wdst[i] >> imm; + } + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 71 /2 psrlw $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x71 && (eip[3]&0370) == 0360) { + /* psllw $imm, %xmm: shift packed left logical */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + u16 *wdst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + wdst = (u16*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; + for (i=0;i<8;i++) { + wdst[i] = wdst[i] << imm; + } + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 71 /6 psllw $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x72 && (eip[3]&0370) == 0320) { + /* psrld $imm, %xmm: shift packed right logical */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + u32 *ddst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + ddst = XMMREG_ADDR(modrm & 7); + + imm = *eip++; + for (i=0;i<4;i++) { + ddst[i] = ddst[i] >> imm; + } + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 72 /2 psrld $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x72 && (eip[3]&0370) == 0340) { + /* psrad $imm, %xmm: shift packed right arithmetic */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + s32 *ddst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + ddst = XMMREG_ADDR(modrm & 7); + + imm = *eip++; + for (i=0;i<4;i++) { + ddst[i] = ddst[i] >> imm; + } + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 72 /4 psrad $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x72 && (eip[3]&0370) == 0360) { + /* pslld $imm, %xmm: shift packed left logical */ + /* !!! MMX enabled CPUs never comes here !! + * It ignores 0x66 prefix and just process as MMX instruction, + * which is wrong. + */ + u8 modrm; + u8 imm; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + dst = XMMREG_ADDR(modrm & 7); + + imm = *eip++; + for (i=0;i<4;i++) { + dst[i] = dst[i] << imm; + } + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 72 /6 pslld $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x73 && (eip[3]&0370) == 0320) { + /* psrlq $imm, %xmm: shift packed right logical */ + /* !!! MMX enabled CPUs never comes here !! 
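+	 * (each 64-bit half of the register is shifted right independently
+	 *  by the immediate count)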
*/ + u8 modrm; + u8 imm; + u64 *qdst; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + qdst = (u64*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; + + qdst[0] = qdst[0] >> imm; + qdst[1] = qdst[1] >> imm; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 73 /2 psrlq $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x73 && (eip[3]&0370) == 0330) { + /* psrldq $imm, %xmm: shift %xmm right logical */ + u8 modrm; + u8 imm; + u64 *qdst; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + qdst = (u64*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; /* immediate; BYTES to shift */ + if (imm > 15) imm=16; + + if (imm < 8) { + u64 o; + o = qdst[1] & ((1ULL<<(imm*8))-1); /* shifted bytes to qdst[0] */ + qdst[1] >>= (imm*8); + qdst[0] = (qdst[0] >> (imm*8)) | (o << (8-imm)*8); + } else { + qdst[0] = qdst[1] >> ((imm-8)*8); + qdst[1] = 0; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 73 /3 psrldq $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x73 && (eip[3]&0370) == 0360) { + /* psllq $imm, %xmm: shift packed left logical */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + u64 *qdst; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + qdst = (u64*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; + + qdst[0] = qdst[0] << imm; + qdst[1] = qdst[1] << imm; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 73 /6 psllq $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x73 && (eip[3]&0370) == 0370) { + /* pslldq $imm, %xmm: shift 128bit left logical */ + /* !!! MMX enabled CPUs never comes here !! */ + u8 modrm; + u8 imm; + u64 *qdst, cr; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + qdst = (u64*)XMMREG_ADDR(modrm & 7); + + imm = *eip++; + + if (imm > 15) imm=16; + + cr = (qdst[0] & (~0ULL << (64-imm*8))) >> (64-imm*8); + qdst[0] = qdst[0] << (imm*8); + qdst[1] = (qdst[1] << (imm*8)) | cr;; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 73 /7 pslldq $imm, %xmm */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x74) { + /* pcmpeqb compare packed bytes XMM/mem to XMM and fill flags */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + u8 *bsrc, *bdst; + int i; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (u8*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (u8*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = (u8*)modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + for (i=0;i<16;i++) { + bdst[i] = (bdst[i] == bsrc[i]) ? 0xFF:0; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 74 pcmpeqb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x76) { + /* pcmpeqd compare XMM/mem to XMM and fill flags */ + /* !! MMX enabled CPUs doesn't come here !! 
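+	 * (each 32-bit lane is set to all-ones where dst equals src and to zero elsewhere)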
*/ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + dst[0]=(dst[0]==src[0])?0xffffffff:0; + dst[1]=(dst[1]==src[1])?0xffffffff:0; + dst[2]=(dst[2]==src[2])?0xffffffff:0; + dst[3]=(dst[3]==src[3])?0xffffffff:0; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 76 pcmpeqd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0x7E) { + /* movd XMM to reg32/m32 */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = reg_address(regs, 1, modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_WRITE, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + + *src = *dst; /* 32bit doubleword */ + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F 7E movd xmm->reg/mem */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xC2) { + /* cmppd: compare 2 64bit double XMM/mem against XMM and set bitmask */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + int i; + int cond; + u8 op; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + op = *eip++; /* immediate comparison spec */ + + __asm__ __volatile__("fsave %0\n\t" : "=m" (fsave)); + + for (i=0; i<4; i+=2) { + __asm__ __volatile__( + "fldl (%2)\n\t" + "fldl (%3)\n\t" + /* st0=dst st1=src */ + "fucompp\n\t" + "fstsw %%ax \n\t" + "mov %%ax,%1 \n\t" + : "=m" (fsave), "=r" (fpsw) + : "r" (&src[i]), "r" (&dst[i]) + : "ax" + ); + + /* branch by immediate suffix byte. + * easy by circuit but a mess by program. + */ + /* bits in x87 Status Word fpsw */ +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + fpsw &= (FP_C0|FP_C2|FP_C3); + cond = 0; + switch(op) { + case 0: /*EQ*/ + if (fpsw == FP_C3) cond=1; break; + case 1: /*LT*/ + if (fpsw == FP_C0) cond=1; break; + case 2: /*LE*/ + if (fpsw == FP_C3 || + fpsw == FP_C0) cond=1; break; + case 3: /*UNORD*/ + if (fpsw ==(FP_C3|FP_C2|FP_C0)) cond=1; break; + case 4: /*NEQ*/ + if (!(fpsw == FP_C3)) cond=1; break; + case 5: /*NLT*/ + if (!(fpsw == FP_C0)) cond=1; break; + case 6: /*NLE*/ + if (!(fpsw == FP_C3 || + fpsw == FP_C0)) cond=1; break; + case 7: /*ORD*/ + if (!(fpsw ==(FP_C3|FP_C2|FP_C0))) cond=1; break; + default: + goto invalid_opcode; + } + /* set result bitnask */ + *(u64*)(dst+i) = (cond) ? 
0xFFFFFFFFFFFFFFFFULL : 0; + } + __asm__ __volatile__("frstor %0\n\t" : "=m" (fsave)); + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F C2 cmppd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xD4) { + /* paddq: 64bit SIMD add XMM/128 to XMM */ + /* ?? MMX enabled CPUs doesn't come here ?? */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + /* overflow wraps */ + *(u64*) dst += *(u64*) src; + *(u64*)(dst+2) += *(u64*)(src+2); + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F D4 paddq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xD5) { + /* pmullw: multiply by words and store lower 16bits */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + s16 *wdst, *wsrc; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (s16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + wdst[7] = (wdst[7] * wsrc[7]) & 0xffff; + wdst[6] = (wdst[6] * wsrc[6]) & 0xffff; + wdst[5] = (wdst[5] * wsrc[5]) & 0xffff; + wdst[4] = (wdst[4] * wsrc[4]) & 0xffff; + wdst[3] = (wdst[3] * wsrc[3]) & 0xffff; + wdst[2] = (wdst[2] * wsrc[2]) & 0xffff; + wdst[1] = (wdst[1] * wsrc[1]) & 0xffff; + wdst[0] = (wdst[0] * wsrc[0]) & 0xffff; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F D5 pmullw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xD6) { + /* movq XMM to XMM/mem 64bit */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_WRITE, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + + src[0] = dst[0]; + src[1] = dst[1]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F D6 movq xmm->xmm/mem */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xD7 && (eip[3]&0xC0) == 0xC0) { + /* pmovmskb move sign bits of XMM to reg32 */ + /* !! MMX enabled CPUs doesn't come here !! 
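+	 * (the sign bits of the sixteen source bytes are packed into bits 0..15
+	 *  of the destination general-purpose register)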
*/ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = reg_address(regs, 1, xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + /* only reg->reg allowed */ + goto invalid_opcode; + } + + /* pack MSB of each byte to dest */ + *dst = ((src[0]&0x80) >> (8-1)) | ((src[0]&0x8000) >> (16-2)) | + ((src[0]&0x800000) >> (24-3)) | ((src[0]&0x80000000) >> (32-4)) | + ((src[1]&0x80) >> (8-1-4)) | ((src[1]&0x8000) >> (16-2-4)) | + ((src[1]&0x800000) >> (24-3-4)) | ((src[1]&0x80000000) >> (32-4-4)) | + ((src[2]&0x80) << 1) | ((src[2]&0x8000) >> (16-2-8)) | + ((src[2]&0x800000) >> (24-3-8)) | ((src[2]&0x80000000) >> (32-4-8)) | + ((src[3]&0x80) << 5) | ((src[3]&0x8000) >> (16-2-12)) | + ((src[3]&0x800000) >> (24-3-12))| ((src[3]&0x80000000) >> (32-4-12)); + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F D7 pmovmskb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xD8) { + /* psubusb packed unsigned byte subtract with 0-saturation */ + /* used in cc1 */ + u8 xdst, modrm; + u8 *bsrc, *bdst; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (u8*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (u8*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = (u8*)modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst[0] = (bdst[0]ip = (u32)eip; + goto sse_return; + } /* 66 0F D8 psubusb */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0xDB || eip[2] == 0xDF || eip[2] == 0xEB || eip[2] == 0xEF || eip[2] == 0xFA || eip[2] == 0xFE)) { + /* pxor XMM/128 to XMM */ + /* por XMM/128 to XMM */ + /* pand */ + /* pandn */ + /* paddd */ + /* psubd */ + /* !! MMX enabled CPUs doesn't come here !! 
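+	 * (all six opcodes work lane-wise on the four 32-bit dwords, so a single
+	 *  switch on the opcode byte covers the whole group)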
*/ + u8 xdst, modrm; + u8 eip2 = eip[2]; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + switch (eip2) { + case 0xDB: /* pand */ + dst[0] &= src[0]; + dst[1] &= src[1]; + dst[2] &= src[2]; + dst[3] &= src[3]; break; + case 0xDF: /* pandn */ + dst[0] = (~dst[0]) & src[0]; + dst[1] = (~dst[1]) & src[1]; + dst[2] = (~dst[2]) & src[2]; + dst[3] = (~dst[3]) & src[3]; break; + case 0xEB: /* por */ + dst[0] |= src[0]; + dst[1] |= src[1]; + dst[2] |= src[2]; + dst[3] |= src[3]; break; + case 0xEF: /* pxor */ + dst[0] ^= src[0]; + dst[1] ^= src[1]; + dst[2] ^= src[2]; + dst[3] ^= src[3]; break; + case 0xFA: /* psubd */ + dst[0] -= src[0]; + dst[1] -= src[1]; + dst[2] -= src[2]; + dst[3] -= src[3]; break; + case 0xFE: /* paddd */ + dst[0] += src[0]; + dst[1] += src[1]; + dst[2] += src[2]; + dst[3] += src[3]; break; + default: goto invalid_opcode; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F DB pand */ /* 66 0F DF pandn */ /* 66 0F EB por */ /* 66 0F EF pxor */ /* 66 0F FA psubd */ /* 66 0F FE paddd */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xDE) { + /* pmaxub packed unsigned byte maximum */ + u8 xdst, modrm; + u8 *bsrc, *bdst; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (u8*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (u8*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = (u8*)modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + bdst[0] = (bdst[0]>bsrc[0])?bdst[0]:bsrc[0]; + bdst[1] = (bdst[1]>bsrc[1])?bdst[1]:bsrc[1]; + bdst[2] = (bdst[2]>bsrc[2])?bdst[2]:bsrc[2]; + bdst[3] = (bdst[3]>bsrc[3])?bdst[3]:bsrc[3]; + bdst[4] = (bdst[4]>bsrc[4])?bdst[4]:bsrc[4]; + bdst[5] = (bdst[5]>bsrc[5])?bdst[5]:bsrc[5]; + bdst[6] = (bdst[6]>bsrc[6])?bdst[6]:bsrc[6]; + bdst[7] = (bdst[7]>bsrc[7])?bdst[7]:bsrc[7]; + bdst[8] = (bdst[8]>bsrc[8])?bdst[8]:bsrc[8]; + bdst[9] = (bdst[9]>bsrc[9])?bdst[9]:bsrc[9]; + bdst[10] = (bdst[10]>bsrc[10])?bdst[10]:bsrc[10]; + bdst[11] = (bdst[11]>bsrc[11])?bdst[11]:bsrc[11]; + bdst[12] = (bdst[12]>bsrc[12])?bdst[12]:bsrc[12]; + bdst[13] = (bdst[13]>bsrc[13])?bdst[13]:bsrc[13]; + bdst[14] = (bdst[14]>bsrc[14])?bdst[14]:bsrc[14]; + bdst[15] = (bdst[15]>bsrc[15])?bdst[15]:bsrc[15]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F DE pmaxub */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xE5) { + /* pmulhw: multiply by words and store upper 16bits */ + /* !! MMX enabled CPUs doesn't come here !! 
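+	 * (each pair of signed 16-bit words is multiplied in 32-bit precision
+	 *  and only the upper 16 bits of the product are kept)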
*/ + u8 xdst, modrm; + s16 *wdst, *wsrc; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (s16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + wdst[7] = ((s32)wdst[7] * wsrc[7]) >> 16; + wdst[6] = ((s32)wdst[6] * wsrc[6]) >> 16; + wdst[5] = ((s32)wdst[5] * wsrc[5]) >> 16; + wdst[4] = ((s32)wdst[4] * wsrc[4]) >> 16; + wdst[3] = ((s32)wdst[3] * wsrc[3]) >> 16; + wdst[2] = ((s32)wdst[2] * wsrc[2]) >> 16; + wdst[1] = ((s32)wdst[1] * wsrc[1]) >> 16; + wdst[0] = ((s32)wdst[0] * wsrc[0]) >> 16; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F E5 pmulhw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xE6) { + /* cvttpd2dq: convert 2 double XMM/mem to 2 32bit long into XMM */ + u8 xdst, modrm; + char fsave[112]; + u16 cw; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__ ( + "fsave %0\n\t" + "fldl 8(%3)\n\t" + "fldl (%3)\n\t" + "fstcw %1\n\t" + "orw $0x0c00, %1\n\t" /* RC_CHOP */ + "fldcw %1\n\t" + "fistpl (%2)\n\t" + "fldcw %1\n\t" + "fistpl 4(%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "+m" (cw) + : "r" (dst), "r" (src) + ); + *(u64*)(dst+2) = 0ULL; /* high bits cleared */ + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F E6 cvttpd2dq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xE7) { + /* movntdq: movdqa with nocache hint */ + if (!do_mov128(regs, error_code, 0)) goto invalid_opcode; + EMU_COUNT_SSE2(eip); + goto sse_return; + } /* 66 0F E7 movntdq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xF4) { + /* pmuludq: 32*32=64 XMM/128 to XMM */ + u8 xdst, modrm; + modrm = eip[3]; + eip += 4; /* skips all the opcodes */ + EMU_COUNT_SSE2(eip); + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + /* overflow wraps */ + *(u64*)(dst) = (u64)dst[0] * (u64)src[0]; + *(u64*)(dst+2) = (u64)dst[2] * (u64)src[2]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F F4 pmuludq */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && (eip[2] == 0xF9 || eip[2] == 0xFD)) { + /* psubw: 16bit SIMD sub XMM/128 to XMM */ + /* paddw: 16bit SIMD add XMM/128 to XMM */ + /* !! MMX enabled CPUs doesn't come here !! 
*/ + u8 xdst, modrm; + s16 *wsrc, *wdst; + u8 eip2 = eip[2]; + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + wdst = (s16*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + wsrc = (s16*)XMMREG_ADDR(modrm & 7); + } else { + wsrc = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)wsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + /* overflow wraps */ + switch(eip2) { + case 0xF9: /* psubw */ + wdst[7] -= wsrc[7]; + wdst[6] -= wsrc[6]; + wdst[5] -= wsrc[5]; + wdst[4] -= wsrc[4]; + wdst[3] -= wsrc[3]; + wdst[2] -= wsrc[2]; + wdst[1] -= wsrc[1]; + wdst[0] -= wsrc[0]; + break; + case 0xFD: /* paddw */ + wdst[7] += wsrc[7]; + wdst[6] += wsrc[6]; + wdst[5] += wsrc[5]; + wdst[4] += wsrc[4]; + wdst[3] += wsrc[3]; + wdst[2] += wsrc[2]; + wdst[1] += wsrc[1]; + wdst[0] += wsrc[0]; + break; + default: goto invalid_opcode; + } + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F F9 psubw */ /* 66 0F FD paddw */ + + if (eip[0] == 0x66 && eip[1] == 0x0F && eip[2] == 0xFC) { + /* paddb: 8bit*16 SIMD add XMM/128 to XMM */ + /* used in cc1 */ + u8 xdst, modrm; + char *bdst, *bsrc; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + bdst = (char*)XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + bsrc = (char*)XMMREG_ADDR(modrm & 7); + } else { + bsrc = (char*)modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)bsrc, 16)) { + do_general_protection(regs, error_code); + return; + } + } + + /* overflow wraps */ + bdst[0] += bsrc[0]; + bdst[1] += bsrc[1]; + bdst[2] += bsrc[2]; + bdst[3] += bsrc[3]; + bdst[4] += bsrc[4]; + bdst[5] += bsrc[5]; + bdst[6] += bsrc[6]; + bdst[7] += bsrc[7]; + bdst[8] += bsrc[8]; + bdst[9] += bsrc[9]; + bdst[10]+= bsrc[10]; + bdst[11]+= bsrc[11]; + bdst[12]+= bsrc[12]; + bdst[13]+= bsrc[13]; + bdst[14]+= bsrc[14]; + bdst[15]+= bsrc[15]; + + regs->ip = (u32)eip; + goto sse_return; + } /* 66 0F FC paddb */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && (eip[2] == 0x10 || eip[2] == 0x11)) { + /* movsd: move 64bit double from XMM/mem to XMM */ + u8 xdst, modrm; + int to_reg = (eip[2]==0x10); + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + if (!to_reg) { + /* do we ever have this? 
*/ + goto invalid_opcode; + } + *(u64*)dst = *(u64*)src; /*64bit*/ + regs->ip = (u32)eip; + goto sse_return; + } + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (to_reg) { + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + *(u64*)dst = *(u64*)src; /*64bit*/ + dst[2] = 0; /* other bits cleared */ + dst[3] = 0; + } else { + if (!access_ok(VERIFY_WRITE, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + *(u64*)src = *(u64*)dst; /*64bit*/ + } + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 10|11 movsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x2A) { + /* cvtsi2sd: convert 32bit int from reg/mem to XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = reg_address(regs, 1, modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fildl (%1)\n\t" + "fstpl (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 2A cvtsi2sd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x2C) { + /* cvttsd2si: convert double in XMM/mem to std register int32 truncated */ + u8 xdst, modrm; + char fsave[112]; + u16 cw; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = reg_address(regs, 1, xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%2)\n\t" + "fstcw %1\n\t" + "orw $0x0c00, %1\n\t" /* RC_CHOP */ + "fldcw %1\n\t" + "fistpl (%3)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "+m" (cw) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 2C cvttsd2si */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x2D) { + /* cvtsd2si: convert double in XMM/mem to std register int32 */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = reg_address(regs, 1, xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + /* truncation mode not set, until LDMXCSR is emulated */ + "fistpl (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 2D cvtsd2si */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x51) { + /* sqrtsd: square root 64bit double of XMM/mem and store in XMM */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; 
/* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%2)\n\t" + "fsqrt\n\t" + "fstpl (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 51 sqrtsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x58) { + /* addsd: add 64bit double XMM/mem to XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + "faddl (%2)\n\t" + "fstpl (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 58 addsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x59) { + /* mulsd: multiply 64bit double XMM/mem to XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + "fmull (%2)\n\t" + "fstpl (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 59 mulsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x5A) { + /* cvtsd2ss: convert double XMM/mem to XMM float */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + "fstps (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 2A cvtsd2ss */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x5C) { + /* subsd: subtract 64bit double XMM/mem from XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); 
+ /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + "fsubl (%2)\n\t" + "fstpl (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 5C subsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x5D) { + /* minsd: pick minimum of 64bit double XMM/mem to XMM */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + /* don't use FUCOMIP here, CPU may not support it */ + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%3)\n\t" + "fldl (%2)\n\t" + "fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %1\n\t" + "frstor %0\n\t" + : "+m" (fsave), "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + if (fpsw & FP_C2) /*unordered*/ { + *(u64*)dst = *(u64*)src; /* minsd does so */ + } else if (fpsw & FP_C0) { + *(u64*)dst = *(u64*)src; /* src was smaller; rewrite */ + } + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 5D minsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x5E) { + /* divsd: divide by 64bit double XMM/mem of XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%1)\n\t" + "fdivl (%2)\n\t" + "fstpl (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 5E divsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x5F) { + /* maxsd: pick maximum of 64bit double XMM/mem to XMM */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + /* don't use FUCOMIP here, CPU may not support it */ + /* dst[i] = MAX(dst[i],src[i]) */ + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%2)\n\t" + "fldl (%3)\n\t" + "fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %1\n\t" + "frstor %0\n\t" + : "+m" (fsave), "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + if (fpsw & FP_C2) /*unordered*/ { + *(u64*)dst = *(u64*)src; 
/* maxsd does so */ + } else if (fpsw & FP_C0) { + *(u64*)dst = *(u64*)src; /* src was larger; rewrite */ + } + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 5F maxsd */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0x70) { + /* pshuflw: shuffle words */ + u8 xdst, modrm; + union { + u8 im8; + struct { unsigned sh0:2; unsigned sh1:2; unsigned sh2:2; unsigned sh3:2; } s __attribute__((packed)); + } im; + u64 usrc; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + im.im8 = *eip++; /* immediate op */ + usrc = *(u64*)src; + + dst[3]=src[3]; + dst[2]=src[2]; + dst[1]= (((usrc >> (im.s.sh3*16))&0xffff) << 16) | ((usrc >> (im.s.sh2*16))&0xffff); + dst[0]= (((usrc >> (im.s.sh1*16))&0xffff) << 16) | ((usrc >> (im.s.sh0*16))&0xffff); + + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 70 pshuflw */ + + if (eip[0] == 0xF2 && eip[1] == 0x0F && eip[2] == 0xC2) { + /* cmpsd: compare as 64bit double XMM/mem against XMM and set bitmask */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + int cond; + u8 op; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fldl (%2)\n\t" + "fldl (%3)\n\t" + /* st0=dst st1=src */ + "fucompp\n\t" + "fstsw %%ax \n\t" + "mov %%ax,%1 \n\t" + "frstor %0\n\t" + : "+m" (fsave), "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); + + /* branch by immediate suffix byte. + * easy by circuit but a mess by program. + */ + op = *eip++; + /* bits in x87 Status Word fpsw */ +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + fpsw &= (FP_C0|FP_C2|FP_C3); + cond = 0; + switch(op) { + case 0: /*EQ*/ + if (fpsw == FP_C3) cond=1; break; + case 1: /*LT*/ + if (fpsw == FP_C0) cond=1; break; + case 2: /*LE*/ + if (fpsw == FP_C3 || + fpsw == FP_C0) cond=1; break; + case 3: /*UNORD*/ + if (fpsw ==(FP_C3|FP_C2|FP_C0)) cond=1; break; + case 4: /*NEQ*/ + if (!(fpsw == FP_C3)) cond=1; break; + case 5: /*NLT*/ + if (!(fpsw == FP_C0)) cond=1; break; + case 6: /*NLE*/ + if (!(fpsw == FP_C3 || + fpsw == FP_C0)) cond=1; break; + case 7: /*ORD*/ + if (!(fpsw ==(FP_C3|FP_C2|FP_C0))) cond=1; break; + default: + goto invalid_opcode; + } + /* set result bitnask */ + *(u64*)dst = (cond) ? 
0xFFFFFFFFFFFFFFFFULL : 0; + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F C2 cmpsd */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && (eip[2] == 0x10 || eip[2] == 0x11)) { + if (!do_movss(regs, error_code)) goto invalid_opcode; + goto sse_return; + } /* F3 0F 10|11 movss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x2A) { + /* cvtsi2ss: convert 32bit int from reg/mem to XMM float */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = reg_address(regs, 1, modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "fildl (%1)\n\t" + "fstps (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) : "memory" + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 2A cvtsi2ss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x2C) { + /* cvttss2si: convert 32bit float in XMM/mem to int register */ + u8 xdst, modrm; + u16 cw; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = reg_address(regs, 1, xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + /* set truncation mode to toward-zero */ + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "fstcw %1\n\t" + "orw $0x0c00, %1\n\t" /* RC_CHOP */ + "fldcw %1\n\t" + "fistpl (%3)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "=m" (cw) + : "r" (src), "r" (dst) + : "memory" + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 2C cvttss2si */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x51) { + /* sqrtss: square root 32bit float of XMM/mem and store in XMM */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "fsqrt\n\t" + "fstps (%1)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (dst), "r" (src) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 51 sqrtss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x58) { + /* addss: add 32bit float XMM/mem to XMM float */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } 
+ __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "fadds (%1)\n\t" + "fstps (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 58 addss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x59) { + /* mulss: multiply 32bit float XMM/mem to XMM float */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "fmuls (%1)\n\t" + "fstps (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 59 mulss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x5A) { + /* cvtss2sd: convert 32bit float from XMM/mem to XMM double */ + u8 xdst, modrm; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%1)\n\t" + "fstpl (%2)\n\t" + "frstor %0\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 5A cvtss2sd */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x5B) { + /* cvttps2dq: convert 4 floats to 4 ints */ + u8 xdst, modrm; + char fsave[112]; + u16 cw; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "fstcw %1\n\t" + "orw $0x0c00, %1\n\t" /* RC_CHOP */ + "fldcw %1\n\t" + "fistpl (%3)\n\t" + "flds 0x4(%2)\n\t" + "fldcw %1\n\t" + "fistpl 0x4(%3)\n\t" + "flds 0x8(%2)\n\t" + "fldcw %1\n\t" + "fistpl 0x8(%3)\n\t" + "flds 0xc(%2)\n\t" + "fldcw %1\n\t" + "fistpl 0xc(%3)\n\t" + "frstor %0\n\t" + : "+m" (fsave), "+m" (cw) + : "r" (src), "r" (dst) + ); + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 5B cvttps2dq */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && (eip[2]&0xFC) == 0x5C) { + /* subss: subtract 32 float XMM/mem from XMM float */ + /* minss: smaller of 32bit float XMM/mem to XMM float */ + /* divss: divide by 32bit float XMM/mem to XMM float */ + /* maxss: larger of 32bit float XMM/mem to XMM float */ + u8 xdst, modrm; + u8 eip2 = eip[2]; + u16 fpsw; + char fsave[112]; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = 
XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__("fsave %0\n\t" : "=m" (fsave)); + switch (eip2) { + case 0x5C: /* subss */ + __asm__ __volatile__( + "flds (%2)\n\t" + "fsubs (%1)\n\t" + "fstps (%2)\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + break; + case 0x5D: /* minss */ + /* dst[i] = MIN(dst[i],src[i]) */ + __asm__ __volatile__( + "flds (%2)\n\t" + "flds (%1)\n\t" + "fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %0\n\t" + : "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + if (fpsw & FP_C2) /*unordered*/ { + *dst = *src; /* minss does so */ + } else if (fpsw & FP_C0) { + *dst = *src; /* src was smaller; rewrite */ + } + break; + case 0x5E: /* divss */ + __asm__ __volatile__( + "flds (%2)\n\t" + "fdivs (%1)\n\t" + "fstps (%2)\n\t" + : "+m" (fsave) + : "r" (src), "r" (dst) + ); + break; + case 0x5F: /* maxss */ + /* dst[i] = MAX(dst[i],src[i]) */ + __asm__ __volatile__( + "flds (%1)\n\t" + "flds (%2)\n\t" + "fucompp\n\t" + "fstsw %%ax\n\t" + "mov %%ax, %0\n\t" + : "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + if (fpsw & FP_C2) /*unordered*/ { + *dst = *src; /* maxss does so */ + } else if (fpsw & FP_C0) { + *dst = *src; /* src was larger; rewrite */ + } + break; + default: goto invalid_opcode; + } + __asm__ __volatile__("frstor %0\n\t" : "+m" (fsave)); + + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 5C subss */ /* F3 0F 5D minss */ /* F3 0F 5E divss */ /* F3 0F 5F maxss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && (eip[2] == 0x6F || eip[2]==0x7F)) { + /* movdqu: move unaligned 128bit XMM/mem to XMM */ + if (!do_mov128(regs, error_code, (eip[2]==0x6F))) goto invalid_opcode; + EMU_COUNT_SSE(eip); + goto sse_return; + } /* F3 0F 6F|7F movdqu */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x70) { + /* pshufhw: shuffle high-64 bit words */ + u8 xdst, modrm; + union { + u8 im8; + struct { unsigned sh0:2; unsigned sh1:2; unsigned sh2:2; unsigned sh3:2; } s __attribute__((packed)); + } im; + u64 usrc; + + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 16)) { + do_general_protection(regs, error_code); + return; + } + } + im.im8 = *eip++; /* immediate op */ + usrc = *(u64*)(src+2); + + dst[3]= (((usrc >> (im.s.sh3*16))&0xffff) << 16) | ((usrc >> (im.s.sh2*16))&0xffff); + dst[2]= (((usrc >> (im.s.sh1*16))&0xffff) << 16) | ((usrc >> (im.s.sh0*16))&0xffff); + dst[1]=src[1]; + dst[0]=src[0]; + + regs->ip = (u32)eip; + goto sse_return; + } /* F2 0F 70 pshufhw */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0xC2) { + /* cmpss: compare as 32bit float XMM/mem against XMM and set 32bit bitmask */ + u8 xdst, modrm; + char fsave[112]; + u16 fpsw; + int cond; + u8 op; + + modrm = eip[3]; + EMU_COUNT_SSE(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) 
{ /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 4)) { + do_general_protection(regs, error_code); + return; + } + } + __asm__ __volatile__( + "fsave %0\n\t" + "flds (%2)\n\t" + "flds (%3)\n\t" + /* st0=dst st1=src */ + "fucompp\n\t" + "fstsw %%ax \n\t" + "mov %%ax,%1 \n\t" + "frstor %0\n\t" + : "+m" (fsave), "=r" (fpsw) + : "r" (src), "r" (dst) + : "ax" + ); + + /* branch by immediate suffix byte. + * easy by circuit but a mess by program. + */ + op = *eip++; + /* bits in x87 Status Word fpsw */ +#define FP_C0 0x0100 /*carry*/ +#define FP_C2 0x0400 /*unord*/ +#define FP_C3 0x4000 /*zero*/ + fpsw &= (FP_C0|FP_C2|FP_C3); + cond = 0; + switch(op) { + case 0: /*EQ*/ + if (fpsw == FP_C3) cond=1; break; + case 1: /*LT*/ + if (fpsw == FP_C0) cond=1; break; + case 2: /*LE*/ + if (fpsw == FP_C3 || + fpsw == FP_C0) cond=1; break; + case 3: /*UNORD*/ + if (fpsw ==(FP_C3|FP_C2|FP_C0)) cond=1; break; + case 4: /*NEQ*/ + if (!(fpsw == FP_C3)) cond=1; break; + case 5: /*NLT*/ + if (!(fpsw == FP_C0)) cond=1; break; + case 6: /*NLE*/ + if (!(fpsw == FP_C3 || + fpsw == FP_C0)) cond=1; break; + case 7: /*ORD*/ + if (!(fpsw ==(FP_C3|FP_C2|FP_C0))) cond=1; break; + default: + goto invalid_opcode; + } + /* set result bitnask */ + *dst = (cond) ? 0xFFFFFFFFULL : 0; + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F C2 cmpss */ + + if (eip[0] == 0xF3 && eip[1] == 0x0F && eip[2] == 0x7E) { + /* movq XMM/mem to XMM 64bit */ + /* !! MMX enabled CPUs doesn't come here !! */ + u8 xdst, modrm; + modrm = eip[3]; + EMU_COUNT_SSE2(eip); + eip += 4; /* skips all the opcodes */ + + xdst = (modrm >> 3) & 7; + dst = XMMREG_ADDR(xdst); + if ((modrm & 0xC0) == 0xC0) { /* register to register */ + src = XMMREG_ADDR(modrm & 7); + } else { + src = modrm_address(regs, &eip, 1, modrm); + /* we must verify that src is valid for this task */ + if (!access_ok(VERIFY_READ, (void *)src, 8)) { + do_general_protection(regs, error_code); + return; + } + dst[2] = 0; /* xmm<-mem clear MSB */ + dst[3] = 0; + } + + dst[0] = src[0]; + dst[1] = src[1]; + + regs->ip = (u32)eip; + goto sse_return; + } /* F3 0F 7E movq xmm<-xmm/mem */ + + /* if we came here, no emulatable opcode found */ + if (sse_iter == 0) { + /* if this was first time, it was an invalid opcode */ + goto invalid_opcode; + } else { + /* we did emulate something. + * no emulatable thing found, so return */ + return; + } + + /* "return;" of SSE emulation code comes here. */ + /* SSE instructions come in series, so emulate them as + * far as we can. */ +sse_return: + sse_iter++; + goto sse_again; + +#endif /*CONFIG_CPU_EMU_SSE2 }*/ + /* it's a case we can't handle. Unknown opcode or too many prefixes. 
*/ invalid_opcode: #ifdef CONFIG_CPU_EMU486_DEBUG - printk(KERN_DEBUG "do_invalid_op() : invalid opcode detected pid %d(%s) @%p : %02x %02x >%02x %02x %02x %02x %02x...\n", current->pid, current->comm, eip, eip[-2], eip[-1], eip[0], eip[1], eip[2], eip[3], eip[4]); + eip = (u8 *)regs->ip; /* reset */ + printk(KERN_NOTICE "do_invalid_op() : invalid opcode detected pid %d(%s) @%p : %02x %02x >%02x %02x %02x %02x %02x...", current->pid, current->comm, eip, eip[-2], eip[-1], eip[0], eip[1], eip[2], eip[3], eip[4]); + print_vma_addr(KERN_CONT " in ", regs->ip); + printk(KERN_CONT "\n"); #endif current->thread.error_code = error_code; current->thread.trap_nr = X86_TRAP_UD; if (notify_die(DIE_TRAP, "invalid operand", regs, error_code, X86_TRAP_UD, SIGILL) == NOTIFY_STOP) return;
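
For reference, here is a minimal user-space sketch of how the SSE2 paths above can be exercised (illustrative only, not part of the patch; the file name, build flags and test values are assumptions). Compiled with -msse2, the intrinsics below typically produce movdqu (F3 0F 6F|7F, handled by do_mov128()), paddd (66 0F FE) and pxor (66 0F EF); on a CPU without SSE2 running a kernel built with CONFIG_CPU_EMU_SSE2, each of them traps into do_invalid_op() and is carried out by the handlers in this hunk, so the results should match the plain C reference:

/* sse2-emu-check.c -- illustrative user-space test, not part of this patch */
/* assumed build: gcc -m32 -msse2 -O0 -o sse2-emu-check sse2-emu-check.c */
#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint32_t a[4] = { 1, 2, 0xfffffffeu, 0x80000000u };
	uint32_t b[4] = { 5, 6, 7, 0x80000000u };
	uint32_t sum[4], xr[4], ref_sum[4], ref_xr[4];
	int i;

	/* movdqu loads (F3 0F 6F) */
	__m128i va = _mm_loadu_si128((const __m128i *)a);
	__m128i vb = _mm_loadu_si128((const __m128i *)b);

	/* paddd (66 0F FE): 32-bit lanes, overflow wraps, as in the emulation */
	_mm_storeu_si128((__m128i *)sum, _mm_add_epi32(va, vb));
	/* pxor (66 0F EF) */
	_mm_storeu_si128((__m128i *)xr, _mm_xor_si128(va, vb));

	for (i = 0; i < 4; i++) {
		ref_sum[i] = a[i] + b[i];
		ref_xr[i] = a[i] ^ b[i];
	}

	printf("paddd: %s\n", memcmp(sum, ref_sum, 16) ? "MISMATCH" : "ok");
	printf("pxor:  %s\n", memcmp(xr, ref_xr, 16) ? "MISMATCH" : "ok");
	return 0;
}

Note that the scalar double conversions (cvtsd2si and friends) are emulated through the x87 and, as the in-line comments say, keep the current x87 rounding mode until LDMXCSR emulation exists, so their rounding can differ from native SSE2 hardware.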