/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
#define OpNone 0ull
#define OpImplicit 1ull /* No generic decode */
#define OpReg 2ull /* Register */
#define OpMem 3ull /* Memory */
#define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
#define OpDI 5ull /* ES:DI/EDI/RDI */
#define OpMem64 6ull /* Memory, 64-bit */
#define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
#define OpDX 8ull /* DX register */
#define OpCL 9ull /* CL register (for shifts) */
#define OpImmByte 10ull /* 8-bit sign extended immediate */
#define OpOne 11ull /* Implied 1 */
#define OpImm 12ull /* Sign extended up to 32-bit immediate */
#define OpMem16 13ull /* Memory operand (16-bit). */
#define OpMem32 14ull /* Memory operand (32-bit). */
#define OpImmU 15ull /* Immediate operand, zero extended */
#define OpSI 16ull /* SI/ESI/RSI */
#define OpImmFAddr 17ull /* Immediate far address */
#define OpMemFAddr 18ull /* Far address in memory */
#define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
#define OpES 20ull /* ES */
#define OpCS 21ull /* CS */
#define OpSS 22ull /* SS */
#define OpDS 23ull /* DS */
#define OpFS 24ull /* FS */
#define OpGS 25ull /* GS */
#define OpMem8 26ull /* 8-bit zero extended memory operand */
#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
#define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits 5 /* Width of operand field */
#define OpMask ((1ull << OpBits) - 1)
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp (1<<0) /* 8-bit operands. */
/* Destination operand type. */
#define DstShift 1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg (OpReg << DstShift)
#define DstMem (OpMem << DstShift)
#define DstAcc (OpAcc << DstShift)
#define DstDI (OpDI << DstShift)
#define DstMem64 (OpMem64 << DstShift)
#define DstMem16 (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX (OpDX << DstShift)
#define DstAccLo (OpAccLo << DstShift)
#define DstMask (OpMask << DstShift)
/* Source operand type. */
#define SrcShift 6
#define SrcNone (OpNone << SrcShift)
#define SrcReg (OpReg << SrcShift)
#define SrcMem (OpMem << SrcShift)
#define SrcMem16 (OpMem16 << SrcShift)
#define SrcMem32 (OpMem32 << SrcShift)
#define SrcImm (OpImm << SrcShift)
#define SrcImmByte (OpImmByte << SrcShift)
#define SrcOne (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU (OpImmU << SrcShift)
#define SrcSI (OpSI << SrcShift)
#define SrcXLat (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc (OpAcc << SrcShift)
#define SrcImmU16 (OpImmU16 << SrcShift)
#define SrcImm64 (OpImm64 << SrcShift)
#define SrcDX (OpDX << SrcShift)
#define SrcMem8 (OpMem8 << SrcShift)
#define SrcAccHi (OpAccHi << SrcShift)
#define SrcMask (OpMask << SrcShift)
#define BitOp (1<<11)
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
#define String (1<<13) /* String instruction (rep capable) */
#define Stack (1<<14) /* Stack instruction (push/pop) */
#define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
#define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
#define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
#define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape (5<<15) /* Escape to coprocessor instruction */
#define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
#define ModeDual (7<<15) /* Different instruction for 32/64 bit */
#define Sse (1<<18) /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM (1<<19)
/* Destination is only written; never read. */
#define Mov (1<<20)
#define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
#define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined (1<<25) /* No Such Instruction */
#define Lock (1<<26) /* lock prefix is allowed for the instruction */
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
#define PageTable (1 << 29) /* instruction used to write page table */
#define NotImpl (1 << 30) /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift (31)
#define Src2None (OpNone << Src2Shift)
#define Src2Mem (OpMem << Src2Shift)
#define Src2CL (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One (OpOne << Src2Shift)
#define Src2Imm (OpImm << Src2Shift)
#define Src2ES (OpES << Src2Shift)
#define Src2CS (OpCS << Src2Shift)
#define Src2SS (OpSS << Src2Shift)
#define Src2DS (OpDS << Src2Shift)
#define Src2FS (OpFS << Src2Shift)
#define Src2GS (OpGS << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
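
/*
 * Illustrative sketch (not part of the decode tables): each operand
 * descriptor above is an OpBits-wide field packed into the 64-bit opcode
 * flags word, so a decoder extracts the three operand kinds with a
 * shift-and-mask, e.g.:
 *
 *	unsigned dst  = (flags & DstMask)  >> DstShift;
 *	unsigned src  = (flags & SrcMask)  >> SrcShift;
 *	unsigned src2 = (flags & Src2Mask) >> Src2Shift;
 */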
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
#define NoMod ((u64)1 << 47) /* Mod field is ignored */
#define Intercept ((u64)1 << 48) /* Has valid intercept field */
#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
#define NearBranch ((u64)1 << 52) /* Near branches */
#define No16 ((u64)1 << 53) /* No 16 bit operand */
#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */

#define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8
/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */
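
/*
 * A sketch of the size-based dispatch described above (simplified from the
 * real fastop() dispatcher, so treat the exact expression as an assumption
 * rather than a quote): with the stubs emitted back to back at FASTOP_SIZE
 * intervals in b/w/l/q order, the entry for the current operand size is
 * reached by pointer arithmetic instead of a jump table:
 *
 *	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;   (1/2/4/8 -> 0/1/2/3)
 */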
struct opcode {
        u64 flags : 56;
        u64 intercept : 8;
        union {
                int (*execute)(struct x86_emulate_ctxt *ctxt);
                const struct opcode *group;
                const struct group_dual *gdual;
                const struct gprefix *gprefix;
                const struct escape *esc;
                const struct instr_dual *idual;
                const struct mode_dual *mdual;
                void (*fastop)(struct fastop *fake);
        } u;
        int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
        struct opcode mod012[8];
        struct opcode mod3[8];
};

struct gprefix {
        struct opcode pfx_no;
        struct opcode pfx_66;
        struct opcode pfx_f2;
        struct opcode pfx_f3;
};

struct escape {
        struct opcode op[8];
        struct opcode high[64];
};

struct instr_dual {
        struct opcode mod012;
        struct opcode mod3;
};

struct mode_dual {
        struct opcode mode32;
        struct opcode mode64;
};
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
        X86_TRANSFER_NONE,
        X86_TRANSFER_CALL_JMP,
        X86_TRANSFER_RET,
        X86_TRANSFER_TASK_SWITCH,
};
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        if (!(ctxt->regs_valid & (1 << nr))) {
                ctxt->regs_valid |= 1 << nr;
                ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
        }
        return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        ctxt->regs_valid |= 1 << nr;
        ctxt->regs_dirty |= 1 << nr;
        return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
        reg_read(ctxt, nr);
        return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
        unsigned reg;

        for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
                ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
        ctxt->regs_dirty = 0;
        ctxt->regs_valid = 0;
}
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
                     X86_EFLAGS_PF|X86_EFLAGS_CF)
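
/*
 * A minimal sketch of how a flags-returning helper folds its result back
 * into the saved guest flags under this mask (an illustration of the idea,
 * not a quote of the dispatch code):
 *
 *	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 */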
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
#define FOP_RET "ret \n\t"

#define FOP_START(op) \
        extern void em_##op(struct fastop *fake); \
        asm(".pushsection .text, \"ax\" \n\t" \
            ".global em_" #op " \n\t" \
            FOP_ALIGN \
            "em_" #op ": \n\t"

#define FOP_END \
            ".popsection")

#define FOPNOP() FOP_ALIGN FOP_RET
#define FOP1E(op, dst) \
        FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET

#define FOP1EEX(op, dst) \
        FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
        FOP_START(op) \
        FOP1E(op##b, al) \
        FOP1E(op##w, ax) \
        FOP1E(op##l, eax) \
        ON64(FOP1E(op##q, rax)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
        FOP_START(name) \
        FOP1E(op, cl) \
        FOP1E(op, cx) \
        FOP1E(op, ecx) \
        ON64(FOP1E(op, rcx)) \
        FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
        FOP_START(name) \
        FOP1EEX(op, cl) \
        FOP1EEX(op, cx) \
        FOP1EEX(op, ecx) \
        ON64(FOP1EEX(op, rcx)) \
        FOP_END
#define FOP2E(op, dst, src) \
        FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
        FOP_START(op) \
        FOP2E(op##b, al, dl) \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP2E(op##w, ax, dx) \
        FOP2E(op##l, eax, edx) \
        ON64(FOP2E(op##q, rax, rdx)) \
        FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
        FOP_START(op) \
        FOP2E(op##b, al, cl) \
        FOP2E(op##w, ax, cl) \
        FOP2E(op##l, eax, cl) \
        ON64(FOP2E(op##q, rax, cl)) \
        FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
        FOP_START(name) \
        FOP2E(op##b, dl, al) \
        FOP2E(op##w, dx, ax) \
        FOP2E(op##l, edx, eax) \
        ON64(FOP2E(op##q, rdx, rax)) \
        FOP_END

#define FOP3E(op, dst, src, src2) \
        FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
        FOP_START(op) \
        FOPNOP() \
        FOP3E(op##w, ax, dx, cl) \
        FOP3E(op##l, eax, edx, cl) \
        ON64(FOP3E(op##q, rax, rdx, cl)) \
        FOP_END
/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"

asm(".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; ret");
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;
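
/*
 * SALC sets AL from the carry flag (AL = CF ? 0xff : 0x00): the
 * "sbb %al, %al" computes AL - AL - CF, and the surrounding pushf/popf
 * pair keeps the guest-visible flags unchanged by the subtraction.
 */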
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
                                    enum x86_intercept intercept,
                                    enum x86_intercept_stage stage)
{
        struct x86_instruction_info info = {
                .intercept  = intercept,
                .rep_prefix = ctxt->rep_prefix,
                .modrm_mod  = ctxt->modrm_mod,
                .modrm_reg  = ctxt->modrm_reg,
                .modrm_rm   = ctxt->modrm_rm,
                .src_val    = ctxt->src.val64,
                .dst_val    = ctxt->dst.val64,
                .src_bytes  = ctxt->src.bytes,
                .dst_bytes  = ctxt->dst.bytes,
                .ad_bytes   = ctxt->ad_bytes,
                .next_rip   = ctxt->eip,
        };

        return ctxt->ops->intercept(ctxt, &info, stage);
}
static void assign_masked(ulong *dest, ulong src, ulong mask)
{
        *dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
        switch (bytes) {
        case 1:
                *(u8 *)reg = (u8)val;
                break;
        case 2:
                *(u16 *)reg = (u16)val;
                break;
        case 4:
                *reg = (u32)val;
                break; /* 64b: zero-extend */
        case 8:
                *reg = val;
                break;
        }
}
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
        return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
        u16 sel;
        struct desc_struct ss;

        if (ctxt->mode == X86EMUL_MODE_PROT64)
                return ~0UL;
        ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
        return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
        return (__fls(stack_mask(ctxt)) + 1) >> 3;
}
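
/*
 * Worked example: a 32-bit stack segment (ss.d == 1) yields
 * stack_mask() == 0xffffffff, so stack_size() == (__fls(0xffffffff) + 1)
 * >> 3 == (31 + 1) / 8 == 4 bytes; a 16-bit stack gives 0xffff -> 2.
 */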
/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
        if (ctxt->ad_bytes == sizeof(unsigned long))
                return reg;
        else
                return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
        return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
        assign_masked(reg, *reg + inc, mask);
}

static void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
        ulong *preg = reg_rmw(ctxt, reg);

        assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
        masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
        u32 limit = get_desc_limit(desc);

        return desc->g ? (limit << 12) | 0xfff : limit;
}
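
/*
 * Example: a flat 4GB segment has limit 0xfffff with desc->g set, so the
 * scaled limit is (0xfffff << 12) | 0xfff == 0xffffffff; with g clear the
 * 20-bit limit is used as a byte count directly.
 */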
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
                return 0;

        return ctxt->ops->get_cached_segment_base(ctxt, seg);
}
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
                             u32 error, bool valid)
{
        ctxt->exception.vector = vec;
        ctxt->exception.error_code = error;
        ctxt->exception.error_code_valid = valid;
        return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
        return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
        return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
        u16 selector;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
        return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
                                 unsigned seg)
{
        u16 dummy;
        u32 base3;
        struct desc_struct desc;

        ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
        ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.
 */
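
/*
 * insn_aligned() below reports whether the access must be naturally
 * aligned; the caller in __linearize() then raises #GP(0) whenever
 * (la & (size - 1)) != 0 for such an instruction.
 */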
static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
{
        if (likely(size < 16))
                return false;

        if (ctxt->d & Aligned)
                return true;
        else if (ctxt->d & Unaligned)
                return false;
        else if (ctxt->d & Avx)
                return false;
        else
                return true;
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
                                       struct segmented_address addr,
                                       unsigned *max_size, unsigned size,
                                       bool write, bool fetch,
                                       enum x86emul_mode mode, ulong *linear)
{
        struct desc_struct desc;
        bool usable;
        ulong la;
        u32 lim;
        u16 sel;

        la = seg_base(ctxt, addr.seg) + addr.ea;
        *max_size = 0;
        switch (mode) {
        case X86EMUL_MODE_PROT64:
                *linear = la;
                if (is_noncanonical_address(la))
                        goto bad;

                *max_size = min_t(u64, ~0u, (1ull << 48) - la);
                if (size > *max_size)
                        goto bad;
                break;
        default:
                *linear = la = (u32)la;
                usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
                                                addr.seg);
                if (!usable)
                        goto bad;
                /* code segment in protected mode or read-only data segment */
                if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
                     || !(desc.type & 2)) && write)
                        goto bad;
                /* unreadable code segment */
                if (!fetch && (desc.type & 8) && !(desc.type & 2))
                        goto bad;
                lim = desc_limit_scaled(&desc);
                if (!(desc.type & 8) && (desc.type & 4)) {
                        /* expand-down segment */
                        if (addr.ea <= lim)
                                goto bad;
                        lim = desc.d ? 0xffffffff : 0xffff;
                }
                if (addr.ea > lim)
                        goto bad;
                if (lim == 0xffffffff)
                        *max_size = ~0u;
                else {
                        *max_size = (u64)lim + 1 - addr.ea;
                        if (size > *max_size)
                                goto bad;
                }
                break;
        }
        if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
                return emulate_gp(ctxt, 0);
        return X86EMUL_CONTINUE;
bad:
        if (addr.seg == VCPU_SREG_SS)
                return emulate_ss(ctxt, 0);
        else
                return emulate_gp(ctxt, 0);
}
static int linearize(struct x86_emulate_ctxt *ctxt,
                     struct segmented_address addr,
                     unsigned size, bool write,
                     ulong *linear)
{
        unsigned max_size;

        return __linearize(ctxt, addr, &max_size, size, write, false,
                           ctxt->mode, linear);
}
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
                             enum x86emul_mode mode)
{
        ulong linear;
        int rc;
        unsigned max_size;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                          .ea = dst };

        if (ctxt->op_bytes != sizeof(unsigned long))
                addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
        rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
        if (rc == X86EMUL_CONTINUE)
                ctxt->_eip = addr.ea;
        return rc;
}
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
        return assign_eip(ctxt, dst, ctxt->mode);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
                          const struct desc_struct *cs_desc)
{
        enum x86emul_mode mode = ctxt->mode;
        int rc;

#ifdef CONFIG_X86_64
        if (ctxt->mode >= X86EMUL_MODE_PROT16) {
                if (cs_desc->l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                mode = X86EMUL_MODE_PROT64;
                } else
                        mode = X86EMUL_MODE_PROT32; /* temporary value */
        }
#endif
        if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
                mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
        rc = assign_eip(ctxt, dst, mode);
        if (rc == X86EMUL_CONTINUE)
                ctxt->mode = mode;
        return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
        return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
                              struct segmented_address addr,
                              void *data,
                              unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
}
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
        int rc;
        unsigned size, max_size;
        unsigned long linear;
        int cur_size = ctxt->fetch.end - ctxt->fetch.data;
        struct segmented_address addr = { .seg = VCPU_SREG_CS,
                                          .ea = ctxt->eip + cur_size };

        /*
         * We do not know exactly how many bytes will be needed, and
         * __linearize is expensive, so fetch as much as possible. We
         * just have to avoid going beyond the 15 byte limit, the end
         * of the segment, or the end of the page.
         *
         * __linearize is called with size 0 so that it does not do any
         * boundary check itself. Instead, we use max_size to check
         * against op_size.
         */
        rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
                         &linear);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;

        size = min_t(unsigned, 15UL ^ cur_size, max_size);
        size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
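        /*
         * Note on "15UL ^ cur_size" above: cur_size is at most 15, so the
         * XOR is equivalent to 15 - cur_size, i.e. the bytes still allowed
         * under the 15-byte instruction length limit.
         */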
        /*
         * One instruction can only straddle two pages,
         * and one has been loaded at the beginning of
         * x86_decode_insn. So, if not enough bytes
         * still, we must have hit the 15-byte boundary.
         */
        if (unlikely(size < op_size))
                return emulate_gp(ctxt, 0);

        rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
                              size, &ctxt->exception);
        if (unlikely(rc != X86EMUL_CONTINUE))
                return rc;
        ctxt->fetch.end += size;
        return X86EMUL_CONTINUE;
}
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
                                               unsigned size)
{
        unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

        if (unlikely(done_size < size))
                return __do_insn_fetch_bytes(ctxt, size - done_size);
        else
                return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)                                        \
({      _type _x;                                                       \
                                                                        \
        rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));                 \
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += sizeof(_type);                                    \
        _x = *(_type __aligned(1) *) ctxt->fetch.ptr;                   \
        ctxt->fetch.ptr += sizeof(_type);                               \
        _x;                                                             \
})

#define insn_fetch_arr(_arr, _size, _ctxt)                              \
({                                                                      \
        rc = do_insn_fetch_bytes(_ctxt, _size);                         \
        if (rc != X86EMUL_CONTINUE)                                     \
                goto done;                                              \
        ctxt->_eip += (_size);                                          \
        memcpy(_arr, ctxt->fetch.ptr, _size);                           \
        ctxt->fetch.ptr += (_size);                                     \
})
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
                             int byteop)
{
        void *p;
        int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
        else
                p = reg_rmw(ctxt, modrm_reg);
        return p;
}
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           u16 *size, unsigned long *address, int op_bytes)
{
        int rc;

        if (op_bytes == 2)
                op_bytes = 3;
        *address = 0;
        rc = segmented_read_std(ctxt, addr, size, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        addr.ea += 2;
        rc = segmented_read_std(ctxt, addr, address, op_bytes);
        return rc;
}
FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP2R(cmp, cmp_r);
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
        /* If src is zero, do not writeback, but update flags */
        if (ctxt->src.val == 0)
                ctxt->dst.type = OP_NONE;
        return fastop(ctxt, em_bsr);
}
static u8 test_cc(unsigned int condition, unsigned long flags)
{
        u8 rc;
        void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

        flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
        asm("push %[flags]; popf; call *%[fastop]"
            : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
        return rc;
}
static void fetch_register_operand(struct operand *op)
{
        switch (op->bytes) {
        case 1:
                op->val = *(u8 *)op->addr.reg;
                break;
        case 2:
                op->val = *(u16 *)op->addr.reg;
                break;
        case 4:
                op->val = *(u32 *)op->addr.reg;
                break;
        case 8:
                op->val = *(u64 *)op->addr.reg;
                break;
        }
}
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
        case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
        case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
        case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
        case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
        case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
        case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
        case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
        case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
        case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
        case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
        case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
        case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
        case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
        case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
        case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
                          int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
        case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
        case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
        case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
        case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
        case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
        case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
        case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
        case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
        case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
        case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
        case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
        case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
        case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
        case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
        case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
        case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
        case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
        case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
        case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
        case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
        case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
        case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}
static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
        ctxt->ops->get_fpu(ctxt);
        switch (reg) {
        case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
        case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
        case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
        case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
        case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
        case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
        case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
        case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
        default: BUG();
        }
        ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fninit");
        ctxt->ops->put_fpu(ctxt);
        return X86EMUL_CONTINUE;
}
static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
        u16 fcw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fnstcw %0": "+m"(fcw));
        ctxt->ops->put_fpu(ctxt);

        ctxt->dst.val = fcw;

        return X86EMUL_CONTINUE;
}
static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
        u16 fsw;

        if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
                return emulate_nm(ctxt);

        ctxt->ops->get_fpu(ctxt);
        asm volatile("fnstsw %0": "+m"(fsw));
        ctxt->ops->put_fpu(ctxt);

        ctxt->dst.val = fsw;

        return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
                                    struct operand *op)
{
        unsigned reg = ctxt->modrm_reg;

        if (!(ctxt->d & ModRM))
                reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

        if (ctxt->d & Sse) {
                op->type = OP_XMM;
                op->bytes = 16;
                op->addr.xmm = reg;
                read_sse_reg(ctxt, &op->vec_val, reg);
                return;
        }
        if (ctxt->d & Mmx) {
                reg &= 7;
                op->type = OP_MM;
                op->bytes = 8;
                op->addr.mm = reg;
                return;
        }

        op->type = OP_REG;
        op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
        op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

        fetch_register_operand(op);
        op->orig_val = op->val;
}
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
        if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
                ctxt->modrm_seg = VCPU_SREG_SS;
}
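
/*
 * Example: a memory operand based on RBP or RSP (e.g. mov (%rbp), %rax
 * with no segment override) is addressed relative to SS rather than the
 * default DS, matching the hardware's default-segment rules.
 */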
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        struct operand *op)
{
        u8 sib;
        int index_reg, base_reg, scale;
        int rc = X86EMUL_CONTINUE;
        ulong modrm_ea = 0;

        ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
        index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
        base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

        ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
        ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
        ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
        ctxt->modrm_seg = VCPU_SREG_DS;

        if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
                op->type = OP_REG;
                op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
                op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
                                               ctxt->d & ByteOp);
                if (ctxt->d & Sse) {
                        op->type = OP_XMM;
                        op->bytes = 16;
                        op->addr.xmm = ctxt->modrm_rm;
                        read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
                        return rc;
                }
                if (ctxt->d & Mmx) {
                        op->type = OP_MM;
                        op->bytes = 8;
                        op->addr.mm = ctxt->modrm_rm & 7;
                        return rc;
                }
                fetch_register_operand(op);
                return rc;
        }

        op->type = OP_MEM;

        if (ctxt->ad_bytes == 2) {
                unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
                unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
                unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
                unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

                /* 16-bit ModR/M decode. */
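                /*
                 * Worked example: modrm 0x46 with disp8 0x08 (as in
                 * "8b 46 08", mov 0x8(%bp), %ax) has mod=1, rm=6, so
                 * ea = bp + 8 (truncated to 16 bits below) with SS as
                 * the implied segment.
                 */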
                switch (ctxt->modrm_mod) {
                case 0:
                        if (ctxt->modrm_rm == 6)
                                modrm_ea += insn_fetch(u16, ctxt);
                        break;
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(u16, ctxt);
                        break;
                }
                switch (ctxt->modrm_rm) {
                case 0:
                        modrm_ea += bx + si;
                        break;
                case 1:
                        modrm_ea += bx + di;
                        break;
                case 2:
                        modrm_ea += bp + si;
                        break;
                case 3:
                        modrm_ea += bp + di;
                        break;
                case 4:
                        modrm_ea += si;
                        break;
                case 5:
                        modrm_ea += di;
                        break;
                case 6:
                        if (ctxt->modrm_mod != 0)
                                modrm_ea += bp;
                        break;
                case 7:
                        modrm_ea += bx;
                        break;
                }
                if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
                    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
                        ctxt->modrm_seg = VCPU_SREG_SS;
                modrm_ea = (u16)modrm_ea;
        } else {
                /* 32/64-bit ModR/M decode. */
                if ((ctxt->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, ctxt);
                        index_reg |= (sib >> 3) & 7;
                        base_reg |= sib & 7;
                        scale = sib >> 6;

                        if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
                                modrm_ea += insn_fetch(s32, ctxt);
                        else {
                                modrm_ea += reg_read(ctxt, base_reg);
                                adjust_modrm_seg(ctxt, base_reg);
                                /* Increment ESP on POP [ESP] */
                                if ((ctxt->d & IncSP) &&
                                    base_reg == VCPU_REGS_RSP)
                                        modrm_ea += ctxt->op_bytes;
                        }
                        if (index_reg != 4)
                                modrm_ea += reg_read(ctxt, index_reg) << scale;
                } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
                        modrm_ea += insn_fetch(s32, ctxt);
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                ctxt->rip_relative = 1;
                } else {
                        base_reg = ctxt->modrm_rm;
                        modrm_ea += reg_read(ctxt, base_reg);
                        adjust_modrm_seg(ctxt, base_reg);
                }
                switch (ctxt->modrm_mod) {
                case 1:
                        modrm_ea += insn_fetch(s8, ctxt);
                        break;
                case 2:
                        modrm_ea += insn_fetch(s32, ctxt);
                        break;
                }
        }
        op->addr.mem.ea = modrm_ea;
        if (ctxt->ad_bytes != 8)
                ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
        return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
                      struct operand *op)
{
        int rc = X86EMUL_CONTINUE;

        op->type = OP_MEM;
        switch (ctxt->ad_bytes) {
        case 2:
                op->addr.mem.ea = insn_fetch(u16, ctxt);
                break;
        case 4:
                op->addr.mem.ea = insn_fetch(u32, ctxt);
                break;
        case 8:
                op->addr.mem.ea = insn_fetch(u64, ctxt);
                break;
        }
done:
        return rc;
}
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
        long sv = 0, mask;

        if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
                mask = ~((long)ctxt->dst.bytes * 8 - 1);

                if (ctxt->src.bytes == 2)
                        sv = (s16)ctxt->src.val & (s16)mask;
                else if (ctxt->src.bytes == 4)
                        sv = (s32)ctxt->src.val & (s32)mask;
                else
                        sv = (s64)ctxt->src.val & (s64)mask;

                ctxt->dst.addr.mem.ea = address_mask(ctxt,
                                        ctxt->dst.addr.mem.ea + (sv >> 3));
        }

        /* only subword offset */
        ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
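
/*
 * Worked example: "bt %eax, (%rdi)" with %eax == 100 and a 4-byte operand
 * gives mask == ~31, sv == 96, so the effective address moves forward by
 * 96 >> 3 == 12 bytes and the in-word bit offset becomes 100 & 31 == 4.
 */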
static int read_emulated(struct x86_emulate_ctxt *ctxt,
                         unsigned long addr, void *dest, unsigned size)
{
        int rc;
        struct read_cache *mc = &ctxt->mem_read;

        if (mc->pos < mc->end)
                goto read_cached;

        WARN_ON((mc->end + size) >= sizeof(mc->data));

        rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
                                      &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        mc->end += size;

read_cached:
        memcpy(dest, mc->data + mc->pos, size);
        mc->pos += size;
        return X86EMUL_CONTINUE;
}
static int segmented_read(struct x86_emulate_ctxt *ctxt,
                          struct segmented_address addr,
                          void *data,
                          unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, false, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
                           struct segmented_address addr,
                           const void *data,
                           unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->write_emulated(ctxt, linear, data, size,
                                         &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
                             struct segmented_address addr,
                             const void *orig_data, const void *data,
                             unsigned size)
{
        int rc;
        ulong linear;

        rc = linearize(ctxt, addr, size, true, &linear);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
                                           size, &ctxt->exception);
}
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                           unsigned int size, unsigned short port,
                           void *dest)
{
        struct read_cache *rc = &ctxt->io_read;

        if (rc->pos == rc->end) { /* refill pio read ahead */
                unsigned int in_page, n;
                unsigned int count = ctxt->rep_prefix ?
                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
                in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
                if (n == 0)
                        n = 1;
                rc->pos = rc->end = 0;
                if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
                        return 0;
                rc->end = n * size;
        }

        if (ctxt->rep_prefix && (ctxt->d & String) &&
            !(ctxt->eflags & X86_EFLAGS_DF)) {
                ctxt->dst.data = rc->data + rc->pos;
                ctxt->dst.type = OP_MEM_STR;
                ctxt->dst.count = (rc->end - rc->pos) / size;
                rc->pos = rc->end;
        } else {
                memcpy(dest, rc->data + rc->pos, size);
                rc->pos += size;
        }
        return 1;
}
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 index, struct desc_struct *desc)
{
        struct desc_ptr dt;
        ulong addr;

        ctxt->ops->get_idt(ctxt, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, index << 3 | 0x2);

        addr = dt.address + index * 8;
        return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
                                   &ctxt->exception);
}
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, struct desc_ptr *dt)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        u32 base3 = 0;

        if (selector & 1 << 2) {
                struct desc_struct desc;
                u16 sel;

                memset(dt, 0, sizeof *dt);
                if (!ops->get_segment(ctxt, &sel, &desc, &base3,
                                      VCPU_SREG_LDTR))
                        return;

                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
                dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
        } else
                ops->get_gdt(ctxt, dt);
}
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
                              u16 selector, ulong *desc_addr_p)
{
        struct desc_ptr dt;
        u16 index = selector >> 3;
        ulong addr;

        get_descriptor_table_ptr(ctxt, selector, &dt);

        if (dt.size < index * 8 + 7)
                return emulate_gp(ctxt, selector & 0xfffc);

        addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
        if (addr >> 32 != 0) {
                u64 efer = 0;

                ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                if (!(efer & EFER_LMA))
                        addr &= (u32)-1;
        }
#endif

        *desc_addr_p = addr;
        return X86EMUL_CONTINUE;
}
/* allowed just for 8-byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, struct desc_struct *desc,
                                   ulong *desc_addr_p)
{
        int rc;

        rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
                                   &ctxt->exception);
}
/* allowed just for 8-byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    u16 selector, struct desc_struct *desc)
{
        int rc;
        ulong addr;

        rc = get_descriptor_ptr(ctxt, selector, &addr);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
                                    &ctxt->exception);
}
/* Does not support long mode */
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, int seg, u8 cpl,
                                     enum x86_transfer_type transfer,
                                     struct desc_struct *desc)
{
        struct desc_struct seg_desc, old_desc;
        u8 dpl, rpl;
        unsigned err_vec = GP_VECTOR;
        u32 err_code = 0;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
        ulong desc_addr;
        int ret;
        u16 dummy;
        u32 base3 = 0;

        memset(&seg_desc, 0, sizeof seg_desc);

        if (ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor (keep limit etc. for
                 * unreal mode) */
                ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
                set_desc_base(&seg_desc, selector << 4);
                goto load;
        } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
                /* VM86 needs a clean new segment descriptor */
                set_desc_base(&seg_desc, selector << 4);
                set_desc_limit(&seg_desc, 0xffff);
                seg_desc.type = 3;
                seg_desc.p = 1;
                seg_desc.s = 1;
                seg_desc.dpl = 3;
                goto load;
        }

        rpl = selector & 3;

        /* NULL selector is not valid for TR, CS and SS (except for long mode) */
        if ((seg == VCPU_SREG_CS
             || (seg == VCPU_SREG_SS
                 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
             || seg == VCPU_SREG_TR)
            && null_selector)
                goto exception;

        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;

        if (null_selector) /* for NULL selector skip all following checks */
                goto load;

        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        err_code = selector & 0xfffc;
        err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
                                                           GP_VECTOR;

        /* can't load system descriptor into segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s) {
                if (transfer == X86_TRANSFER_CALL_JMP)
                        return X86EMUL_UNHANDLEABLE;
                goto exception;
        }

        if (!seg_desc.p) {
                err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
                goto exception;
        }

        dpl = seg_desc.dpl;

        switch (seg) {
        case VCPU_SREG_SS:
                /*
                 * segment is not a writable data segment or segment
                 * selector's RPL != CPL or DPL != CPL
                 */
                if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
                        goto exception;
                break;
        case VCPU_SREG_CS:
                if (!(seg_desc.type & 8))
                        goto exception;

                if (seg_desc.type & 4) {
                        /* conforming */
                        if (dpl > cpl)
                                goto exception;
                } else {
                        /* nonconforming */
                        if (rpl > cpl || dpl != cpl)
                                goto exception;
                }
                /* in long-mode d/b must be clear if l is set */
                if (seg_desc.d && seg_desc.l) {
                        u64 efer = 0;

                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
                        if (efer & EFER_LMA)
                                goto exception;
                }

                /* CS(RPL) <- CPL */
                selector = (selector & 0xfffc) | cpl;
                break;
        case VCPU_SREG_TR:
                if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
                        goto exception;
                old_desc = seg_desc;
                seg_desc.type |= 2; /* busy */
                ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
                                                  sizeof(seg_desc), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                break;
        case VCPU_SREG_LDTR:
                if (seg_desc.s || seg_desc.type != 2)
                        goto exception;
                break;
        default: /* DS, ES, FS, or GS */
                /*
                 * segment is not a data or readable code segment or
                 * ((segment is a data or nonconforming code segment)
                 * and (both RPL and CPL > DPL))
                 */
                if ((seg_desc.type & 0xa) == 0x8 ||
                    (((seg_desc.type & 0xc) != 0xc) &&
                     (rpl > dpl && cpl > dpl)))
                        goto exception;
                break;
        }

        if (seg_desc.s) {
                /* mark segment as accessed */
                if (!(seg_desc.type & 1)) {
                        seg_desc.type |= 1;
                        ret = write_segment_descriptor(ctxt, selector,
                                                       &seg_desc);
                        if (ret != X86EMUL_CONTINUE)
                                return ret;
                }
        } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
                ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
                                          sizeof(base3), &ctxt->exception);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
                if (is_noncanonical_address(get_desc_base(&seg_desc) |
                                            ((u64)base3 << 32)))
                        return emulate_gp(ctxt, 0);
        }
load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
        if (desc)
                *desc = seg_desc;
        return X86EMUL_CONTINUE;
exception:
        return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, int seg)
{
        u8 cpl = ctxt->ops->cpl(ctxt);

        return __load_segment_descriptor(ctxt, selector, seg, cpl,
                                         X86_TRANSFER_NONE, NULL);
}
static void write_register_operand(struct operand *op)
{
        return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
        switch (op->type) {
        case OP_REG:
                write_register_operand(op);
                break;
        case OP_MEM:
                if (ctxt->lock_prefix)
                        return segmented_cmpxchg(ctxt,
                                                 op->addr.mem,
                                                 &op->orig_val,
                                                 &op->val,
                                                 op->bytes);
                else
                        return segmented_write(ctxt,
                                               op->addr.mem,
                                               &op->val,
                                               op->bytes);
                break;
        case OP_MEM_STR:
                return segmented_write(ctxt,
                                       op->addr.mem,
                                       op->data,
                                       op->bytes * op->count);
                break;
        case OP_XMM:
                write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
                break;
        case OP_MM:
                write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
                break;
        case OP_NONE:
                /* no writeback */
                break;
        default:
                break;
        }
        return X86EMUL_CONTINUE;
}
static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
        struct segmented_address addr;

        rsp_increment(ctxt, -bytes);
        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;

        return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
        return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
                       void *dest, int len)
{
        int rc;
        struct segmented_address addr;

        addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
        addr.seg = VCPU_SREG_SS;
        rc = segmented_read(ctxt, addr, dest, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rsp_increment(ctxt, len);
        return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
        return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        void *dest, int len)
{
        int rc;
        unsigned long val, change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);

        rc = emulate_pop(ctxt, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
                      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
                      X86_EFLAGS_AC | X86_EFLAGS_ID;

        switch(ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
                        change_mask |= X86_EFLAGS_IOPL;
                if (cpl <= iopl)
                        change_mask |= X86_EFLAGS_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3)
                        return emulate_gp(ctxt, 0);
                change_mask |= X86_EFLAGS_IF;
                break;
        default: /* real mode */
                change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
                break;
        }

        *(unsigned long *)dest =
                (ctxt->eflags & ~change_mask) | (val & change_mask);

        return rc;
}
static int em_popf(struct x86_emulate_ctxt *ctxt)
{
        ctxt->dst.type = OP_REG;
        ctxt->dst.addr.reg = &ctxt->eflags;
        ctxt->dst.bytes = ctxt->op_bytes;
        return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned frame_size = ctxt->src.val;
        unsigned nesting_level = ctxt->src2.val & 31;
        ulong rbp;

        if (nesting_level)
                return X86EMUL_UNHANDLEABLE;

        rbp = reg_read(ctxt, VCPU_REGS_RBP);
        rc = push(ctxt, &rbp, stack_size(ctxt));
        if (rc != X86EMUL_CONTINUE)
                return rc;
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
                      stack_mask(ctxt));
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
                      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
                      stack_mask(ctxt));
        return X86EMUL_CONTINUE;
}
static int em_leave(struct x86_emulate_ctxt *ctxt)
{
        assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
                      stack_mask(ctxt));
        return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;

        ctxt->src.val = get_segment_selector(ctxt, seg);
        if (ctxt->op_bytes == 4) {
                rsp_increment(ctxt, -2);
                ctxt->op_bytes = 2;
        }

        return em_push(ctxt);
}
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned long selector;
        int rc;

        rc = emulate_pop(ctxt, &selector, 2);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (ctxt->modrm_reg == VCPU_SREG_SS)
                ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
        if (ctxt->op_bytes > 2)
                rsp_increment(ctxt, ctxt->op_bytes - 2);

        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
        return rc;
}
static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
        unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RAX;

        while (reg <= VCPU_REGS_RDI) {
                ctxt->src.val = (reg == VCPU_REGS_RSP) ?
                                old_esp : reg_read(ctxt, reg);

                rc = em_push(ctxt);
                if (rc != X86EMUL_CONTINUE)
                        return rc;

                ++reg;
        }

        return rc;
}
static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
        ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
        return em_push(ctxt);
}
static int em_popa(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
        u32 val;

        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
                        rsp_increment(ctxt, ctxt->op_bytes);
                        --reg;
                        continue;
                }

                rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
                assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
                --reg;
        }
        return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        const struct x86_emulate_ops *ops = ctxt->ops;
        int rc;
        struct desc_ptr dt;
        gva_t cs_addr;
        gva_t eip_addr;
        u16 cs, eip;

        /* TODO: Add limit checks */
        ctxt->src.val = ctxt->eflags;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->src.val = ctxt->_eip;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ops->get_idt(ctxt, &dt);

        eip_addr = dt.address + (irq << 2);
        cs_addr = dt.address + (irq << 2) + 2;

        rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = eip;

        return rc;
}
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
        int rc;

        invalidate_registers(ctxt);
        rc = __emulate_int_real(ctxt, irq);
        if (rc == X86EMUL_CONTINUE)
                writeback_registers(ctxt);
        return rc;
}
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
        switch(ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return __emulate_int_real(ctxt, irq);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* Protected mode interrupts are not yet implemented. */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
        int rc = X86EMUL_CONTINUE;
        unsigned long temp_eip = 0;
        unsigned long temp_eflags = 0;
        unsigned long cs = 0;
        unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
                             X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
                             X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
                             X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
                             X86_EFLAGS_AC | X86_EFLAGS_ID |
                             X86_EFLAGS_FIXED;
        unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
                                  X86_EFLAGS_VIP;

        /* TODO: Add stack limit check */

        rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        if (temp_eip & ~0xffff)
                return emulate_gp(ctxt, 0);

        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);

        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->_eip = temp_eip;

        if (ctxt->op_bytes == 4)
                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
        else if (ctxt->op_bytes == 2) {
                ctxt->eflags &= ~0xffff;
                ctxt->eflags |= temp_eflags;
        }

        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
        ctxt->eflags |= X86_EFLAGS_FIXED;
        ctxt->ops->set_nmi_mask(ctxt, false);

        return rc;
}
static int em_iret(struct x86_emulate_ctxt *ctxt)
{
        switch(ctxt->mode) {
        case X86EMUL_MODE_REAL:
                return emulate_iret_real(ctxt);
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT64:
        default:
                /* iret from protected mode is not yet implemented. */
                return X86EMUL_UNHANDLEABLE;
        }
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned short sel;
        struct desc_struct new_desc;
        u8 cpl = ctxt->ops->cpl(ctxt);

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_CALL_JMP,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}
static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
        return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        long int old_eip;

        old_eip = ctxt->_eip;
        rc = assign_eip_near(ctxt, ctxt->src.val);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        ctxt->src.val = old_eip;
        rc = em_push(ctxt);
        return rc;
}
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
        u64 old = ctxt->dst.orig_val64;

        if (ctxt->dst.bytes == 16)
                return X86EMUL_UNHANDLEABLE;

        if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
                ctxt->eflags &= ~X86_EFLAGS_ZF;
        } else {
                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
                        (u32) reg_read(ctxt, VCPU_REGS_RBX);

                ctxt->eflags |= X86_EFLAGS_ZF;
        }
        return X86EMUL_CONTINUE;
}
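
/*
 * CMPXCHG8B semantics recap: EDX:EAX is compared against the 64-bit
 * destination; on mismatch the old value is loaded into EDX:EAX and ZF is
 * cleared, and on match ECX:EBX is written back and ZF is set, which is
 * exactly the two branches above.
 */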
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        return assign_eip_near(ctxt, eip);
}
static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
        int rc;
        unsigned long eip, cs;
        int cpl = ctxt->ops->cpl(ctxt);
        struct desc_struct new_desc;

        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        /* Outer-privilege level return is not implemented */
        if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                return X86EMUL_UNHANDLEABLE;
        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
                                       X86_TRANSFER_RET,
                                       &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = assign_eip_far(ctxt, eip, &new_desc);
        /* Error handling is not implemented. */
        if (rc != X86EMUL_CONTINUE)
                return X86EMUL_UNHANDLEABLE;

        return rc;
}
static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
        int rc;

        rc = em_ret_far(ctxt);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rsp_increment(ctxt, ctxt->src.val);
        return X86EMUL_CONTINUE;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
        /* Save real source value, then compare EAX against destination. */
        ctxt->dst.orig_val = ctxt->dst.val;
        ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
        ctxt->src.orig_val = ctxt->src.val;
        ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);

        if (ctxt->eflags & X86_EFLAGS_ZF) {
                /* Success: write back to memory; no update of EAX */
                ctxt->src.type = OP_NONE;
                ctxt->dst.val = ctxt->src.orig_val;
        } else {
                /* Failure: write the value we saw to EAX. */
                ctxt->src.type = OP_REG;
                ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
                ctxt->src.val = ctxt->dst.orig_val;
                /* Create write-cycle to dest by writing the same value */
                ctxt->dst.val = ctxt->dst.orig_val;
        }
        return X86EMUL_CONTINUE;
}
static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
        int seg = ctxt->src2.val;
        unsigned short sel;
        int rc;

        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

        rc = load_segment_descriptor(ctxt, sel, seg);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        ctxt->dst.val = ctxt->src.val;
        return rc;
}
static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
        u32 eax, ebx, ecx, edx;

        eax = 0x80000001;
        ecx = 0;
        ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
        return edx & bit(X86_FEATURE_LM);
}
#define GET_SMSTATE(type, smbase, offset)                                 \
        ({                                                                \
         type __val;                                                      \
         int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,      \
                                      sizeof(__val));                     \
         if (r != X86EMUL_CONTINUE)                                       \
                 return X86EMUL_UNHANDLEABLE;                             \
         __val;                                                           \
        })
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->d    = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->p    = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >>  8) & 15;
}
static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;

        selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);

        if (n < 3)
                offset = 0x7f84 + n * 12;
        else
                offset = 0x7f2c + (n - 3) * 12;

        set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
        ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
        return X86EMUL_CONTINUE;
}
static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
        struct desc_struct desc;
        int offset;
        u16 selector;
        u32 base3;

        offset = 0x7e00 + n * 16;

        selector = GET_SMSTATE(u16, smbase, offset);
        rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
        set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
        base3 = GET_SMSTATE(u32, smbase, offset + 12);

        ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
        return X86EMUL_CONTINUE;
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                                    u64 cr0, u64 cr4)
{
        int bad;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode. However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = ctxt->ops->set_cr(ctxt, 0, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
        }

        return X86EMUL_CONTINUE;
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
        struct desc_struct desc;
        struct desc_ptr dt;
        u16 selector;
        u32 val, cr0, cr4;
        int i;

        cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
        ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
        ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
        ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);

        val = GET_SMSTATE(u32, smbase, 0x7fcc);
        ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
        val = GET_SMSTATE(u32, smbase, 0x7fc8);
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

        selector = GET_SMSTATE(u32, smbase, 0x7fc4);
        set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
        ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

        selector = GET_SMSTATE(u32, smbase, 0x7fc0);
        set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
        ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

        dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
        dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
        ctxt->ops->set_gdt(ctxt, &dt);

        dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
        dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
        ctxt->ops->set_idt(ctxt, &dt);

        for (i = 0; i < 6; i++) {
                int r = rsm_load_seg_32(ctxt, smbase, i);
                if (r != X86EMUL_CONTINUE)
                        return r;
        }

        cr4 = GET_SMSTATE(u32, smbase, 0x7f14);

        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));

        return rsm_enter_protected_mode(ctxt, cr0, cr4);
}
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
        struct desc_struct desc;
        struct desc_ptr dt;
        u64 val, cr0, cr4;
        u32 base3;
        u16 selector;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);

        ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
        ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;

        val = GET_SMSTATE(u32, smbase, 0x7f68);
        ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
        val = GET_SMSTATE(u32, smbase, 0x7f60);
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

        cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
        ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
        cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
        val = GET_SMSTATE(u64, smbase, 0x7ed0);
        ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);

        selector = GET_SMSTATE(u32, smbase, 0x7e90);
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
        set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
        base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
        ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);

        dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
        dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
        ctxt->ops->set_idt(ctxt, &dt);

        selector = GET_SMSTATE(u32, smbase, 0x7e70);
        rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
        set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
        set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
        base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
        ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);

        dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
        dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
        ctxt->ops->set_gdt(ctxt, &dt);

        r = rsm_enter_protected_mode(ctxt, cr0, cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        for (i = 0; i < 6; i++) {
                r = rsm_load_seg_64(ctxt, smbase, i);
                if (r != X86EMUL_CONTINUE)
                        return r;
        }

        return X86EMUL_CONTINUE;
}
2462 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2464 unsigned long cr0, cr4, efer;
2468 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2469 return emulate_ud(ctxt);
2472 * Get back to real mode, to prepare a safe state in which to load
2473 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2474 * supports long mode.
2476 cr4 = ctxt->ops->get_cr(ctxt, 4);
2477 if (emulator_has_longmode(ctxt)) {
2478 struct desc_struct cs_desc;
2480 /* Zero CR4.PCIDE before CR0.PG. */
2481 if (cr4 & X86_CR4_PCIDE) {
2482 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2483 cr4 &= ~X86_CR4_PCIDE;
2486 /* A 32-bit code segment is required to clear EFER.LMA. */
2487 memset(&cs_desc, 0, sizeof(cs_desc));
2489 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2490 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2493 /* For the 64-bit case, this will clear EFER.LMA. */
2494 cr0 = ctxt->ops->get_cr(ctxt, 0);
2495 if (cr0 & X86_CR0_PE)
2496 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2498 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2499 if (cr4 & X86_CR4_PAE)
2500 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2502 /* And finally go back to 32-bit mode. */
2504 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2506 smbase = ctxt->ops->get_smbase(ctxt);
2507 if (emulator_has_longmode(ctxt))
2508 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2510 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2512 if (ret != X86EMUL_CONTINUE) {
2513 /* FIXME: should triple fault */
2514 return X86EMUL_UNHANDLEABLE;
2517 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2518 ctxt->ops->set_nmi_mask(ctxt, false);
2520 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2521 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2522 return X86EMUL_CONTINUE;
2526 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2527 struct desc_struct *cs, struct desc_struct *ss)
2529 cs->l = 0; /* will be adjusted later */
2530 set_desc_base(cs, 0); /* flat segment */
2531 cs->g = 1; /* 4kb granularity */
2532 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2533 cs->type = 0x0b; /* Read, Execute, Accessed */
2535 cs->dpl = 0; /* will be adjusted later */
2540 set_desc_base(ss, 0); /* flat segment */
2541 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2542 ss->g = 1; /* 4kb granularity */
2544 ss->type = 0x03; /* Read/Write, Accessed */
2545 ss->d = 1; /* 32bit stack segment */
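/*
 * SYSCALL/SYSENTER and their return instructions only take selectors
 * from MSRs; the cached descriptors are architecturally forced to flat
 * 4GB read/execute and read/write segments, which is why this helper
 * hardcodes base 0 and limit 0xfffff instead of consulting the GDT.
 */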
2552 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2554 u32 eax, ebx, ecx, edx;
2557 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2558 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2559 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2560 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2563 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2565 const struct x86_emulate_ops *ops = ctxt->ops;
2566 u32 eax, ebx, ecx, edx;
* syscall should always be enabled in long mode, so only become
* vendor-specific (via cpuid) if other modes are active...
2572 if (ctxt->mode == X86EMUL_MODE_PROT64)
2577 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
* Intel ("GenuineIntel")
* remark: Intel CPUs only support "syscall" in 64-bit
* long mode. A 64-bit guest running a 32-bit compat
* application will therefore #UD! While this behaviour
* could be fixed by emulating the AMD response,
* AMD CPUs can't be made to behave like Intel ones.
2586 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2587 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2588 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2591 /* AMD ("AuthenticAMD") */
2592 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2593 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2594 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2597 /* AMD ("AMDisbetter!") */
2598 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2599 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2600 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2603 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
2607 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2609 const struct x86_emulate_ops *ops = ctxt->ops;
2610 struct desc_struct cs, ss;
2615 /* syscall is not available in real mode */
2616 if (ctxt->mode == X86EMUL_MODE_REAL ||
2617 ctxt->mode == X86EMUL_MODE_VM86)
2618 return emulate_ud(ctxt);
if (!em_syscall_is_enabled(ctxt))
2621 return emulate_ud(ctxt);
2623 ops->get_msr(ctxt, MSR_EFER, &efer);
2624 setup_syscalls_segments(ctxt, &cs, &ss);
2626 if (!(efer & EFER_SCE))
2627 return emulate_ud(ctxt);
2629 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2631 cs_sel = (u16)(msr_data & 0xfffc);
2632 ss_sel = (u16)(msr_data + 8);
2634 if (efer & EFER_LMA) {
2638 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2639 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2641 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2642 if (efer & EFER_LMA) {
2643 #ifdef CONFIG_X86_64
2644 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2647 ctxt->mode == X86EMUL_MODE_PROT64 ?
2648 MSR_LSTAR : MSR_CSTAR, &msr_data);
2649 ctxt->_eip = msr_data;
2651 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2652 ctxt->eflags &= ~msr_data;
2653 ctxt->eflags |= X86_EFLAGS_FIXED;
2657 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2658 ctxt->_eip = (u32)msr_data;
2660 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2663 return X86EMUL_CONTINUE;
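/*
 * MSR_STAR layout, for reference: bits 47:32 hold the SYSCALL kernel
 * CS selector (SS is implied at CS + 8, as computed above) and bits
 * 63:48 the SYSRET base selector.  E.g. STAR = 0x0023001000000000ULL
 * gives kernel CS = 0x10 and kernel SS = 0x18 on SYSCALL.
 */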
2666 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2668 const struct x86_emulate_ops *ops = ctxt->ops;
2669 struct desc_struct cs, ss;
2674 ops->get_msr(ctxt, MSR_EFER, &efer);
2675 /* inject #GP if in real mode */
2676 if (ctxt->mode == X86EMUL_MODE_REAL)
2677 return emulate_gp(ctxt, 0);
* Not recognized on AMD in compat mode (but is recognized in legacy mode).
2683 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2684 && !vendor_intel(ctxt))
2685 return emulate_ud(ctxt);
2687 /* sysenter/sysexit have not been tested in 64bit mode. */
2688 if (ctxt->mode == X86EMUL_MODE_PROT64)
2689 return X86EMUL_UNHANDLEABLE;
2691 setup_syscalls_segments(ctxt, &cs, &ss);
2693 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2694 if ((msr_data & 0xfffc) == 0x0)
2695 return emulate_gp(ctxt, 0);
2697 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2698 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2699 ss_sel = cs_sel + 8;
2700 if (efer & EFER_LMA) {
2705 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2706 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2708 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2709 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2711 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2712 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2715 return X86EMUL_CONTINUE;
2718 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2720 const struct x86_emulate_ops *ops = ctxt->ops;
2721 struct desc_struct cs, ss;
2722 u64 msr_data, rcx, rdx;
2724 u16 cs_sel = 0, ss_sel = 0;
2726 /* inject #GP if in real mode or Virtual 8086 mode */
2727 if (ctxt->mode == X86EMUL_MODE_REAL ||
2728 ctxt->mode == X86EMUL_MODE_VM86)
2729 return emulate_gp(ctxt, 0);
2731 setup_syscalls_segments(ctxt, &cs, &ss);
if (ctxt->rex_prefix & 0x8)
2734 usermode = X86EMUL_MODE_PROT64;
2736 usermode = X86EMUL_MODE_PROT32;
2738 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2739 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2743 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2745 case X86EMUL_MODE_PROT32:
2746 cs_sel = (u16)(msr_data + 16);
2747 if ((msr_data & 0xfffc) == 0x0)
2748 return emulate_gp(ctxt, 0);
2749 ss_sel = (u16)(msr_data + 24);
2753 case X86EMUL_MODE_PROT64:
2754 cs_sel = (u16)(msr_data + 32);
2755 if (msr_data == 0x0)
2756 return emulate_gp(ctxt, 0);
2757 ss_sel = cs_sel + 8;
2760 if (is_noncanonical_address(rcx) ||
2761 is_noncanonical_address(rdx))
2762 return emulate_gp(ctxt, 0);
2765 cs_sel |= SEGMENT_RPL_MASK;
2766 ss_sel |= SEGMENT_RPL_MASK;
2768 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2769 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2772 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2774 return X86EMUL_CONTINUE;
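/*
 * SYSEXIT derives everything from MSR_IA32_SYSENTER_CS: the 32-bit
 * return path uses CS + 16 / SS + 24 and the 64-bit path CS + 32 /
 * CS + 40, with the RPL forced to 3 above, mirroring what the hardware
 * instruction does.
 */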
2777 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2780 if (ctxt->mode == X86EMUL_MODE_REAL)
2782 if (ctxt->mode == X86EMUL_MODE_VM86)
2784 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2785 return ctxt->ops->cpl(ctxt) > iopl;
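/*
 * Example: with EFLAGS.IOPL == 1, CPL 0 and 1 may issue IN/OUT
 * directly (not "bad"), while CPL 2 and 3 must pass the TSS I/O
 * permission bitmap check performed below.
 */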
2788 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2791 const struct x86_emulate_ops *ops = ctxt->ops;
2792 struct desc_struct tr_seg;
2795 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2796 unsigned mask = (1 << len) - 1;
2799 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2802 if (desc_limit_scaled(&tr_seg) < 103)
2804 base = get_desc_base(&tr_seg);
2805 #ifdef CONFIG_X86_64
2806 base |= ((u64)base3) << 32;
2808 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2809 if (r != X86EMUL_CONTINUE)
2811 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2813 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2814 if (r != X86EMUL_CONTINUE)
2816 if ((perm >> bit_idx) & mask)
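/*
 * Worked example for the bitmap walk above: "in al, dx" with DX =
 * 0x3f9 reads the bitmap byte at io_bitmap_ptr + 0x7f (port / 8) and
 * tests bit 1 (port & 7) with mask 1; the access is allowed only if
 * every bit covered by the operand width is clear.
 */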
2821 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2827 if (emulator_bad_iopl(ctxt))
2828 if (!emulator_io_port_access_allowed(ctxt, port, len))
2831 ctxt->perm_ok = true;
2836 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
* Intel CPUs mask the counter and pointers in a rather strange
* manner when ECX is zero, due to REP-string optimizations.
2842 #ifdef CONFIG_X86_64
2843 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2846 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2849 case 0xa4: /* movsb */
2850 case 0xa5: /* movsd/w */
2851 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2853 case 0xaa: /* stosb */
2854 case 0xab: /* stosd/w */
2855 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2860 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2861 struct tss_segment_16 *tss)
2863 tss->ip = ctxt->_eip;
2864 tss->flag = ctxt->eflags;
2865 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2866 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2867 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2868 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2869 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2870 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2871 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2872 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2874 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2875 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2876 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2877 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2878 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2881 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2882 struct tss_segment_16 *tss)
2887 ctxt->_eip = tss->ip;
2888 ctxt->eflags = tss->flag | 2;
2889 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2890 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2891 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2892 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2893 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2894 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2895 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2896 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
* SDM says that segment selectors are loaded before segment descriptors.
2902 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2903 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2904 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2905 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2906 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
* Now load segment descriptors. If a fault happens at this stage,
* it is handled in the context of the new task.
2914 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2915 X86_TRANSFER_TASK_SWITCH, NULL);
2916 if (ret != X86EMUL_CONTINUE)
2918 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2919 X86_TRANSFER_TASK_SWITCH, NULL);
2920 if (ret != X86EMUL_CONTINUE)
2922 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2923 X86_TRANSFER_TASK_SWITCH, NULL);
2924 if (ret != X86EMUL_CONTINUE)
2926 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2927 X86_TRANSFER_TASK_SWITCH, NULL);
2928 if (ret != X86EMUL_CONTINUE)
2930 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2931 X86_TRANSFER_TASK_SWITCH, NULL);
2932 if (ret != X86EMUL_CONTINUE)
2935 return X86EMUL_CONTINUE;
2938 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2939 u16 tss_selector, u16 old_tss_sel,
2940 ulong old_tss_base, struct desc_struct *new_desc)
2942 const struct x86_emulate_ops *ops = ctxt->ops;
2943 struct tss_segment_16 tss_seg;
2945 u32 new_tss_base = get_desc_base(new_desc);
2947 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2949 if (ret != X86EMUL_CONTINUE)
2952 save_state_to_tss16(ctxt, &tss_seg);
2954 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2956 if (ret != X86EMUL_CONTINUE)
2959 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2961 if (ret != X86EMUL_CONTINUE)
2964 if (old_tss_sel != 0xffff) {
2965 tss_seg.prev_task_link = old_tss_sel;
2967 ret = ops->write_std(ctxt, new_tss_base,
2968 &tss_seg.prev_task_link,
2969 sizeof tss_seg.prev_task_link,
2971 if (ret != X86EMUL_CONTINUE)
2975 return load_state_from_tss16(ctxt, &tss_seg);
2978 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2979 struct tss_segment_32 *tss)
/* CR3 and the LDT selector are intentionally not saved */
2982 tss->eip = ctxt->_eip;
2983 tss->eflags = ctxt->eflags;
2984 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2985 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2986 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2987 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2988 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2989 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2990 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2991 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2993 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2994 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2995 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2996 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2997 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2998 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3001 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3002 struct tss_segment_32 *tss)
3007 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3008 return emulate_gp(ctxt, 0);
3009 ctxt->_eip = tss->eip;
3010 ctxt->eflags = tss->eflags | 2;
3012 /* General purpose registers */
3013 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3014 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3015 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3016 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3017 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3018 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3019 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3020 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
* SDM says that segment selectors are loaded before segment
* descriptors. This is important because CPL checks will
* use CS.RPL.
3027 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3028 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3029 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3030 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3031 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3032 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3033 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3036 * If we're switching between Protected Mode and VM86, we need to make
3037 * sure to update the mode before loading the segment descriptors so
3038 * that the selectors are interpreted correctly.
3040 if (ctxt->eflags & X86_EFLAGS_VM) {
3041 ctxt->mode = X86EMUL_MODE_VM86;
3044 ctxt->mode = X86EMUL_MODE_PROT32;
* Now load segment descriptors. If a fault happens at this stage,
* it is handled in the context of the new task.
3052 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3053 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3054 if (ret != X86EMUL_CONTINUE)
3056 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3057 X86_TRANSFER_TASK_SWITCH, NULL);
3058 if (ret != X86EMUL_CONTINUE)
3060 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3061 X86_TRANSFER_TASK_SWITCH, NULL);
3062 if (ret != X86EMUL_CONTINUE)
3064 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3065 X86_TRANSFER_TASK_SWITCH, NULL);
3066 if (ret != X86EMUL_CONTINUE)
3068 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3069 X86_TRANSFER_TASK_SWITCH, NULL);
3070 if (ret != X86EMUL_CONTINUE)
3072 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3073 X86_TRANSFER_TASK_SWITCH, NULL);
3074 if (ret != X86EMUL_CONTINUE)
3076 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3077 X86_TRANSFER_TASK_SWITCH, NULL);
3082 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3083 u16 tss_selector, u16 old_tss_sel,
3084 ulong old_tss_base, struct desc_struct *new_desc)
3086 const struct x86_emulate_ops *ops = ctxt->ops;
3087 struct tss_segment_32 tss_seg;
3089 u32 new_tss_base = get_desc_base(new_desc);
3090 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3091 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3093 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3095 if (ret != X86EMUL_CONTINUE)
3098 save_state_to_tss32(ctxt, &tss_seg);
3100 /* Only GP registers and segment selectors are saved */
3101 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3102 ldt_sel_offset - eip_offset, &ctxt->exception);
3103 if (ret != X86EMUL_CONTINUE)
3106 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3108 if (ret != X86EMUL_CONTINUE)
3111 if (old_tss_sel != 0xffff) {
3112 tss_seg.prev_task_link = old_tss_sel;
3114 ret = ops->write_std(ctxt, new_tss_base,
3115 &tss_seg.prev_task_link,
3116 sizeof tss_seg.prev_task_link,
3118 if (ret != X86EMUL_CONTINUE)
3122 return load_state_from_tss32(ctxt, &tss_seg);
3125 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3126 u16 tss_selector, int idt_index, int reason,
3127 bool has_error_code, u32 error_code)
3129 const struct x86_emulate_ops *ops = ctxt->ops;
3130 struct desc_struct curr_tss_desc, next_tss_desc;
3132 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3133 ulong old_tss_base =
3134 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3136 ulong desc_addr, dr7;
3138 /* FIXME: old_tss_base == ~0 ? */
3140 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3141 if (ret != X86EMUL_CONTINUE)
3143 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3144 if (ret != X86EMUL_CONTINUE)
3147 /* FIXME: check that next_tss_desc is tss */
3150 * Check privileges. The three cases are task switch caused by...
3152 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3153 * 2. Exception/IRQ/iret: No check is performed
3154 * 3. jmp/call to TSS/task-gate: No check is performed since the
3155 * hardware checks it before exiting.
3157 if (reason == TASK_SWITCH_GATE) {
3158 if (idt_index != -1) {
3159 /* Software interrupts */
3160 struct desc_struct task_gate_desc;
3163 ret = read_interrupt_descriptor(ctxt, idt_index,
3165 if (ret != X86EMUL_CONTINUE)
3168 dpl = task_gate_desc.dpl;
3169 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3170 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3174 desc_limit = desc_limit_scaled(&next_tss_desc);
3175 if (!next_tss_desc.p ||
3176 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3177 desc_limit < 0x2b)) {
3178 return emulate_ts(ctxt, tss_selector & 0xfffc);
3181 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3182 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3183 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3186 if (reason == TASK_SWITCH_IRET)
3187 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
/* Set the back link to the previous task only if the NT bit is set in
eflags; note that old_tss_sel is not used after this point. */
3191 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3192 old_tss_sel = 0xffff;
3194 if (next_tss_desc.type & 8)
3195 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3196 old_tss_base, &next_tss_desc);
3198 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3199 old_tss_base, &next_tss_desc);
3200 if (ret != X86EMUL_CONTINUE)
3203 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3204 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3206 if (reason != TASK_SWITCH_IRET) {
3207 next_tss_desc.type |= (1 << 1); /* set busy flag */
3208 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3211 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3212 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3214 if (has_error_code) {
3215 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3216 ctxt->lock_prefix = 0;
3217 ctxt->src.val = (unsigned long) error_code;
3218 ret = em_push(ctxt);
3221 ops->get_dr(ctxt, 7, &dr7);
3222 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3227 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3228 u16 tss_selector, int idt_index, int reason,
3229 bool has_error_code, u32 error_code)
3233 invalidate_registers(ctxt);
3234 ctxt->_eip = ctxt->eip;
3235 ctxt->dst.type = OP_NONE;
3237 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3238 has_error_code, error_code);
3240 if (rc == X86EMUL_CONTINUE) {
3241 ctxt->eip = ctxt->_eip;
3242 writeback_registers(ctxt);
3245 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3248 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3251 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3253 register_address_increment(ctxt, reg, df * op->bytes);
3254 op->addr.mem.ea = register_address(ctxt, reg);
3257 static int em_das(struct x86_emulate_ctxt *ctxt)
3260 bool af, cf, old_cf;
3262 cf = ctxt->eflags & X86_EFLAGS_CF;
3268 af = ctxt->eflags & X86_EFLAGS_AF;
3269 if ((al & 0x0f) > 9 || af) {
3271 cf = old_cf | (al >= 250);
3276 if (old_al > 0x99 || old_cf) {
3282 /* Set PF, ZF, SF */
3283 ctxt->src.type = OP_IMM;
3285 ctxt->src.bytes = 1;
3286 fastop(ctxt, em_or);
3287 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3289 ctxt->eflags |= X86_EFLAGS_CF;
3291 ctxt->eflags |= X86_EFLAGS_AF;
3292 return X86EMUL_CONTINUE;
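/*
 * DAS example: after "sub al, 0x05" with AL = 0x23, AL = 0x1e with AF
 * set; the low nibble is > 9, so 6 is subtracted to give AL = 0x18,
 * the packed-BCD result of 23 - 05.
 */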
3295 static int em_aam(struct x86_emulate_ctxt *ctxt)
3299 if (ctxt->src.val == 0)
3300 return emulate_de(ctxt);
3302 al = ctxt->dst.val & 0xff;
3303 ah = al / ctxt->src.val;
3304 al %= ctxt->src.val;
3306 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3308 /* Set PF, ZF, SF */
3309 ctxt->src.type = OP_IMM;
3311 ctxt->src.bytes = 1;
3312 fastop(ctxt, em_or);
3314 return X86EMUL_CONTINUE;
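/*
 * AAM example: with AL = 0x35 (decimal 53) and the default base-10
 * immediate, AH = 53 / 10 = 5 and AL = 53 % 10 = 3, i.e. AX = 0x0503.
 */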
3317 static int em_aad(struct x86_emulate_ctxt *ctxt)
3319 u8 al = ctxt->dst.val & 0xff;
3320 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3322 al = (al + (ah * ctxt->src.val)) & 0xff;
3324 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3326 /* Set PF, ZF, SF */
3327 ctxt->src.type = OP_IMM;
3329 ctxt->src.bytes = 1;
3330 fastop(ctxt, em_or);
3332 return X86EMUL_CONTINUE;
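/*
 * AAD is the inverse: with AX = 0x0503 and base 10, AL becomes
 * 5 * 10 + 3 = 53 (0x35) and AH is cleared.
 */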
3335 static int em_call(struct x86_emulate_ctxt *ctxt)
3338 long rel = ctxt->src.val;
3340 ctxt->src.val = (unsigned long)ctxt->_eip;
3341 rc = jmp_rel(ctxt, rel);
3342 if (rc != X86EMUL_CONTINUE)
3344 return em_push(ctxt);
3347 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3352 struct desc_struct old_desc, new_desc;
3353 const struct x86_emulate_ops *ops = ctxt->ops;
3354 int cpl = ctxt->ops->cpl(ctxt);
3355 enum x86emul_mode prev_mode = ctxt->mode;
3357 old_eip = ctxt->_eip;
3358 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3360 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3361 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3362 X86_TRANSFER_CALL_JMP, &new_desc);
3363 if (rc != X86EMUL_CONTINUE)
3366 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3367 if (rc != X86EMUL_CONTINUE)
3370 ctxt->src.val = old_cs;
3372 if (rc != X86EMUL_CONTINUE)
3375 ctxt->src.val = old_eip;
/* If we failed, we tainted the memory, but at the very least we
should restore cs. */
3379 if (rc != X86EMUL_CONTINUE) {
3380 pr_warn_once("faulting far call emulation tainted memory\n");
3385 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3386 ctxt->mode = prev_mode;
3391 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3396 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3397 if (rc != X86EMUL_CONTINUE)
3399 rc = assign_eip_near(ctxt, eip);
3400 if (rc != X86EMUL_CONTINUE)
3402 rsp_increment(ctxt, ctxt->src.val);
3403 return X86EMUL_CONTINUE;
3406 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3408 /* Write back the register source. */
3409 ctxt->src.val = ctxt->dst.val;
3410 write_register_operand(&ctxt->src);
3412 /* Write back the memory destination with implicit LOCK prefix. */
3413 ctxt->dst.val = ctxt->src.orig_val;
3414 ctxt->lock_prefix = 1;
3415 return X86EMUL_CONTINUE;
3418 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3420 ctxt->dst.val = ctxt->src2.val;
3421 return fastop(ctxt, em_imul);
3424 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3426 ctxt->dst.type = OP_REG;
3427 ctxt->dst.bytes = ctxt->src.bytes;
3428 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3429 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3431 return X86EMUL_CONTINUE;
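/*
 * The expression above is a branchless sign extension: shifting the
 * source by (bits - 1) isolates its sign bit, so the result is
 * ~(1 - 1) = all ones for a negative source and ~(0 - 1) = 0
 * otherwise, which is exactly what CWD/CDQ/CQO put in DX/EDX/RDX.
 */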
3434 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3438 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3439 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3440 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3441 return X86EMUL_CONTINUE;
3444 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3448 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3449 return emulate_gp(ctxt, 0);
3450 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3451 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3452 return X86EMUL_CONTINUE;
3455 static int em_mov(struct x86_emulate_ctxt *ctxt)
3457 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3458 return X86EMUL_CONTINUE;
3461 #define FFL(x) bit(X86_FEATURE_##x)
3463 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3465 u32 ebx, ecx, edx, eax = 1;
3469 * Check MOVBE is set in the guest-visible CPUID leaf.
3471 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3472 if (!(ecx & FFL(MOVBE)))
3473 return emulate_ud(ctxt);
3475 switch (ctxt->op_bytes) {
3478 * From MOVBE definition: "...When the operand size is 16 bits,
3479 * the upper word of the destination register remains unchanged
3482 * Both casting ->valptr and ->val to u16 breaks strict aliasing
* rules, so we have to do the operation almost by hand.
3485 tmp = (u16)ctxt->src.val;
3486 ctxt->dst.val &= ~0xffffUL;
3487 ctxt->dst.val |= (unsigned long)swab16(tmp);
3490 ctxt->dst.val = swab32((u32)ctxt->src.val);
3493 ctxt->dst.val = swab64(ctxt->src.val);
3498 return X86EMUL_CONTINUE;
3501 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3503 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3504 return emulate_gp(ctxt, 0);
3506 /* Disable writeback. */
3507 ctxt->dst.type = OP_NONE;
3508 return X86EMUL_CONTINUE;
3511 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3515 if (ctxt->mode == X86EMUL_MODE_PROT64)
3516 val = ctxt->src.val & ~0ULL;
3518 val = ctxt->src.val & ~0U;
3520 /* #UD condition is already handled. */
3521 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3522 return emulate_gp(ctxt, 0);
3524 /* Disable writeback. */
3525 ctxt->dst.type = OP_NONE;
3526 return X86EMUL_CONTINUE;
3529 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3533 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3534 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3535 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3536 return emulate_gp(ctxt, 0);
3538 return X86EMUL_CONTINUE;
3541 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3545 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3546 return emulate_gp(ctxt, 0);
3548 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3549 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3550 return X86EMUL_CONTINUE;
3553 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3555 if (ctxt->modrm_reg > VCPU_SREG_GS)
3556 return emulate_ud(ctxt);
3558 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3559 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3560 ctxt->dst.bytes = 2;
3561 return X86EMUL_CONTINUE;
3564 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3566 u16 sel = ctxt->src.val;
3568 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3569 return emulate_ud(ctxt);
3571 if (ctxt->modrm_reg == VCPU_SREG_SS)
3572 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3574 /* Disable writeback. */
3575 ctxt->dst.type = OP_NONE;
3576 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3579 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3581 u16 sel = ctxt->src.val;
3583 /* Disable writeback. */
3584 ctxt->dst.type = OP_NONE;
3585 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3588 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3590 u16 sel = ctxt->src.val;
3592 /* Disable writeback. */
3593 ctxt->dst.type = OP_NONE;
3594 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3597 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3602 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3603 if (rc == X86EMUL_CONTINUE)
3604 ctxt->ops->invlpg(ctxt, linear);
3605 /* Disable writeback. */
3606 ctxt->dst.type = OP_NONE;
3607 return X86EMUL_CONTINUE;
3610 static int em_clts(struct x86_emulate_ctxt *ctxt)
3614 cr0 = ctxt->ops->get_cr(ctxt, 0);
cr0 &= ~X86_CR0_TS;	/* CLTS clears the task-switched flag */
ctxt->ops->set_cr(ctxt, 0, cr0);
3617 return X86EMUL_CONTINUE;
3620 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3622 int rc = ctxt->ops->fix_hypercall(ctxt);
3624 if (rc != X86EMUL_CONTINUE)
3627 /* Let the processor re-execute the fixed hypercall */
3628 ctxt->_eip = ctxt->eip;
3629 /* Disable writeback. */
3630 ctxt->dst.type = OP_NONE;
3631 return X86EMUL_CONTINUE;
3634 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3635 void (*get)(struct x86_emulate_ctxt *ctxt,
3636 struct desc_ptr *ptr))
3638 struct desc_ptr desc_ptr;
3640 if (ctxt->mode == X86EMUL_MODE_PROT64)
3642 get(ctxt, &desc_ptr);
3643 if (ctxt->op_bytes == 2) {
3645 desc_ptr.address &= 0x00ffffff;
3647 /* Disable writeback. */
3648 ctxt->dst.type = OP_NONE;
3649 return segmented_write(ctxt, ctxt->dst.addr.mem,
3650 &desc_ptr, 2 + ctxt->op_bytes);
3653 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3655 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3658 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3660 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3663 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3665 struct desc_ptr desc_ptr;
3668 if (ctxt->mode == X86EMUL_MODE_PROT64)
3670 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3671 &desc_ptr.size, &desc_ptr.address,
3673 if (rc != X86EMUL_CONTINUE)
3675 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3676 is_noncanonical_address(desc_ptr.address))
3677 return emulate_gp(ctxt, 0);
3679 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3681 ctxt->ops->set_idt(ctxt, &desc_ptr);
3682 /* Disable writeback. */
3683 ctxt->dst.type = OP_NONE;
3684 return X86EMUL_CONTINUE;
3687 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3689 return em_lgdt_lidt(ctxt, true);
3692 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3694 return em_lgdt_lidt(ctxt, false);
3697 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3699 if (ctxt->dst.type == OP_MEM)
3700 ctxt->dst.bytes = 2;
3701 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3702 return X86EMUL_CONTINUE;
3705 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3707 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3708 | (ctxt->src.val & 0x0f));
3709 ctxt->dst.type = OP_NONE;
3710 return X86EMUL_CONTINUE;
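/*
 * Only the low four bits of the machine status word (PE, MP, EM, TS)
 * are writable through LMSW, and because the current PE bit is kept
 * and only ORed with the source, LMSW can set but never clear
 * protected mode.
 */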
3713 static int em_loop(struct x86_emulate_ctxt *ctxt)
3715 int rc = X86EMUL_CONTINUE;
3717 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3718 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3719 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3720 rc = jmp_rel(ctxt, ctxt->src.val);
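/*
 * The test_cc() trick above: LOOP (0xe2) is always taken via the
 * explicit opcode check, while LOOPE (0xe1) and LOOPNE (0xe0) XORed
 * with 5 yield condition codes 4 ("e", ZF set) and 5 ("ne", ZF clear)
 * respectively.
 */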
3725 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3727 int rc = X86EMUL_CONTINUE;
3729 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3730 rc = jmp_rel(ctxt, ctxt->src.val);
3735 static int em_in(struct x86_emulate_ctxt *ctxt)
3737 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3739 return X86EMUL_IO_NEEDED;
3741 return X86EMUL_CONTINUE;
3744 static int em_out(struct x86_emulate_ctxt *ctxt)
3746 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3748 /* Disable writeback. */
3749 ctxt->dst.type = OP_NONE;
3750 return X86EMUL_CONTINUE;
3753 static int em_cli(struct x86_emulate_ctxt *ctxt)
3755 if (emulator_bad_iopl(ctxt))
3756 return emulate_gp(ctxt, 0);
3758 ctxt->eflags &= ~X86_EFLAGS_IF;
3759 return X86EMUL_CONTINUE;
3762 static int em_sti(struct x86_emulate_ctxt *ctxt)
3764 if (emulator_bad_iopl(ctxt))
3765 return emulate_gp(ctxt, 0);
3767 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3768 ctxt->eflags |= X86_EFLAGS_IF;
3769 return X86EMUL_CONTINUE;
3772 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3774 u32 eax, ebx, ecx, edx;
3776 eax = reg_read(ctxt, VCPU_REGS_RAX);
3777 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3778 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3779 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3780 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3781 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3782 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3783 return X86EMUL_CONTINUE;
3786 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3790 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3792 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3794 ctxt->eflags &= ~0xffUL;
3795 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3796 return X86EMUL_CONTINUE;
3799 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3801 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3802 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3803 return X86EMUL_CONTINUE;
3806 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3808 switch (ctxt->op_bytes) {
3809 #ifdef CONFIG_X86_64
3811 asm("bswap %0" : "+r"(ctxt->dst.val));
3815 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3818 return X86EMUL_CONTINUE;
3821 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3823 /* emulating clflush regardless of cpuid */
3824 return X86EMUL_CONTINUE;
3827 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3829 ctxt->dst.val = (s32) ctxt->src.val;
3830 return X86EMUL_CONTINUE;
3833 static bool valid_cr(int nr)
3845 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
3847 if (!valid_cr(ctxt->modrm_reg))
3848 return emulate_ud(ctxt);
3850 return X86EMUL_CONTINUE;
3853 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3855 u64 new_val = ctxt->src.val64;
3856 int cr = ctxt->modrm_reg;
3859 static u64 cr_reserved_bits[] = {
3860 0xffffffff00000000ULL,
3861 0, 0, 0, /* CR3 checked later */
3868 return emulate_ud(ctxt);
3870 if (new_val & cr_reserved_bits[cr])
3871 return emulate_gp(ctxt, 0);
3876 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
3877 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
3878 return emulate_gp(ctxt, 0);
3880 cr4 = ctxt->ops->get_cr(ctxt, 4);
3881 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3883 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
3884 !(cr4 & X86_CR4_PAE))
3885 return emulate_gp(ctxt, 0);
3892 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3893 if (efer & EFER_LMA)
3894 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
3897 return emulate_gp(ctxt, 0);
3902 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3904 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
3905 return emulate_gp(ctxt, 0);
3911 return X86EMUL_CONTINUE;
3914 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3918 ctxt->ops->get_dr(ctxt, 7, &dr7);
3920 /* Check if DR7.Global_Enable is set */
3921 return dr7 & (1 << 13);
3924 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3926 int dr = ctxt->modrm_reg;
3930 return emulate_ud(ctxt);
3932 cr4 = ctxt->ops->get_cr(ctxt, 4);
3933 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3934 return emulate_ud(ctxt);
3936 if (check_dr7_gd(ctxt)) {
3939 ctxt->ops->get_dr(ctxt, 6, &dr6);
3941 dr6 |= DR6_BD | DR6_RTM;
3942 ctxt->ops->set_dr(ctxt, 6, dr6);
3943 return emulate_db(ctxt);
3946 return X86EMUL_CONTINUE;
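/*
 * DR7.GD (bit 13) turns any debug-register access into a #DB with
 * DR6.BD set, which is what the path above emulates; check_dr_write()
 * below reuses this check since writes fault under the same rule.
 */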
3949 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3951 u64 new_val = ctxt->src.val64;
3952 int dr = ctxt->modrm_reg;
3954 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3955 return emulate_gp(ctxt, 0);
3957 return check_dr_read(ctxt);
3960 static int check_svme(struct x86_emulate_ctxt *ctxt)
3964 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3966 if (!(efer & EFER_SVME))
3967 return emulate_ud(ctxt);
3969 return X86EMUL_CONTINUE;
3972 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3974 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3976 /* Valid physical address? */
3977 if (rax & 0xffff000000000000ULL)
3978 return emulate_gp(ctxt, 0);
3980 return check_svme(ctxt);
3983 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3985 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3987 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3988 return emulate_ud(ctxt);
3990 return X86EMUL_CONTINUE;
3993 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3995 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3996 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3998 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3999 ctxt->ops->check_pmc(ctxt, rcx))
4000 return emulate_gp(ctxt, 0);
4002 return X86EMUL_CONTINUE;
4005 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4007 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4008 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4009 return emulate_gp(ctxt, 0);
4011 return X86EMUL_CONTINUE;
4014 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4016 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4017 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4018 return emulate_gp(ctxt, 0);
4020 return X86EMUL_CONTINUE;
4023 #define D(_y) { .flags = (_y) }
4024 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4025 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4026 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4027 #define N D(NotImpl)
4028 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4029 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4030 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4031 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4032 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4033 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4034 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4035 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4036 #define II(_f, _e, _i) \
4037 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4038 #define IIP(_f, _e, _i, _p) \
4039 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4040 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4041 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4043 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4044 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4045 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4046 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4047 #define I2bvIP(_f, _e, _i, _p) \
4048 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4050 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4051 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4052 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
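/*
 * For reference, F6ALU() expands to the six classic encodings of a
 * two-operand ALU op, matching e.g. ADD's opcodes 0x00-0x05:
 * r/m8,r8; r/m,r; r8,r/m8; r,r/m; AL,imm8 and eAX,imm.  Lock is
 * stripped from the forms that cannot take a LOCK prefix (register
 * and accumulator destinations).
 */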
4054 static const struct opcode group7_rm0[] = {
4056 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4060 static const struct opcode group7_rm1[] = {
4061 DI(SrcNone | Priv, monitor),
4062 DI(SrcNone | Priv, mwait),
4066 static const struct opcode group7_rm3[] = {
4067 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4068 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4069 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4070 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4071 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4072 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4073 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4074 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4077 static const struct opcode group7_rm7[] = {
4079 DIP(SrcNone, rdtscp, check_rdtsc),
4083 static const struct opcode group1[] = {
4085 F(Lock | PageTable, em_or),
4088 F(Lock | PageTable, em_and),
4094 static const struct opcode group1A[] = {
4095 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4098 static const struct opcode group2[] = {
4099 F(DstMem | ModRM, em_rol),
4100 F(DstMem | ModRM, em_ror),
4101 F(DstMem | ModRM, em_rcl),
4102 F(DstMem | ModRM, em_rcr),
4103 F(DstMem | ModRM, em_shl),
4104 F(DstMem | ModRM, em_shr),
4105 F(DstMem | ModRM, em_shl),
4106 F(DstMem | ModRM, em_sar),
4109 static const struct opcode group3[] = {
4110 F(DstMem | SrcImm | NoWrite, em_test),
4111 F(DstMem | SrcImm | NoWrite, em_test),
4112 F(DstMem | SrcNone | Lock, em_not),
4113 F(DstMem | SrcNone | Lock, em_neg),
4114 F(DstXacc | Src2Mem, em_mul_ex),
4115 F(DstXacc | Src2Mem, em_imul_ex),
4116 F(DstXacc | Src2Mem, em_div_ex),
4117 F(DstXacc | Src2Mem, em_idiv_ex),
4120 static const struct opcode group4[] = {
4121 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4122 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4126 static const struct opcode group5[] = {
4127 F(DstMem | SrcNone | Lock, em_inc),
4128 F(DstMem | SrcNone | Lock, em_dec),
4129 I(SrcMem | NearBranch, em_call_near_abs),
4130 I(SrcMemFAddr | ImplicitOps, em_call_far),
4131 I(SrcMem | NearBranch, em_jmp_abs),
4132 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4133 I(SrcMem | Stack, em_push), D(Undefined),
4136 static const struct opcode group6[] = {
4137 DI(Prot | DstMem, sldt),
4138 DI(Prot | DstMem, str),
4139 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4140 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4144 static const struct group_dual group7 = { {
4145 II(Mov | DstMem, em_sgdt, sgdt),
4146 II(Mov | DstMem, em_sidt, sidt),
4147 II(SrcMem | Priv, em_lgdt, lgdt),
4148 II(SrcMem | Priv, em_lidt, lidt),
4149 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4150 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4151 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4155 N, EXT(0, group7_rm3),
4156 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4157 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4161 static const struct opcode group8[] = {
4163 F(DstMem | SrcImmByte | NoWrite, em_bt),
4164 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4165 F(DstMem | SrcImmByte | Lock, em_btr),
4166 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4169 static const struct group_dual group9 = { {
4170 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4172 N, N, N, N, N, N, N, N,
4175 static const struct opcode group11[] = {
4176 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4180 static const struct gprefix pfx_0f_ae_7 = {
4181 I(SrcMem | ByteOp, em_clflush), N, N, N,
4184 static const struct group_dual group15 = { {
4185 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4187 N, N, N, N, N, N, N, N,
4190 static const struct gprefix pfx_0f_6f_0f_7f = {
4191 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4194 static const struct instr_dual instr_dual_0f_2b = {
4198 static const struct gprefix pfx_0f_2b = {
4199 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4202 static const struct gprefix pfx_0f_28_0f_29 = {
4203 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4206 static const struct gprefix pfx_0f_e7 = {
4207 N, I(Sse, em_mov), N, N,
4210 static const struct escape escape_d9 = { {
4211 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4214 N, N, N, N, N, N, N, N,
4216 N, N, N, N, N, N, N, N,
4218 N, N, N, N, N, N, N, N,
4220 N, N, N, N, N, N, N, N,
4222 N, N, N, N, N, N, N, N,
4224 N, N, N, N, N, N, N, N,
4226 N, N, N, N, N, N, N, N,
4228 N, N, N, N, N, N, N, N,
4231 static const struct escape escape_db = { {
4232 N, N, N, N, N, N, N, N,
4235 N, N, N, N, N, N, N, N,
4237 N, N, N, N, N, N, N, N,
4239 N, N, N, N, N, N, N, N,
4241 N, N, N, N, N, N, N, N,
4243 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4245 N, N, N, N, N, N, N, N,
4247 N, N, N, N, N, N, N, N,
4249 N, N, N, N, N, N, N, N,
4252 static const struct escape escape_dd = { {
4253 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4256 N, N, N, N, N, N, N, N,
4258 N, N, N, N, N, N, N, N,
4260 N, N, N, N, N, N, N, N,
4262 N, N, N, N, N, N, N, N,
4264 N, N, N, N, N, N, N, N,
4266 N, N, N, N, N, N, N, N,
4268 N, N, N, N, N, N, N, N,
4270 N, N, N, N, N, N, N, N,
4273 static const struct instr_dual instr_dual_0f_c3 = {
4274 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4277 static const struct mode_dual mode_dual_63 = {
4278 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4281 static const struct opcode opcode_table[256] = {
4283 F6ALU(Lock, em_add),
4284 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4285 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4287 F6ALU(Lock | PageTable, em_or),
4288 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4291 F6ALU(Lock, em_adc),
4292 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4293 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4295 F6ALU(Lock, em_sbb),
4296 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4297 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4299 F6ALU(Lock | PageTable, em_and), N, N,
4301 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4303 F6ALU(Lock, em_xor), N, N,
4305 F6ALU(NoWrite, em_cmp), N, N,
4307 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4309 X8(I(SrcReg | Stack, em_push)),
4311 X8(I(DstReg | Stack, em_pop)),
4313 I(ImplicitOps | Stack | No64, em_pusha),
4314 I(ImplicitOps | Stack | No64, em_popa),
4315 N, MD(ModRM, &mode_dual_63),
4318 I(SrcImm | Mov | Stack, em_push),
4319 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4320 I(SrcImmByte | Mov | Stack, em_push),
4321 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4322 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4323 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4325 X16(D(SrcImmByte | NearBranch)),
4327 G(ByteOp | DstMem | SrcImm, group1),
4328 G(DstMem | SrcImm, group1),
4329 G(ByteOp | DstMem | SrcImm | No64, group1),
4330 G(DstMem | SrcImmByte, group1),
4331 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4332 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4334 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4335 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4336 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4337 D(ModRM | SrcMem | NoAccess | DstReg),
4338 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4341 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4343 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4344 I(SrcImmFAddr | No64, em_call_far), N,
4345 II(ImplicitOps | Stack, em_pushf, pushf),
4346 II(ImplicitOps | Stack, em_popf, popf),
4347 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4349 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4350 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4351 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4352 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4354 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4355 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4356 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4357 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4359 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4361 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4363 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4364 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4365 I(ImplicitOps | NearBranch, em_ret),
4366 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4367 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4368 G(ByteOp, group11), G(0, group11),
4370 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4371 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4372 I(ImplicitOps, em_ret_far),
4373 D(ImplicitOps), DI(SrcImmByte, intn),
4374 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4376 G(Src2One | ByteOp, group2), G(Src2One, group2),
4377 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4378 I(DstAcc | SrcImmUByte | No64, em_aam),
4379 I(DstAcc | SrcImmUByte | No64, em_aad),
4380 F(DstAcc | ByteOp | No64, em_salc),
4381 I(DstAcc | SrcXLat | ByteOp, em_mov),
4383 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4385 X3(I(SrcImmByte | NearBranch, em_loop)),
4386 I(SrcImmByte | NearBranch, em_jcxz),
4387 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4388 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4390 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4391 I(SrcImmFAddr | No64, em_jmp_far),
4392 D(SrcImmByte | ImplicitOps | NearBranch),
4393 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4394 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4396 N, DI(ImplicitOps, icebp), N, N,
4397 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4398 G(ByteOp, group3), G(0, group3),
4400 D(ImplicitOps), D(ImplicitOps),
4401 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4402 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4405 static const struct opcode twobyte_table[256] = {
4407 G(0, group6), GD(0, &group7), N, N,
4408 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4409 II(ImplicitOps | Priv, em_clts, clts), N,
4410 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4411 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4413 N, N, N, N, N, N, N, N,
4414 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4415 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4417 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
4418 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4419 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4421 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4424 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4425 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4426 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4429 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4430 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4431 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4432 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4433 I(ImplicitOps | EmulateOnUD, em_sysenter),
4434 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4436 N, N, N, N, N, N, N, N,
4438 X16(D(DstReg | SrcMem | ModRM)),
4440 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4445 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4450 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4452 X16(D(SrcImm | NearBranch)),
4454 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4456 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4457 II(ImplicitOps, em_cpuid, cpuid),
4458 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4459 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4460 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4462 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4463 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4464 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4465 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4466 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4467 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4469 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4470 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4471 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4472 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4473 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4474 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4478 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4479 I(DstReg | SrcMem | ModRM, em_bsf_c),
4480 I(DstReg | SrcMem | ModRM, em_bsr_c),
4481 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4483 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4484 N, ID(0, &instr_dual_0f_c3),
4485 N, N, N, GD(0, &group9),
4487 X8(I(DstReg, em_bswap)),
4489 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4491 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4492 N, N, N, N, N, N, N, N,
4494 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4497 static const struct instr_dual instr_dual_0f_38_f0 = {
4498 I(DstReg | SrcMem | Mov, em_movbe), N
4501 static const struct instr_dual instr_dual_0f_38_f1 = {
4502 I(DstMem | SrcReg | Mov, em_movbe), N
4505 static const struct gprefix three_byte_0f_38_f0 = {
4506 ID(0, &instr_dual_0f_38_f0), N, N, N
4509 static const struct gprefix three_byte_0f_38_f1 = {
4510 ID(0, &instr_dual_0f_38_f1), N, N, N
* Insns below are selected by the prefix, which is indexed by the
* third opcode byte.
4517 static const struct opcode opcode_map_0f_38[256] = {
4519 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4521 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4523 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4524 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4545 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4549 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4555 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4556 unsigned size, bool sign_extension)
4558 int rc = X86EMUL_CONTINUE;
4562 op->addr.mem.ea = ctxt->_eip;
4563 /* NB. Immediates are sign-extended as necessary. */
4564 switch (op->bytes) {
4566 op->val = insn_fetch(s8, ctxt);
4569 op->val = insn_fetch(s16, ctxt);
4572 op->val = insn_fetch(s32, ctxt);
4575 op->val = insn_fetch(s64, ctxt);
4578 if (!sign_extension) {
4579 switch (op->bytes) {
4587 op->val &= 0xffffffff;
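/*
 * Example: an 8-bit immediate 0x80 is fetched as -128 (sign extended)
 * above; when sign_extension is false, the masking just performed
 * truncates it back to its operand size, so the consumer sees 0x80
 * rather than 0xffffff80.
 */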
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op);
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if (ctxt->d & BitOp)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RDI);
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		op->count = 1;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		break;
	case OpCL:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		if (ctxt->memop.type == OP_REG) {
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
			fetch_register_operand(&ctxt->memop);
		}
		goto mem_common;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			register_address(ctxt, VCPU_REGS_RSI);
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		op->count = 1;
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			address_mask(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES:
		op->type = OP_IMM;
		op->val = VCPU_SREG_ES;
		break;
	case OpCS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_CS;
		break;
	case OpSS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_SS;
		break;
	case OpDS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_DS;
		break;
	case OpFS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_FS;
		break;
	case OpGS:
		op->type = OP_IMM;
		op->val = VCPU_SREG_GS;
		break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

	return rc;
}
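
/*
 * Illustrative note (not in the original source): the segment-index
 * cases above explain table rows such as the em_lseg entries - e.g.
 * "lss r, r/m" carries Src2SS, so decode_operand() materialises
 * VCPU_SREG_SS as an immediate and em_lseg learns which segment
 * register to load from ctxt->src2.val.
 */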
int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}
	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = (ctxt->b >> 3) & 3;
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = ctxt->b & 7;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */
		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
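
	/*
	 * Illustrative example: for 66 48 89 07 in 64-bit mode (mov %rax,
	 * (%rdi) with both an operand-size prefix and REX.W), the loop above
	 * first drops op_bytes to 2 for the 0x66 prefix, then the REX.W check
	 * here forces op_bytes back to 8 - matching the architectural rule
	 * that REX.W takes precedence over a 0x66 prefix.
	 */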
	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}
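
	/*
	 * Background note (illustrative): 0xc4/0xc5 are the legacy LES/LDS
	 * opcodes.  Outside 64-bit mode they only act as VEX prefixes when
	 * the next byte has both top bits set (a register-form ModRM, which
	 * is meaningless for LES/LDS), hence the (ctxt->modrm & 0xc0) == 0xc0
	 * test above; in 64-bit mode LES/LDS do not exist, so the bytes are
	 * always VEX there.
	 */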
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf)
				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
			else
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}
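
	/*
	 * Illustrative example: opcode 0xff is tagged as a ModRM group in
	 * opcode_table, so the loop above replaces it with the entry picked
	 * by ModRM.reg - "ff /4" (jmp r/m) and "ff /6" (push r/m) share the
	 * same opcode byte but resolve to different handlers here.
	 */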
	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked
		 * unconditionally in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}
	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}
static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * The second termination condition only applies to REPE and REPNE:
	 * if the repeat-string-operation prefix is REPE/REPZ or REPNE/REPNZ,
	 * test the corresponding termination condition:
	 *   - if REPE/REPZ and ZF = 0 then done
	 *   - if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}
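
/*
 * Worked example: "repe cmpsb" (f3 a6) keeps iterating only while the
 * compared bytes are equal; the first mismatch clears ZF and the check
 * above reports completion, independently of the RCX count test done by
 * the caller.
 */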
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	bool fault = false;

	ctxt->ops->get_fpu(ctxt);
	asm volatile("1: fwait \n\t"
		     "2: \n\t"
		     ".pushsection .fixup,\"ax\" \n\t"
		     "3: \n\t"
		     "movb $1, %[fault] \n\t"
		     "jmp 2b \n\t"
		     ".popsection \n\t"
		     _ASM_EXTABLE(1b, 3b)
		     : [fault] "+qm"(fault));
	ctxt->ops->put_fpu(ctxt);

	if (unlikely(fault))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}
static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
				       struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [fastop]"+S"(fop)
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
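
/*
 * Illustrative note: the fastop stubs are emitted at a fixed stride
 * (FASTOP_SIZE), one stub per operand size, so for a 4-byte destination
 * __ffs(4) == 2 selects the dword variant.  Guest arithmetic flags are
 * marshalled into and out of the stub with the push/popf ... pushf/pop
 * sequence around the indirect call.
 */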
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}
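
/*
 * Note (illustrative): the memset above depends on the field layout of
 * struct x86_emulate_ctxt - it zeroes everything declared between
 * rip_relative and modrm in one sweep, so decode-cache fields must stay
 * inside that window.
 */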
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
		    (ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(ctxt, &ctxt->src);
			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
		}

		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}
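
	/*
	 * Illustrative example for the check above: "rep movsb" entered with
	 * (E)CX == 0 performs no iterations at all; the emulator just
	 * advances RIP past the instruction and clears RF, without touching
	 * RSI/RDI.
	 */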
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				    &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;

			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;
	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}
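
	/*
	 * Worked example for the 0x98 case above: with op_bytes == 2 (cbw),
	 * an AL value of 0x80 is sign-extended to AX == 0xff80; cwde
	 * (op_bytes 4) and cdqe (op_bytes 8) perform the same widening at
	 * the larger sizes.
	 */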
	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);

	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;

		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}
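
	/*
	 * Illustrative note on the heuristic above: the 0x3ff mask makes the
	 * emulator bail out to the caller whenever the low ten bits of RCX
	 * reach zero, i.e. at most 1024 string iterations are emulated per
	 * call, so a huge "rep" operation cannot monopolise the vcpu without
	 * giving pending interrupts a chance to be injected.
	 */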
	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc*/
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}