1 /******************************************************************************
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 * Copyright (c) 2005 Keir Fraser
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
11 * Copyright (C) 2006 Qumranet
12 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
14 * Avi Kivity <avi@qumranet.com>
15 * Yaniv Kamay <yaniv@qumranet.com>
17 * This work is licensed under the terms of the GNU GPL, version 2. See
18 * the COPYING file in the top-level directory.
20 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
23 #include <linux/kvm_host.h>
24 #include "kvm_cache_regs.h"
25 #include <linux/module.h>
26 #include <asm/kvm_emulate.h>
27 #include <linux/stringify.h>
28 #include <asm/debugreg.h>
/*
 * Operand types
 */
#define OpNone 0ull
#define OpImplicit 1ull /* No generic decode */
38 #define OpReg 2ull /* Register */
39 #define OpMem 3ull /* Memory */
40 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
41 #define OpDI 5ull /* ES:DI/EDI/RDI */
42 #define OpMem64 6ull /* Memory, 64-bit */
43 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
44 #define OpDX 8ull /* DX register */
45 #define OpCL 9ull /* CL register (for shifts) */
46 #define OpImmByte 10ull /* 8-bit sign extended immediate */
47 #define OpOne 11ull /* Implied 1 */
48 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
49 #define OpMem16 13ull /* Memory operand (16-bit). */
50 #define OpMem32 14ull /* Memory operand (32-bit). */
51 #define OpImmU 15ull /* Immediate operand, zero extended */
52 #define OpSI 16ull /* SI/ESI/RSI */
53 #define OpImmFAddr 17ull /* Immediate far address */
54 #define OpMemFAddr 18ull /* Far address in memory */
55 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
56 #define OpES 20ull /* ES */
57 #define OpCS 21ull /* CS */
58 #define OpSS 22ull /* SS */
59 #define OpDS 23ull /* DS */
60 #define OpFS 24ull /* FS */
61 #define OpGS 25ull /* GS */
62 #define OpMem8 26ull /* 8-bit zero extended memory operand */
63 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
64 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
65 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
66 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
68 #define OpBits 5 /* Width of operand field */
69 #define OpMask ((1ull << OpBits) - 1)
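
/*
 * Example: each operand slot is a 5-bit OpXxx value packed into the
 * per-opcode flag word ctxt->d, so the decoder recovers it with a shift
 * and OpMask, e.g. (ctxt->d >> DstShift) & OpMask for the destination.
 */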
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
80 /* Operand sizes: 8-bit operands or specified/overridden size. */
81 #define ByteOp (1<<0) /* 8-bit operands. */
/* Destination operand type. */
#define DstShift 1
#define ImplicitOps (OpImplicit << DstShift)
85 #define DstReg (OpReg << DstShift)
86 #define DstMem (OpMem << DstShift)
87 #define DstAcc (OpAcc << DstShift)
88 #define DstDI (OpDI << DstShift)
89 #define DstMem64 (OpMem64 << DstShift)
90 #define DstMem16 (OpMem16 << DstShift)
91 #define DstImmUByte (OpImmUByte << DstShift)
92 #define DstDX (OpDX << DstShift)
93 #define DstAccLo (OpAccLo << DstShift)
94 #define DstMask (OpMask << DstShift)
/* Source operand type. */
#define SrcShift 6
#define SrcNone (OpNone << SrcShift)
98 #define SrcReg (OpReg << SrcShift)
99 #define SrcMem (OpMem << SrcShift)
100 #define SrcMem16 (OpMem16 << SrcShift)
101 #define SrcMem32 (OpMem32 << SrcShift)
102 #define SrcImm (OpImm << SrcShift)
103 #define SrcImmByte (OpImmByte << SrcShift)
104 #define SrcOne (OpOne << SrcShift)
105 #define SrcImmUByte (OpImmUByte << SrcShift)
106 #define SrcImmU (OpImmU << SrcShift)
107 #define SrcSI (OpSI << SrcShift)
108 #define SrcXLat (OpXLat << SrcShift)
109 #define SrcImmFAddr (OpImmFAddr << SrcShift)
110 #define SrcMemFAddr (OpMemFAddr << SrcShift)
111 #define SrcAcc (OpAcc << SrcShift)
112 #define SrcImmU16 (OpImmU16 << SrcShift)
113 #define SrcImm64 (OpImm64 << SrcShift)
114 #define SrcDX (OpDX << SrcShift)
115 #define SrcMem8 (OpMem8 << SrcShift)
116 #define SrcAccHi (OpAccHi << SrcShift)
117 #define SrcMask (OpMask << SrcShift)
118 #define BitOp (1<<11)
119 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
120 #define String (1<<13) /* String instruction (rep capable) */
121 #define Stack (1<<14) /* Stack instruction (push/pop) */
122 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
123 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
124 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
125 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
126 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
127 #define Escape (5<<15) /* Escape to coprocessor instruction */
128 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
129 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
130 #define Sse (1<<18) /* SSE Vector instruction */
131 /* Generic ModRM decode. */
132 #define ModRM (1<<19)
/* Destination is only written; never read. */
#define Mov (1<<20)
/* Misc flags */
136 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
137 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
138 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
139 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
140 #define Undefined (1<<25) /* No Such Instruction */
141 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
142 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28) /* instruction is invalid in 64-bit mode */
#define PageTable (1 << 29) /* instruction used to write page table */
145 #define NotImpl (1 << 30) /* instruction is not implemented */
146 /* Source 2 operand type */
147 #define Src2Shift (31)
148 #define Src2None (OpNone << Src2Shift)
149 #define Src2Mem (OpMem << Src2Shift)
150 #define Src2CL (OpCL << Src2Shift)
151 #define Src2ImmByte (OpImmByte << Src2Shift)
152 #define Src2One (OpOne << Src2Shift)
153 #define Src2Imm (OpImm << Src2Shift)
154 #define Src2ES (OpES << Src2Shift)
155 #define Src2CS (OpCS << Src2Shift)
156 #define Src2SS (OpSS << Src2Shift)
157 #define Src2DS (OpDS << Src2Shift)
158 #define Src2FS (OpFS << Src2Shift)
159 #define Src2GS (OpGS << Src2Shift)
160 #define Src2Mask (OpMask << Src2Shift)
161 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
162 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
163 #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
164 #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
165 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
166 #define NoWrite ((u64)1 << 45) /* No writeback */
167 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
168 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
169 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
170 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
171 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
172 #define NearBranch ((u64)1 << 52) /* Near branches */
173 #define No16 ((u64)1 << 53) /* No 16 bit operand */
174 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
176 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
178 #define X2(x...) x, x
179 #define X3(x...) X2(x), x
180 #define X4(x...) X2(x), X2(x)
181 #define X5(x...) X4(x), x
182 #define X6(x...) X4(x), X2(x)
183 #define X7(x...) X4(x), X3(x)
184 #define X8(x...) X4(x), X4(x)
185 #define X16(x...) X8(x), X8(x)
187 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
188 #define FASTOP_SIZE 8
/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
197 * ex: rsi (in:fastop pointer, out:zero if exception)
199 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
200 * different operand sizes can be reached by calculation, rather than a jump
201 * table (which would be bigger than the code).
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */
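
/*
 * Illustrative arithmetic (a sketch, not the dispatcher itself): with the
 * byte/word/long/quad layout emitted by the FASTOP* macros below, the
 * entry for an N-byte operand lives at fop + ilog2(N) * FASTOP_SIZE; the
 * 32-bit variant of em_add, for instance, is em_add + 2 * FASTOP_SIZE.
 */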
struct fastop;

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};
252 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}
static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);	/* make sure the cached value is valid */
	return reg_write(ctxt, nr);
}
283 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
287 for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
288 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
291 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
293 ctxt->regs_dirty = 0;
294 ctxt->regs_valid = 0;
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
301 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
302 X86_EFLAGS_PF|X86_EFLAGS_CF)
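
/*
 * Usage sketch (the assumed shape of the fastop() dispatcher declared
 * below): the emulated flags are sandwiched around the host instruction:
 *
 *	flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_FIXED;
 *	... run the fastop with "flags" loaded into RFLAGS ...
 *	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 */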
310 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
312 #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
313 #define FOP_RET "ret \n\t"
315 #define FOP_START(op) \
316 extern void em_##op(struct fastop *fake); \
317 asm(".pushsection .text, \"ax\" \n\t" \
318 ".global em_" #op " \n\t" \
325 #define FOPNOP() FOP_ALIGN FOP_RET
327 #define FOP1E(op, dst) \
328 FOP_ALIGN "10: " #op " %" #dst " \n\t" FOP_RET
330 #define FOP1EEX(op, dst) \
331 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
333 #define FASTOP1(op) \
338 ON64(FOP1E(op##q, rax)) \
341 /* 1-operand, using src2 (for MUL/DIV r/m) */
342 #define FASTOP1SRC2(op, name) \
347 ON64(FOP1E(op, rcx)) \
350 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
351 #define FASTOP1SRC2EX(op, name) \
356 ON64(FOP1EEX(op, rcx)) \
359 #define FOP2E(op, dst, src) \
360 FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET
362 #define FASTOP2(op) \
364 FOP2E(op##b, al, dl) \
365 FOP2E(op##w, ax, dx) \
366 FOP2E(op##l, eax, edx) \
367 ON64(FOP2E(op##q, rax, rdx)) \
370 /* 2 operand, word only */
371 #define FASTOP2W(op) \
374 FOP2E(op##w, ax, dx) \
375 FOP2E(op##l, eax, edx) \
376 ON64(FOP2E(op##q, rax, rdx)) \
379 /* 2 operand, src is CL */
380 #define FASTOP2CL(op) \
382 FOP2E(op##b, al, cl) \
383 FOP2E(op##w, ax, cl) \
384 FOP2E(op##l, eax, cl) \
385 ON64(FOP2E(op##q, rax, cl)) \
388 /* 2 operand, src and dest are reversed */
389 #define FASTOP2R(op, name) \
391 FOP2E(op##b, dl, al) \
392 FOP2E(op##w, dx, ax) \
393 FOP2E(op##l, edx, eax) \
394 ON64(FOP2E(op##q, rdx, rax)) \
397 #define FOP3E(op, dst, src, src2) \
398 FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
400 /* 3-operand, word-only, src2=cl */
401 #define FASTOP3WCL(op) \
404 FOP3E(op##w, ax, dx, cl) \
405 FOP3E(op##l, eax, edx, cl) \
406 ON64(FOP3E(op##q, rax, rdx, cl)) \
409 /* Special case for SETcc - 1 instruction per cc */
410 #define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
412 asm(".global kvm_fastop_exception \n"
413 "kvm_fastop_exception: xor %esi, %esi; ret");
434 FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
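
/*
 * Notes on the two stubs above: a faulting fastop instruction branches to
 * kvm_fastop_exception through its _ASM_EXTABLE entry (see FOP1EEX), and
 * zeroing %esi reports the fault via the "ex" slot of the fastop calling
 * convention.  SALC sets AL to 0xff if CF is set and to 0x00 otherwise
 * (SBB computes AL - AL - CF); the pushf/popf pair keeps the flags
 * unchanged, since the real (undocumented) instruction does not alter them.
 */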
437 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
438 enum x86_intercept intercept,
439 enum x86_intercept_stage stage)
441 struct x86_instruction_info info = {
442 .intercept = intercept,
443 .rep_prefix = ctxt->rep_prefix,
444 .modrm_mod = ctxt->modrm_mod,
445 .modrm_reg = ctxt->modrm_reg,
446 .modrm_rm = ctxt->modrm_rm,
447 .src_val = ctxt->src.val64,
448 .dst_val = ctxt->dst.val64,
449 .src_bytes = ctxt->src.bytes,
450 .dst_bytes = ctxt->dst.bytes,
451 .ad_bytes = ctxt->ad_bytes,
452 .next_rip = ctxt->eip,
455 return ctxt->ops->intercept(ctxt, &info, stage);
458 static void assign_masked(ulong *dest, ulong src, ulong mask)
460 *dest = (*dest & ~mask) | (src & mask);
static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}
482 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
484 return (1UL << (ctxt->ad_bytes << 3)) - 1;
487 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
490 struct desc_struct ss;
492 if (ctxt->mode == X86EMUL_MODE_PROT64)
494 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
495 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
498 static int stack_size(struct x86_emulate_ctxt *ctxt)
500 return (__fls(stack_mask(ctxt)) + 1) >> 3;
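
/*
 * Example: with ad_bytes == 2, ad_mask() is (1UL << 16) - 1 == 0xffff, so
 * address arithmetic wraps within a 16-bit segment; stack_mask() likewise
 * yields 0xffff or 0xffffffff from SS.d outside of 64-bit mode.
 */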
503 /* Access/update address held in a register, based on addressing mode. */
504 static inline unsigned long
505 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
507 if (ctxt->ad_bytes == sizeof(unsigned long))
510 return reg & ad_mask(ctxt);
513 static inline unsigned long
514 register_address(struct x86_emulate_ctxt *ctxt, int reg)
516 return address_mask(ctxt, reg_read(ctxt, reg));
519 static void masked_increment(ulong *reg, ulong mask, int inc)
521 assign_masked(reg, *reg + inc, mask);
525 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
527 ulong *preg = reg_rmw(ctxt, reg);
529 assign_register(preg, *preg + inc, ctxt->ad_bytes);
532 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
534 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
537 static u32 desc_limit_scaled(struct desc_struct *desc)
539 u32 limit = get_desc_limit(desc);
541 return desc->g ? (limit << 12) | 0xfff : limit;
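
/*
 * Example: a descriptor with g=1 and a raw limit of 0xfffff scales to
 * (0xfffff << 12) | 0xfff == 0xffffffff, i.e. a flat 4GiB segment.
 */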
544 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
546 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
549 return ctxt->ops->get_cached_segment_base(ctxt, seg);
552 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
553 u32 error, bool valid)
556 ctxt->exception.vector = vec;
557 ctxt->exception.error_code = error;
558 ctxt->exception.error_code_valid = valid;
559 return X86EMUL_PROPAGATE_FAULT;
562 static int emulate_db(struct x86_emulate_ctxt *ctxt)
564 return emulate_exception(ctxt, DB_VECTOR, 0, false);
567 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
569 return emulate_exception(ctxt, GP_VECTOR, err, true);
572 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
574 return emulate_exception(ctxt, SS_VECTOR, err, true);
577 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
579 return emulate_exception(ctxt, UD_VECTOR, 0, false);
582 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
584 return emulate_exception(ctxt, TS_VECTOR, err, true);
587 static int emulate_de(struct x86_emulate_ctxt *ctxt)
589 return emulate_exception(ctxt, DE_VECTOR, 0, false);
592 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
594 return emulate_exception(ctxt, NM_VECTOR, 0, false);
597 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
600 struct desc_struct desc;
602 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
606 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
611 struct desc_struct desc;
613 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
614 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.
 */
static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	if (likely(size < 16))
		return false;

	if (ctxt->d & Aligned)
		return true;
	else if (ctxt->d & Unaligned)
		return false;
	else if (ctxt->d & Avx)
		return false;
	else
		return true;
}
640 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
641 struct segmented_address addr,
642 unsigned *max_size, unsigned size,
643 bool write, bool fetch,
644 enum x86emul_mode mode, ulong *linear)
646 struct desc_struct desc;
652 la = seg_base(ctxt, addr.seg) + addr.ea;
655 case X86EMUL_MODE_PROT64:
657 if (is_noncanonical_address(la))
660 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
661 if (size > *max_size)
665 *linear = la = (u32)la;
666 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
670 /* code segment in protected mode or read-only data segment */
671 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
672 || !(desc.type & 2)) && write)
674 /* unreadable code segment */
675 if (!fetch && (desc.type & 8) && !(desc.type & 2))
677 lim = desc_limit_scaled(&desc);
678 if (!(desc.type & 8) && (desc.type & 4)) {
679 /* expand-down segment */
682 lim = desc.d ? 0xffffffff : 0xffff;
686 if (lim == 0xffffffff)
689 *max_size = (u64)lim + 1 - addr.ea;
690 if (size > *max_size)
695 if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
696 return emulate_gp(ctxt, 0);
697 return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
705 static int linearize(struct x86_emulate_ctxt *ctxt,
706 struct segmented_address addr,
707 unsigned size, bool write,
711 return __linearize(ctxt, addr, &max_size, size, write, false,
715 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
716 enum x86emul_mode mode)
721 struct segmented_address addr = { .seg = VCPU_SREG_CS,
724 if (ctxt->op_bytes != sizeof(unsigned long))
725 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
726 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
727 if (rc == X86EMUL_CONTINUE)
728 ctxt->_eip = addr.ea;
732 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
734 return assign_eip(ctxt, dst, ctxt->mode);
737 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
738 const struct desc_struct *cs_desc)
740 enum x86emul_mode mode = ctxt->mode;
744 if (ctxt->mode >= X86EMUL_MODE_PROT16) {
748 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
750 mode = X86EMUL_MODE_PROT64;
752 mode = X86EMUL_MODE_PROT32; /* temporary value */
755 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
756 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
757 rc = assign_eip(ctxt, dst, mode);
758 if (rc == X86EMUL_CONTINUE)
763 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
765 return assign_eip_near(ctxt, ctxt->_eip + rel);
768 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
769 struct segmented_address addr,
776 rc = linearize(ctxt, addr, size, false, &linear);
777 if (rc != X86EMUL_CONTINUE)
779 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
786 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
789 unsigned size, max_size;
790 unsigned long linear;
791 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
792 struct segmented_address addr = { .seg = VCPU_SREG_CS,
793 .ea = ctxt->eip + cur_size };
	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible. We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself. Instead, we use max_size to check
	 * against op_size.
	 */
805 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	/* cur_size is at most 15 here, so "15UL ^ cur_size" == 15 - cur_size */
	size = min_t(unsigned, 15UL ^ cur_size, max_size);
811 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn. So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
819 if (unlikely(size < op_size))
820 return emulate_gp(ctxt, 0);
822 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
823 size, &ctxt->exception);
824 if (unlikely(rc != X86EMUL_CONTINUE))
826 ctxt->fetch.end += size;
827 return X86EMUL_CONTINUE;
830 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
833 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
835 if (unlikely(done_size < size))
836 return __do_insn_fetch_bytes(ctxt, size - done_size);
838 return X86EMUL_CONTINUE;
841 /* Fetch next part of the instruction being emulated. */
842 #define insn_fetch(_type, _ctxt) \
845 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
846 if (rc != X86EMUL_CONTINUE) \
848 ctxt->_eip += sizeof(_type); \
849 _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
850 ctxt->fetch.ptr += sizeof(_type); \
854 #define insn_fetch_arr(_arr, _size, _ctxt) \
856 rc = do_insn_fetch_bytes(_ctxt, _size); \
857 if (rc != X86EMUL_CONTINUE) \
859 ctxt->_eip += (_size); \
860 memcpy(_arr, ctxt->fetch.ptr, _size); \
861 ctxt->fetch.ptr += (_size); \
865 * Given the 'reg' portion of a ModRM byte, and a register block, return a
866 * pointer into the block that addresses the relevant register.
 * @byteop specifies whether to decode AH, CH, DH and BH, i.e. the legacy
 * high-byte registers, which are reachable only when there is no REX prefix.
 */
869 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
873 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
875 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
876 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
878 p = reg_rmw(ctxt, modrm_reg);
882 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
883 struct segmented_address addr,
884 u16 *size, unsigned long *address, int op_bytes)
891 rc = segmented_read_std(ctxt, addr, size, 2);
892 if (rc != X86EMUL_CONTINUE)
895 rc = segmented_read_std(ctxt, addr, address, op_bytes);
909 FASTOP1SRC2(mul, mul_ex);
910 FASTOP1SRC2(imul, imul_ex);
911 FASTOP1SRC2EX(div, div_ex);
912 FASTOP1SRC2EX(idiv, idiv_ex);
941 FASTOP2R(cmp, cmp_r);
943 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
945 /* If src is zero, do not writeback, but update flags */
946 if (ctxt->src.val == 0)
947 ctxt->dst.type = OP_NONE;
948 return fastop(ctxt, em_bsf);
951 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
953 /* If src is zero, do not writeback, but update flags */
954 if (ctxt->src.val == 0)
955 ctxt->dst.type = OP_NONE;
956 return fastop(ctxt, em_bsr);
959 static u8 test_cc(unsigned int condition, unsigned long flags)
962 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
964 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
965 asm("push %[flags]; popf; call *%[fastop]"
966 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
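
/*
 * Example: condition code 4 ("e"/"z", ZF set) lands on the setz stub at
 * em_setcc + 4 * 4, since FOP_SETCC pads every SETcc stub to 4 bytes.
 */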
970 static void fetch_register_operand(struct operand *op)
974 op->val = *(u8 *)op->addr.reg;
977 op->val = *(u16 *)op->addr.reg;
980 op->val = *(u32 *)op->addr.reg;
983 op->val = *(u64 *)op->addr.reg;
988 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
990 ctxt->ops->get_fpu(ctxt);
992 case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
993 case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
994 case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
995 case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
996 case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
997 case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
998 case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
999 case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
1000 #ifdef CONFIG_X86_64
1001 case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
1002 case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
1003 case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
1004 case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
1005 case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
1006 case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
1007 case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
1008 case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
1012 ctxt->ops->put_fpu(ctxt);
1015 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
1018 ctxt->ops->get_fpu(ctxt);
1020 case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
1021 case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
1022 case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
1023 case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
1024 case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
1025 case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
1026 case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
1027 case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
1028 #ifdef CONFIG_X86_64
1029 case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
1030 case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
1031 case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
1032 case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
1033 case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
1034 case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
1035 case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
1036 case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
1040 ctxt->ops->put_fpu(ctxt);
1043 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1045 ctxt->ops->get_fpu(ctxt);
1047 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
1048 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
1049 case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
1050 case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
1051 case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
1052 case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
1053 case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
1054 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
1057 ctxt->ops->put_fpu(ctxt);
1060 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1062 ctxt->ops->get_fpu(ctxt);
1064 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
1065 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
1066 case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
1067 case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
1068 case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
1069 case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
1070 case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
1071 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
1074 ctxt->ops->put_fpu(ctxt);
1077 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1079 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1080 return emulate_nm(ctxt);
1082 ctxt->ops->get_fpu(ctxt);
1083 asm volatile("fninit");
1084 ctxt->ops->put_fpu(ctxt);
1085 return X86EMUL_CONTINUE;
1088 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1092 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1093 return emulate_nm(ctxt);
1095 ctxt->ops->get_fpu(ctxt);
1096 asm volatile("fnstcw %0": "+m"(fcw));
1097 ctxt->ops->put_fpu(ctxt);
1099 ctxt->dst.val = fcw;
1101 return X86EMUL_CONTINUE;
1104 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1108 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1109 return emulate_nm(ctxt);
1111 ctxt->ops->get_fpu(ctxt);
1112 asm volatile("fnstsw %0": "+m"(fsw));
1113 ctxt->ops->put_fpu(ctxt);
1115 ctxt->dst.val = fsw;
1117 return X86EMUL_CONTINUE;
1120 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1123 unsigned reg = ctxt->modrm_reg;
1125 if (!(ctxt->d & ModRM))
1126 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1128 if (ctxt->d & Sse) {
1132 read_sse_reg(ctxt, &op->vec_val, reg);
1135 if (ctxt->d & Mmx) {
1144 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1145 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1147 fetch_register_operand(op);
1148 op->orig_val = op->val;
1151 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1153 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1154 ctxt->modrm_seg = VCPU_SREG_SS;
1157 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1161 int index_reg, base_reg, scale;
1162 int rc = X86EMUL_CONTINUE;
1165 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1166 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1167 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1169 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1170 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1171 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1172 ctxt->modrm_seg = VCPU_SREG_DS;
1174 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1176 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1177 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1179 if (ctxt->d & Sse) {
1182 op->addr.xmm = ctxt->modrm_rm;
1183 read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
1186 if (ctxt->d & Mmx) {
1189 op->addr.mm = ctxt->modrm_rm & 7;
1192 fetch_register_operand(op);
1198 if (ctxt->ad_bytes == 2) {
1199 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1200 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1201 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1202 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1204 /* 16-bit ModR/M decode. */
1205 switch (ctxt->modrm_mod) {
1207 if (ctxt->modrm_rm == 6)
1208 modrm_ea += insn_fetch(u16, ctxt);
1211 modrm_ea += insn_fetch(s8, ctxt);
1214 modrm_ea += insn_fetch(u16, ctxt);
1217 switch (ctxt->modrm_rm) {
1219 modrm_ea += bx + si;
1222 modrm_ea += bx + di;
1225 modrm_ea += bp + si;
1228 modrm_ea += bp + di;
1237 if (ctxt->modrm_mod != 0)
1244 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1245 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1246 ctxt->modrm_seg = VCPU_SREG_SS;
1247 modrm_ea = (u16)modrm_ea;
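		/*
		 * For reference, the architectural 16-bit r/m encodings:
		 * 0=BX+SI 1=BX+DI 2=BP+SI 3=BP+DI 4=SI 5=DI
		 * 6=BP (a plain disp16 when mod == 0) 7=BX.
		 */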
1249 /* 32/64-bit ModR/M decode. */
1250 if ((ctxt->modrm_rm & 7) == 4) {
1251 sib = insn_fetch(u8, ctxt);
1252 index_reg |= (sib >> 3) & 7;
1253 base_reg |= sib & 7;
1256 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1257 modrm_ea += insn_fetch(s32, ctxt);
1259 modrm_ea += reg_read(ctxt, base_reg);
1260 adjust_modrm_seg(ctxt, base_reg);
1261 /* Increment ESP on POP [ESP] */
1262 if ((ctxt->d & IncSP) &&
1263 base_reg == VCPU_REGS_RSP)
1264 modrm_ea += ctxt->op_bytes;
1267 modrm_ea += reg_read(ctxt, index_reg) << scale;
1268 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1269 modrm_ea += insn_fetch(s32, ctxt);
1270 if (ctxt->mode == X86EMUL_MODE_PROT64)
1271 ctxt->rip_relative = 1;
1273 base_reg = ctxt->modrm_rm;
1274 modrm_ea += reg_read(ctxt, base_reg);
1275 adjust_modrm_seg(ctxt, base_reg);
1277 switch (ctxt->modrm_mod) {
1279 modrm_ea += insn_fetch(s8, ctxt);
1282 modrm_ea += insn_fetch(s32, ctxt);
1286 op->addr.mem.ea = modrm_ea;
1287 if (ctxt->ad_bytes != 8)
1288 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1294 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1297 int rc = X86EMUL_CONTINUE;
1300 switch (ctxt->ad_bytes) {
1302 op->addr.mem.ea = insn_fetch(u16, ctxt);
1305 op->addr.mem.ea = insn_fetch(u32, ctxt);
1308 op->addr.mem.ea = insn_fetch(u64, ctxt);
1315 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1319 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1320 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1322 if (ctxt->src.bytes == 2)
1323 sv = (s16)ctxt->src.val & (s16)mask;
1324 else if (ctxt->src.bytes == 4)
1325 sv = (s32)ctxt->src.val & (s32)mask;
1327 sv = (s64)ctxt->src.val & (s64)mask;
1329 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1330 ctxt->dst.addr.mem.ea + (sv >> 3));
1333 /* only subword offset */
1334 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
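
	/*
	 * Example: BT mem, reg with a bit offset of 70 on a 16-bit operand
	 * advances the memory ea by (70 & ~15) / 8 == 8 bytes and leaves
	 * bit offset 70 & 15 == 6 for the fastop itself.
	 */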
1337 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1338 unsigned long addr, void *dest, unsigned size)
1341 struct read_cache *mc = &ctxt->mem_read;
1343 if (mc->pos < mc->end)
1346 WARN_ON((mc->end + size) >= sizeof(mc->data));
1348 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1350 if (rc != X86EMUL_CONTINUE)
1356 memcpy(dest, mc->data + mc->pos, size);
1358 return X86EMUL_CONTINUE;
1361 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1362 struct segmented_address addr,
1369 rc = linearize(ctxt, addr, size, false, &linear);
1370 if (rc != X86EMUL_CONTINUE)
1372 return read_emulated(ctxt, linear, data, size);
1375 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1376 struct segmented_address addr,
1383 rc = linearize(ctxt, addr, size, true, &linear);
1384 if (rc != X86EMUL_CONTINUE)
1386 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1390 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1391 struct segmented_address addr,
1392 const void *orig_data, const void *data,
1398 rc = linearize(ctxt, addr, size, true, &linear);
1399 if (rc != X86EMUL_CONTINUE)
1401 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1402 size, &ctxt->exception);
1405 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1406 unsigned int size, unsigned short port,
1409 struct read_cache *rc = &ctxt->io_read;
1411 if (rc->pos == rc->end) { /* refill pio read ahead */
1412 unsigned int in_page, n;
1413 unsigned int count = ctxt->rep_prefix ?
1414 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1415 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1416 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1417 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1418 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1421 rc->pos = rc->end = 0;
1422 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1427 if (ctxt->rep_prefix && (ctxt->d & String) &&
1428 !(ctxt->eflags & X86_EFLAGS_DF)) {
1429 ctxt->dst.data = rc->data + rc->pos;
1430 ctxt->dst.type = OP_MEM_STR;
1431 ctxt->dst.count = (rc->end - rc->pos) / size;
1434 memcpy(dest, rc->data + rc->pos, size);
1440 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1441 u16 index, struct desc_struct *desc)
1446 ctxt->ops->get_idt(ctxt, &dt);
1448 if (dt.size < index * 8 + 7)
1449 return emulate_gp(ctxt, index << 3 | 0x2);
1451 addr = dt.address + index * 8;
1452 return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
1456 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1457 u16 selector, struct desc_ptr *dt)
1459 const struct x86_emulate_ops *ops = ctxt->ops;
1462 if (selector & 1 << 2) {
1463 struct desc_struct desc;
1466 memset (dt, 0, sizeof *dt);
1467 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1471 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1472 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1474 ops->get_gdt(ctxt, dt);
1477 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1478 u16 selector, ulong *desc_addr_p)
1481 u16 index = selector >> 3;
1484 get_descriptor_table_ptr(ctxt, selector, &dt);
1486 if (dt.size < index * 8 + 7)
1487 return emulate_gp(ctxt, selector & 0xfffc);
1489 addr = dt.address + index * 8;
1491 #ifdef CONFIG_X86_64
1492 if (addr >> 32 != 0) {
1495 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1496 if (!(efer & EFER_LMA))
1501 *desc_addr_p = addr;
1502 return X86EMUL_CONTINUE;
/* allowed just for 8-byte segments */
1506 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1507 u16 selector, struct desc_struct *desc,
1512 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1513 if (rc != X86EMUL_CONTINUE)
1516 return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
/* allowed just for 8-byte segments */
1521 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1522 u16 selector, struct desc_struct *desc)
1527 rc = get_descriptor_ptr(ctxt, selector, &addr);
1528 if (rc != X86EMUL_CONTINUE)
1531 return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
1535 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1536 u16 selector, int seg, u8 cpl,
1537 enum x86_transfer_type transfer,
1538 struct desc_struct *desc)
1540 struct desc_struct seg_desc, old_desc;
1542 unsigned err_vec = GP_VECTOR;
1544 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1550 memset(&seg_desc, 0, sizeof seg_desc);
1552 if (ctxt->mode == X86EMUL_MODE_REAL) {
1553 /* set real mode segment descriptor (keep limit etc. for
1555 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1556 set_desc_base(&seg_desc, selector << 4);
1558 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1559 /* VM86 needs a clean new segment descriptor */
1560 set_desc_base(&seg_desc, selector << 4);
1561 set_desc_limit(&seg_desc, 0xffff);
1571 /* TR should be in GDT only */
1572 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1575 /* NULL selector is not valid for TR, CS and (except for long mode) SS */
1576 if (null_selector) {
1577 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
1580 if (seg == VCPU_SREG_SS) {
1581 if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
1585 * ctxt->ops->set_segment expects the CPL to be in
1586 * SS.DPL, so fake an expand-up 32-bit data segment.
1596 /* Skip all following checks */
1600 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1601 if (ret != X86EMUL_CONTINUE)
1604 err_code = selector & 0xfffc;
1605 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1608 /* can't load system descriptor into segment selector */
1609 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1610 if (transfer == X86_TRANSFER_CALL_JMP)
1611 return X86EMUL_UNHANDLEABLE;
1616 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		 * segment is not a writable data segment, or the segment
		 * selector's RPL != CPL, or the descriptor's DPL != CPL
		 */
1628 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1632 if (!(seg_desc.type & 8))
1635 if (seg_desc.type & 4) {
1641 if (rpl > cpl || dpl != cpl)
1644 /* in long-mode d/b must be clear if l is set */
1645 if (seg_desc.d && seg_desc.l) {
1648 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1649 if (efer & EFER_LMA)
1653 /* CS(RPL) <- CPL */
1654 selector = (selector & 0xfffc) | cpl;
1657 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1659 old_desc = seg_desc;
1660 seg_desc.type |= 2; /* busy */
1661 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1662 sizeof(seg_desc), &ctxt->exception);
1663 if (ret != X86EMUL_CONTINUE)
1666 case VCPU_SREG_LDTR:
1667 if (seg_desc.s || seg_desc.type != 2)
1670 default: /* DS, ES, FS, or GS */
1672 * segment is not a data or readable code segment or
1673 * ((segment is a data or nonconforming code segment)
1674 * and (both RPL and CPL > DPL))
1676 if ((seg_desc.type & 0xa) == 0x8 ||
1677 (((seg_desc.type & 0xc) != 0xc) &&
1678 (rpl > dpl && cpl > dpl)))
1684 /* mark segment as accessed */
1685 if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;	/* set the accessed bit */
			ret = write_segment_descriptor(ctxt, selector,
1689 if (ret != X86EMUL_CONTINUE)
1692 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1693 ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
1694 sizeof(base3), &ctxt->exception);
1695 if (ret != X86EMUL_CONTINUE)
1697 if (is_noncanonical_address(get_desc_base(&seg_desc) |
1698 ((u64)base3 << 32)))
1699 return emulate_gp(ctxt, 0);
1702 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1705 return X86EMUL_CONTINUE;
1707 return emulate_exception(ctxt, err_vec, err_code, true);
1710 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1711 u16 selector, int seg)
1713 u8 cpl = ctxt->ops->cpl(ctxt);
1716 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
	 * they can load it at CPL<3 (Intel's manual says only LSS can,
	 * but it's architecturally the case for MOV and POP as well).
	 *
1720 * However, the Intel manual says that putting IST=1/DPL=3 in
1721 * an interrupt gate will result in SS=3 (the AMD manual instead
1722 * says it doesn't), so allow SS=3 in __load_segment_descriptor
1723 * and only forbid it here.
1725 if (seg == VCPU_SREG_SS && selector == 3 &&
1726 ctxt->mode == X86EMUL_MODE_PROT64)
1727 return emulate_exception(ctxt, GP_VECTOR, 0, true);
1729 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1730 X86_TRANSFER_NONE, NULL);
1733 static void write_register_operand(struct operand *op)
1735 return assign_register(op->addr.reg, op->val, op->bytes);
1738 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1742 write_register_operand(op);
1745 if (ctxt->lock_prefix)
1746 return segmented_cmpxchg(ctxt,
1752 return segmented_write(ctxt,
1758 return segmented_write(ctxt,
1761 op->bytes * op->count);
1764 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
1767 write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
1775 return X86EMUL_CONTINUE;
1778 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1780 struct segmented_address addr;
1782 rsp_increment(ctxt, -bytes);
1783 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1784 addr.seg = VCPU_SREG_SS;
1786 return segmented_write(ctxt, addr, data, bytes);
1789 static int em_push(struct x86_emulate_ctxt *ctxt)
1791 /* Disable writeback. */
1792 ctxt->dst.type = OP_NONE;
1793 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1796 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1797 void *dest, int len)
1800 struct segmented_address addr;
1802 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1803 addr.seg = VCPU_SREG_SS;
1804 rc = segmented_read(ctxt, addr, dest, len);
1805 if (rc != X86EMUL_CONTINUE)
1808 rsp_increment(ctxt, len);
1812 static int em_pop(struct x86_emulate_ctxt *ctxt)
1814 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1817 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1818 void *dest, int len)
1821 unsigned long val, change_mask;
1822 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1823 int cpl = ctxt->ops->cpl(ctxt);
1825 rc = emulate_pop(ctxt, &val, len);
1826 if (rc != X86EMUL_CONTINUE)
1829 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1830 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1831 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1832 X86_EFLAGS_AC | X86_EFLAGS_ID;
1834 switch(ctxt->mode) {
1835 case X86EMUL_MODE_PROT64:
1836 case X86EMUL_MODE_PROT32:
1837 case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
1849 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1853 *(unsigned long *)dest =
1854 (ctxt->eflags & ~change_mask) | (val & change_mask);
1859 static int em_popf(struct x86_emulate_ctxt *ctxt)
1861 ctxt->dst.type = OP_REG;
1862 ctxt->dst.addr.reg = &ctxt->eflags;
1863 ctxt->dst.bytes = ctxt->op_bytes;
1864 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1867 static int em_enter(struct x86_emulate_ctxt *ctxt)
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;
1877 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1878 rc = push(ctxt, &rbp, stack_size(ctxt));
1879 if (rc != X86EMUL_CONTINUE)
1881 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1883 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1884 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1886 return X86EMUL_CONTINUE;
1889 static int em_leave(struct x86_emulate_ctxt *ctxt)
1891 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1893 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1896 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1898 int seg = ctxt->src2.val;
1900 ctxt->src.val = get_segment_selector(ctxt, seg);
1901 if (ctxt->op_bytes == 4) {
1902 rsp_increment(ctxt, -2);
1906 return em_push(ctxt);
1909 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1911 int seg = ctxt->src2.val;
1912 unsigned long selector;
1915 rc = emulate_pop(ctxt, &selector, 2);
1916 if (rc != X86EMUL_CONTINUE)
1919 if (ctxt->modrm_reg == VCPU_SREG_SS)
1920 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1921 if (ctxt->op_bytes > 2)
1922 rsp_increment(ctxt, ctxt->op_bytes - 2);
1924 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1928 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1930 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1931 int rc = X86EMUL_CONTINUE;
1932 int reg = VCPU_REGS_RAX;
1934 while (reg <= VCPU_REGS_RDI) {
1935 (reg == VCPU_REGS_RSP) ?
1936 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}
1948 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1950 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1951 return em_push(ctxt);
1954 static int em_popa(struct x86_emulate_ctxt *ctxt)
1956 int rc = X86EMUL_CONTINUE;
1957 int reg = VCPU_REGS_RDI;
1960 while (reg >= VCPU_REGS_RAX) {
1961 if (reg == VCPU_REGS_RSP) {
1962 rsp_increment(ctxt, ctxt->op_bytes);
1966 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1967 if (rc != X86EMUL_CONTINUE)
1969 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1975 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1977 const struct x86_emulate_ops *ops = ctxt->ops;
1984 /* TODO: Add limit checks */
1985 ctxt->src.val = ctxt->eflags;
1987 if (rc != X86EMUL_CONTINUE)
1990 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
1992 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
1994 if (rc != X86EMUL_CONTINUE)
1997 ctxt->src.val = ctxt->_eip;
1999 if (rc != X86EMUL_CONTINUE)
2002 ops->get_idt(ctxt, &dt);
2004 eip_addr = dt.address + (irq << 2);
2005 cs_addr = dt.address + (irq << 2) + 2;
2007 rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
2008 if (rc != X86EMUL_CONTINUE)
2011 rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
2012 if (rc != X86EMUL_CONTINUE)
2015 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2016 if (rc != X86EMUL_CONTINUE)
2024 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2028 invalidate_registers(ctxt);
2029 rc = __emulate_int_real(ctxt, irq);
2030 if (rc == X86EMUL_CONTINUE)
2031 writeback_registers(ctxt);
2035 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2037 switch(ctxt->mode) {
2038 case X86EMUL_MODE_REAL:
2039 return __emulate_int_real(ctxt, irq);
2040 case X86EMUL_MODE_VM86:
2041 case X86EMUL_MODE_PROT16:
2042 case X86EMUL_MODE_PROT32:
2043 case X86EMUL_MODE_PROT64:
2045 /* Protected mode interrupts unimplemented yet */
2046 return X86EMUL_UNHANDLEABLE;
2050 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2052 int rc = X86EMUL_CONTINUE;
2053 unsigned long temp_eip = 0;
2054 unsigned long temp_eflags = 0;
2055 unsigned long cs = 0;
2056 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2057 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2058 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2059 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
2062 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2065 /* TODO: Add stack limit check */
2067 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2069 if (rc != X86EMUL_CONTINUE)
2072 if (temp_eip & ~0xffff)
2073 return emulate_gp(ctxt, 0);
2075 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2077 if (rc != X86EMUL_CONTINUE)
2080 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2082 if (rc != X86EMUL_CONTINUE)
2085 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2087 if (rc != X86EMUL_CONTINUE)
2090 ctxt->_eip = temp_eip;
2092 if (ctxt->op_bytes == 4)
2093 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2094 else if (ctxt->op_bytes == 2) {
2095 ctxt->eflags &= ~0xffff;
2096 ctxt->eflags |= temp_eflags;
2099 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2100 ctxt->eflags |= X86_EFLAGS_FIXED;
2101 ctxt->ops->set_nmi_mask(ctxt, false);
2106 static int em_iret(struct x86_emulate_ctxt *ctxt)
2108 switch(ctxt->mode) {
2109 case X86EMUL_MODE_REAL:
2110 return emulate_iret_real(ctxt);
2111 case X86EMUL_MODE_VM86:
2112 case X86EMUL_MODE_PROT16:
2113 case X86EMUL_MODE_PROT32:
2114 case X86EMUL_MODE_PROT64:
2116 /* iret from protected mode unimplemented yet */
2117 return X86EMUL_UNHANDLEABLE;
2121 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2125 struct desc_struct new_desc;
2126 u8 cpl = ctxt->ops->cpl(ctxt);
2128 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2130 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2131 X86_TRANSFER_CALL_JMP,
2133 if (rc != X86EMUL_CONTINUE)
2136 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2137 /* Error handling is not implemented. */
2138 if (rc != X86EMUL_CONTINUE)
2139 return X86EMUL_UNHANDLEABLE;
2144 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2146 return assign_eip_near(ctxt, ctxt->src.val);
2149 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2154 old_eip = ctxt->_eip;
2155 rc = assign_eip_near(ctxt, ctxt->src.val);
2156 if (rc != X86EMUL_CONTINUE)
2158 ctxt->src.val = old_eip;
2163 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2165 u64 old = ctxt->dst.orig_val64;
2167 if (ctxt->dst.bytes == 16)
2168 return X86EMUL_UNHANDLEABLE;
2170 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2171 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2172 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2173 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2174 ctxt->eflags &= ~X86_EFLAGS_ZF;
2176 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2177 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2179 ctxt->eflags |= X86_EFLAGS_ZF;
2181 return X86EMUL_CONTINUE;
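
/*
 * Example: CMPXCHG8B compares EDX:EAX with the 8-byte destination; on a
 * match it stores ECX:EBX there and sets ZF, otherwise it loads the old
 * value into EDX:EAX and clears ZF, exactly as the two branches above do.
 */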
2184 static int em_ret(struct x86_emulate_ctxt *ctxt)
2189 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2190 if (rc != X86EMUL_CONTINUE)
2193 return assign_eip_near(ctxt, eip);
2196 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2199 unsigned long eip, cs;
2200 int cpl = ctxt->ops->cpl(ctxt);
2201 struct desc_struct new_desc;
2203 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2204 if (rc != X86EMUL_CONTINUE)
2206 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2207 if (rc != X86EMUL_CONTINUE)
2209 /* Outer-privilege level return is not implemented */
2210 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2211 return X86EMUL_UNHANDLEABLE;
2212 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2215 if (rc != X86EMUL_CONTINUE)
2217 rc = assign_eip_far(ctxt, eip, &new_desc);
2218 /* Error handling is not implemented. */
2219 if (rc != X86EMUL_CONTINUE)
2220 return X86EMUL_UNHANDLEABLE;
2225 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2229 rc = em_ret_far(ctxt);
2230 if (rc != X86EMUL_CONTINUE)
2232 rsp_increment(ctxt, ctxt->src.val);
2233 return X86EMUL_CONTINUE;
2236 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2238 /* Save real source value, then compare EAX against destination. */
2239 ctxt->dst.orig_val = ctxt->dst.val;
2240 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2241 ctxt->src.orig_val = ctxt->src.val;
2242 ctxt->src.val = ctxt->dst.orig_val;
2243 fastop(ctxt, em_cmp);
2245 if (ctxt->eflags & X86_EFLAGS_ZF) {
2246 /* Success: write back to memory; no update of EAX */
2247 ctxt->src.type = OP_NONE;
2248 ctxt->dst.val = ctxt->src.orig_val;
2250 /* Failure: write the value we saw to EAX. */
2251 ctxt->src.type = OP_REG;
2252 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2253 ctxt->src.val = ctxt->dst.orig_val;
2254 /* Create write-cycle to dest by writing the same value */
2255 ctxt->dst.val = ctxt->dst.orig_val;
2257 return X86EMUL_CONTINUE;
2260 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2262 int seg = ctxt->src2.val;
2266 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2268 rc = load_segment_descriptor(ctxt, sel, seg);
2269 if (rc != X86EMUL_CONTINUE)
2272 ctxt->dst.val = ctxt->src.val;
2276 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2278 u32 eax, ebx, ecx, edx;
2282 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2283 return edx & bit(X86_FEATURE_LM);
#define GET_SMSTATE(type, smbase, offset)				  \
	({								  \
	 type __val;							  \
	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,	  \
				      sizeof(__val));			  \
	 if (r != X86EMUL_CONTINUE)					  \
		 return X86EMUL_UNHANDLEABLE;				  \
	 __val;								  \
	})
2296 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2298 desc->g = (flags >> 23) & 1;
2299 desc->d = (flags >> 22) & 1;
2300 desc->l = (flags >> 21) & 1;
2301 desc->avl = (flags >> 20) & 1;
2302 desc->p = (flags >> 15) & 1;
2303 desc->dpl = (flags >> 13) & 3;
2304 desc->s = (flags >> 12) & 1;
2305 desc->type = (flags >> 8) & 15;
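
/*
 * Example: flags 0x00c09b00 unpacks to g=1, d=1, l=0, avl=0, p=1, dpl=0,
 * s=1, type=0xb, i.e. the access rights of a present 32-bit execute/read
 * code segment (rsm_load_seg_64() below shifts the saved access word into
 * bits 8..23 before calling this).
 */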
2308 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2310 struct desc_struct desc;
2314 selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
2317 offset = 0x7f84 + n * 12;
2319 offset = 0x7f2c + (n - 3) * 12;
2321 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2322 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2323 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
2324 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2325 return X86EMUL_CONTINUE;
2328 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2330 struct desc_struct desc;
2335 offset = 0x7e00 + n * 16;
2337 selector = GET_SMSTATE(u16, smbase, offset);
2338 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
2339 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2340 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2341 base3 = GET_SMSTATE(u32, smbase, offset + 12);
2343 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2344 return X86EMUL_CONTINUE;
2347 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2353 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2354 * Then enable protected mode. However, PCID cannot be enabled
2355 * if EFER.LMA=0, so set it separately.
2357 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2359 return X86EMUL_UNHANDLEABLE;
2361 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2363 return X86EMUL_UNHANDLEABLE;
2365 if (cr4 & X86_CR4_PCIDE) {
2366 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2368 return X86EMUL_UNHANDLEABLE;
2371 return X86EMUL_CONTINUE;
2374 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2376 struct desc_struct desc;
2382 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2383 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
2384 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2385 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2387 for (i = 0; i < 8; i++)
2388 *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
2390 val = GET_SMSTATE(u32, smbase, 0x7fcc);
2391 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2392 val = GET_SMSTATE(u32, smbase, 0x7fc8);
2393 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2395 selector = GET_SMSTATE(u32, smbase, 0x7fc4);
2396 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
2397 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
2398 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
2399 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2401 selector = GET_SMSTATE(u32, smbase, 0x7fc0);
2402 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
2403 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
2404 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
2405 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2407 dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
2408 dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
2409 ctxt->ops->set_gdt(ctxt, &dt);
2411 dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
2412 dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
2413 ctxt->ops->set_idt(ctxt, &dt);
2415 for (i = 0; i < 6; i++) {
2416 int r = rsm_load_seg_32(ctxt, smbase, i);
2417 if (r != X86EMUL_CONTINUE)
2421 cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
2423 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2425 return rsm_enter_protected_mode(ctxt, cr0, cr4);
2428 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2430 struct desc_struct desc;
2437 for (i = 0; i < 16; i++)
2438 *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
2440 ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
2441 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
2443 val = GET_SMSTATE(u32, smbase, 0x7f68);
2444 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2445 val = GET_SMSTATE(u32, smbase, 0x7f60);
2446 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2448 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2449 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
2450 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2451 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2452 val = GET_SMSTATE(u64, smbase, 0x7ed0);
2453 ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
2455 selector = GET_SMSTATE(u32, smbase, 0x7e90);
2456 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
2457 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
2458 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
2459 base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
2460 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2462 dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
2463 dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
2464 ctxt->ops->set_idt(ctxt, &dt);
2466 selector = GET_SMSTATE(u32, smbase, 0x7e70);
2467 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
2468 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
2469 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
2470 base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
2471 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2473 dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
2474 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2475 ctxt->ops->set_gdt(ctxt, &dt);
2477 r = rsm_enter_protected_mode(ctxt, cr0, cr4);
2478 if (r != X86EMUL_CONTINUE)
2479 return r;
2481 for (i = 0; i < 6; i++) {
2482 r = rsm_load_seg_64(ctxt, smbase, i);
2483 if (r != X86EMUL_CONTINUE)
2484 return r;
2485 }
2487 return X86EMUL_CONTINUE;
2490 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2492 unsigned long cr0, cr4, efer;
2493 u64 smbase;
2494 int ret;
2496 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2497 return emulate_ud(ctxt);
2500 * Get back to real mode, to prepare a safe state in which to load
2501 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2502 * supports long mode.
2504 cr4 = ctxt->ops->get_cr(ctxt, 4);
2505 if (emulator_has_longmode(ctxt)) {
2506 struct desc_struct cs_desc;
2508 /* Zero CR4.PCIDE before CR0.PG. */
2509 if (cr4 & X86_CR4_PCIDE) {
2510 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2511 cr4 &= ~X86_CR4_PCIDE;
2512 }
2514 /* A 32-bit code segment is required to clear EFER.LMA. */
2515 memset(&cs_desc, 0, sizeof(cs_desc));
2516 cs_desc.type = 0xb;
2517 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2518 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2519 }
2521 /* For the 64-bit case, this will clear EFER.LMA. */
2522 cr0 = ctxt->ops->get_cr(ctxt, 0);
2523 if (cr0 & X86_CR0_PE)
2524 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2526 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2527 if (cr4 & X86_CR4_PAE)
2528 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2530 /* And finally go back to 32-bit mode. */
2531 efer = 0;
2532 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2534 smbase = ctxt->ops->get_smbase(ctxt);
2535 if (emulator_has_longmode(ctxt))
2536 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2537 else
2538 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2540 if (ret != X86EMUL_CONTINUE) {
2541 /* FIXME: should triple fault */
2542 return X86EMUL_UNHANDLEABLE;
2543 }
2545 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2546 ctxt->ops->set_nmi_mask(ctxt, false);
2548 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2549 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2550 return X86EMUL_CONTINUE;
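/*
 * Summary of em_rsm(): force the CPU into a clean paging-off,
 * protection-off state first (clearing CR4.PCIDE, CR0.PG/PE, CR4.PAE
 * and EFER in an architecturally valid order), reload the saved state
 * from the SMRAM save area at SMBASE + 0x8000, and finally unmask NMIs
 * unless the SMI was taken while an NMI handler was running.
 */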
2553 static void
2554 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2555 struct desc_struct *cs, struct desc_struct *ss)
2557 cs->l = 0; /* will be adjusted later */
2558 set_desc_base(cs, 0); /* flat segment */
2559 cs->g = 1; /* 4kb granularity */
2560 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2561 cs->type = 0x0b; /* Read, Execute, Accessed */
2562 cs->s = 1;
2563 cs->dpl = 0; /* will be adjusted later */
2564 cs->p = 1;
2565 cs->d = 1;
2566 cs->avl = 0;
2568 set_desc_base(ss, 0); /* flat segment */
2569 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2570 ss->g = 1; /* 4kb granularity */
2571 ss->s = 1;
2572 ss->type = 0x03; /* Read/Write, Accessed */
2573 ss->d = 1; /* 32bit stack segment */
2574 ss->dpl = 0;
2575 ss->p = 1;
2576 ss->l = 0;
2577 ss->avl = 0;
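/*
 * Both descriptors above describe flat 4GB segments. SYSCALL/SYSENTER
 * and their return counterparts architecturally assume this fixed
 * layout; callers only adjust CS.L, CS.DPL and the selectors afterwards.
 */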
2580 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2582 u32 eax, ebx, ecx, edx;
2584 eax = ecx = 0;
2585 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2586 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2587 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2588 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2591 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2593 const struct x86_emulate_ops *ops = ctxt->ops;
2594 u32 eax, ebx, ecx, edx;
2596 /*
2597 * syscall should always be enabled in long mode, so the check only
2598 * becomes vendor specific (via cpuid) when other modes are active...
2599 */
2600 if (ctxt->mode == X86EMUL_MODE_PROT64)
2601 return true;
2603 eax = 0x00000000;
2604 ecx = 0x00000000;
2605 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2607 * Intel ("GenuineIntel")
2608 * remark: Intel CPUs only support "syscall" in 64-bit long mode.
2609 * Also, a 64-bit guest with a 32-bit compat app running will #UD!
2610 * While this behaviour could be fixed (by emulating the AMD
2611 * response), AMD CPUs can't be made to behave exactly like Intel
2612 * ones.
2613 */
2614 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2615 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2616 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2617 return false;
2619 /* AMD ("AuthenticAMD") */
2620 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2621 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2622 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2623 return true;
2625 /* AMD ("AMDisbetter!") */
2626 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2627 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2628 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2629 return true;
2631 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
2632 return false;
2635 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2637 const struct x86_emulate_ops *ops = ctxt->ops;
2638 struct desc_struct cs, ss;
2639 u64 msr_data;
2640 u16 cs_sel, ss_sel;
2641 u64 efer = 0;
2643 /* syscall is not available in real mode */
2644 if (ctxt->mode == X86EMUL_MODE_REAL ||
2645 ctxt->mode == X86EMUL_MODE_VM86)
2646 return emulate_ud(ctxt);
2648 if (!(em_syscall_is_enabled(ctxt)))
2649 return emulate_ud(ctxt);
2651 ops->get_msr(ctxt, MSR_EFER, &efer);
2652 setup_syscalls_segments(ctxt, &cs, &ss);
2654 if (!(efer & EFER_SCE))
2655 return emulate_ud(ctxt);
2657 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2658 msr_data >>= 32;
2659 cs_sel = (u16)(msr_data & 0xfffc);
2660 ss_sel = (u16)(msr_data + 8);
2662 if (efer & EFER_LMA) {
2663 cs.d = 0;
2664 cs.l = 1;
2665 }
2666 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2667 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2669 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2670 if (efer & EFER_LMA) {
2671 #ifdef CONFIG_X86_64
2672 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2674 ops->get_msr(ctxt,
2675 ctxt->mode == X86EMUL_MODE_PROT64 ?
2676 MSR_LSTAR : MSR_CSTAR, &msr_data);
2677 ctxt->_eip = msr_data;
2679 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2680 ctxt->eflags &= ~msr_data;
2681 ctxt->eflags |= X86_EFLAGS_FIXED;
2682 #endif
2683 } else {
2684 /* legacy mode */
2685 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2686 ctxt->_eip = (u32)msr_data;
2688 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2689 }
2691 return X86EMUL_CONTINUE;
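/*
 * Note the asymmetry above: in long mode SYSCALL saves the return RIP
 * in RCX and RFLAGS in R11, then masks RFLAGS with MSR_SYSCALL_MASK;
 * in legacy mode it only loads EIP from STAR[31:0] and clears VM/IF.
 */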
2694 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2696 const struct x86_emulate_ops *ops = ctxt->ops;
2697 struct desc_struct cs, ss;
2698 u64 msr_data;
2699 u16 cs_sel, ss_sel;
2700 u64 efer = 0;
2702 ops->get_msr(ctxt, MSR_EFER, &efer);
2703 /* inject #GP if in real mode */
2704 if (ctxt->mode == X86EMUL_MODE_REAL)
2705 return emulate_gp(ctxt, 0);
2707 /*
2708 * Not recognized on AMD in compat mode (but is recognized in legacy
2709 * mode).
2710 */
2711 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2712 && !vendor_intel(ctxt))
2713 return emulate_ud(ctxt);
2715 /* sysenter/sysexit have not been tested in 64bit mode. */
2716 if (ctxt->mode == X86EMUL_MODE_PROT64)
2717 return X86EMUL_UNHANDLEABLE;
2719 setup_syscalls_segments(ctxt, &cs, &ss);
2721 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2722 if ((msr_data & 0xfffc) == 0x0)
2723 return emulate_gp(ctxt, 0);
2725 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2726 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2727 ss_sel = cs_sel + 8;
2728 if (efer & EFER_LMA) {
2729 cs.d = 0;
2730 cs.l = 1;
2731 }
2733 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2734 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2736 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2737 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2739 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2740 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2741 (u32)msr_data;
2743 return X86EMUL_CONTINUE;
2746 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2748 const struct x86_emulate_ops *ops = ctxt->ops;
2749 struct desc_struct cs, ss;
2750 u64 msr_data, rcx, rdx;
2751 int usermode;
2752 u16 cs_sel = 0, ss_sel = 0;
2754 /* inject #GP if in real mode or Virtual 8086 mode */
2755 if (ctxt->mode == X86EMUL_MODE_REAL ||
2756 ctxt->mode == X86EMUL_MODE_VM86)
2757 return emulate_gp(ctxt, 0);
2759 setup_syscalls_segments(ctxt, &cs, &ss);
2761 if ((ctxt->rex_prefix & 0x8) != 0x0)
2762 usermode = X86EMUL_MODE_PROT64;
2764 usermode = X86EMUL_MODE_PROT32;
2766 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2767 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2769 cs.dpl = 3;
2770 ss.dpl = 3;
2771 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2772 switch (usermode) {
2773 case X86EMUL_MODE_PROT32:
2774 cs_sel = (u16)(msr_data + 16);
2775 if ((msr_data & 0xfffc) == 0x0)
2776 return emulate_gp(ctxt, 0);
2777 ss_sel = (u16)(msr_data + 24);
2778 rcx = (u32)rcx;
2779 rdx = (u32)rdx;
2780 break;
2781 case X86EMUL_MODE_PROT64:
2782 cs_sel = (u16)(msr_data + 32);
2783 if (msr_data == 0x0)
2784 return emulate_gp(ctxt, 0);
2785 ss_sel = cs_sel + 8;
2786 cs.d = 0;
2787 cs.l = 1;
2788 if (is_noncanonical_address(rcx) ||
2789 is_noncanonical_address(rdx))
2790 return emulate_gp(ctxt, 0);
2791 break;
2792 }
2793 cs_sel |= SEGMENT_RPL_MASK;
2794 ss_sel |= SEGMENT_RPL_MASK;
2796 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2797 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2799 ctxt->_eip = rdx;
2800 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2802 return X86EMUL_CONTINUE;
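/*
 * SYSEXIT derives its target selectors from SYSENTER_CS: CS is
 * SYSENTER_CS + 16 (32-bit) or + 32 (64-bit), SS is always CS + 8, and
 * both get RPL forced to 3 via SEGMENT_RPL_MASK so execution resumes
 * at CPL 3.
 */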
2805 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2807 int iopl;
2808 if (ctxt->mode == X86EMUL_MODE_REAL)
2809 return false;
2810 if (ctxt->mode == X86EMUL_MODE_VM86)
2811 return true;
2812 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2813 return ctxt->ops->cpl(ctxt) > iopl;
2816 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2819 const struct x86_emulate_ops *ops = ctxt->ops;
2820 struct desc_struct tr_seg;
2821 u32 base3;
2822 int r;
2823 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2824 unsigned mask = (1 << len) - 1;
2825 unsigned long base;
2827 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2828 if (!tr_seg.p)
2829 return false;
2830 if (desc_limit_scaled(&tr_seg) < 103)
2831 return false;
2832 base = get_desc_base(&tr_seg);
2833 #ifdef CONFIG_X86_64
2834 base |= ((u64)base3) << 32;
2835 #endif
2836 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2837 if (r != X86EMUL_CONTINUE)
2838 return false;
2839 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2840 return false;
2841 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2842 if (r != X86EMUL_CONTINUE)
2843 return false;
2844 if ((perm >> bit_idx) & mask)
2845 return false;
2846 return true;
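/*
 * The function above implements the TSS I/O permission bitmap check:
 * a 16-bit offset to the bitmap sits at byte 102 of the TSS and each
 * I/O port is one bit. Two bytes are read so that an access spanning
 * a byte boundary can be tested in one go against the (1 << len) - 1
 * mask starting at bit_idx.
 */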
2849 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2851 if (ctxt->perm_ok)
2852 return true;
2855 if (emulator_bad_iopl(ctxt))
2856 if (!emulator_io_port_access_allowed(ctxt, port, len))
2857 return false;
2859 ctxt->perm_ok = true;
2861 return true;
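/*
 * perm_ok caches a successful bitmap lookup for the duration of the
 * current instruction, so a REP INS/OUTS does not re-read the TSS on
 * every iteration.
 */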
2864 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2866 /*
2867 * Intel CPUs mask the counter and pointers in a quite strange
2868 * manner when ECX is zero due to REP-string optimizations.
2869 */
2870 #ifdef CONFIG_X86_64
2871 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2872 return;
2874 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2876 switch (ctxt->b) {
2877 case 0xa4: /* movsb */
2878 case 0xa5: /* movsd/w */
2879 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2880 /* fall through */
2881 case 0xaa: /* stosb */
2882 case 0xab: /* stosd/w */
2883 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2884 }
2885 #endif
2888 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2889 struct tss_segment_16 *tss)
2891 tss->ip = ctxt->_eip;
2892 tss->flag = ctxt->eflags;
2893 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2894 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2895 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2896 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2897 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2898 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2899 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2900 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2902 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2903 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2904 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2905 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2906 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2909 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2910 struct tss_segment_16 *tss)
2912 int ret;
2913 u8 cpl;
2915 ctxt->_eip = tss->ip;
2916 ctxt->eflags = tss->flag | 2;
2917 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2918 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2919 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2920 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2921 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2922 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2923 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2924 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2926 /*
2927 * SDM says that segment selectors are loaded before segment
2928 * descriptors.
2929 */
2930 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2931 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2932 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2933 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2934 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2936 cpl = tss->cs & 3;
2938 /*
2939 * Now load the segment descriptors. If a fault happens at this stage,
2940 * it is handled in the context of the new task.
2941 */
2942 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2943 X86_TRANSFER_TASK_SWITCH, NULL);
2944 if (ret != X86EMUL_CONTINUE)
2945 return ret;
2946 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2947 X86_TRANSFER_TASK_SWITCH, NULL);
2948 if (ret != X86EMUL_CONTINUE)
2949 return ret;
2950 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2951 X86_TRANSFER_TASK_SWITCH, NULL);
2952 if (ret != X86EMUL_CONTINUE)
2953 return ret;
2954 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2955 X86_TRANSFER_TASK_SWITCH, NULL);
2956 if (ret != X86EMUL_CONTINUE)
2957 return ret;
2958 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2959 X86_TRANSFER_TASK_SWITCH, NULL);
2960 if (ret != X86EMUL_CONTINUE)
2961 return ret;
2963 return X86EMUL_CONTINUE;
2966 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2967 u16 tss_selector, u16 old_tss_sel,
2968 ulong old_tss_base, struct desc_struct *new_desc)
2970 const struct x86_emulate_ops *ops = ctxt->ops;
2971 struct tss_segment_16 tss_seg;
2972 int ret;
2973 u32 new_tss_base = get_desc_base(new_desc);
2975 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2976 &ctxt->exception);
2977 if (ret != X86EMUL_CONTINUE)
2978 return ret;
2980 save_state_to_tss16(ctxt, &tss_seg);
2982 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2983 &ctxt->exception);
2984 if (ret != X86EMUL_CONTINUE)
2985 return ret;
2987 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2988 &ctxt->exception);
2989 if (ret != X86EMUL_CONTINUE)
2990 return ret;
2992 if (old_tss_sel != 0xffff) {
2993 tss_seg.prev_task_link = old_tss_sel;
2995 ret = ops->write_std(ctxt, new_tss_base,
2996 &tss_seg.prev_task_link,
2997 sizeof tss_seg.prev_task_link,
2998 &ctxt->exception);
2999 if (ret != X86EMUL_CONTINUE)
3000 return ret;
3001 }
3003 return load_state_from_tss16(ctxt, &tss_seg);
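/*
 * This is the architectural task-switch sequence: save the dynamic
 * state into the old TSS, read the new TSS, and, when nesting
 * (old_tss_sel != 0xffff), store the old selector in prev_task_link so
 * a later IRET can chain back to the outgoing task.
 */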
3006 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3007 struct tss_segment_32 *tss)
3009 /* CR3 and the LDT selector are intentionally not saved */
3010 tss->eip = ctxt->_eip;
3011 tss->eflags = ctxt->eflags;
3012 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3013 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3014 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3015 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3016 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3017 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3018 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3019 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3021 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3022 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3023 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3024 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3025 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3026 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3029 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3030 struct tss_segment_32 *tss)
3032 int ret;
3033 u8 cpl;
3035 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3036 return emulate_gp(ctxt, 0);
3037 ctxt->_eip = tss->eip;
3038 ctxt->eflags = tss->eflags | 2;
3040 /* General purpose registers */
3041 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3042 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3043 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3044 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3045 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3046 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3047 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3048 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3050 /*
3051 * SDM says that segment selectors are loaded before segment
3052 * descriptors. This is important because CPL checks will
3053 * use CS.RPL.
3054 */
3055 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3056 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3057 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3058 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3059 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3060 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3061 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3064 * If we're switching between Protected Mode and VM86, we need to make
3065 * sure to update the mode before loading the segment descriptors so
3066 * that the selectors are interpreted correctly.
3068 if (ctxt->eflags & X86_EFLAGS_VM) {
3069 ctxt->mode = X86EMUL_MODE_VM86;
3070 cpl = 3;
3071 } else {
3072 ctxt->mode = X86EMUL_MODE_PROT32;
3073 cpl = tss->cs & 3;
3074 }
3076 /*
3077 * Now load the segment descriptors. If a fault happens at this stage,
3078 * it is handled in the context of the new task.
3079 */
3080 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3081 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3082 if (ret != X86EMUL_CONTINUE)
3083 return ret;
3084 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3085 X86_TRANSFER_TASK_SWITCH, NULL);
3086 if (ret != X86EMUL_CONTINUE)
3087 return ret;
3088 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3089 X86_TRANSFER_TASK_SWITCH, NULL);
3090 if (ret != X86EMUL_CONTINUE)
3091 return ret;
3092 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3093 X86_TRANSFER_TASK_SWITCH, NULL);
3094 if (ret != X86EMUL_CONTINUE)
3095 return ret;
3096 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3097 X86_TRANSFER_TASK_SWITCH, NULL);
3098 if (ret != X86EMUL_CONTINUE)
3099 return ret;
3100 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3101 X86_TRANSFER_TASK_SWITCH, NULL);
3102 if (ret != X86EMUL_CONTINUE)
3103 return ret;
3104 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3105 X86_TRANSFER_TASK_SWITCH, NULL);
3107 return ret;
3110 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3111 u16 tss_selector, u16 old_tss_sel,
3112 ulong old_tss_base, struct desc_struct *new_desc)
3114 const struct x86_emulate_ops *ops = ctxt->ops;
3115 struct tss_segment_32 tss_seg;
3116 int ret;
3117 u32 new_tss_base = get_desc_base(new_desc);
3118 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3119 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3121 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3122 &ctxt->exception);
3123 if (ret != X86EMUL_CONTINUE)
3124 return ret;
3126 save_state_to_tss32(ctxt, &tss_seg);
3128 /* Only GP registers and segment selectors are saved */
3129 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3130 ldt_sel_offset - eip_offset, &ctxt->exception);
3131 if (ret != X86EMUL_CONTINUE)
3132 return ret;
3134 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3135 &ctxt->exception);
3136 if (ret != X86EMUL_CONTINUE)
3137 return ret;
3139 if (old_tss_sel != 0xffff) {
3140 tss_seg.prev_task_link = old_tss_sel;
3142 ret = ops->write_std(ctxt, new_tss_base,
3143 &tss_seg.prev_task_link,
3144 sizeof tss_seg.prev_task_link,
3145 &ctxt->exception);
3146 if (ret != X86EMUL_CONTINUE)
3147 return ret;
3148 }
3150 return load_state_from_tss32(ctxt, &tss_seg);
3153 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3154 u16 tss_selector, int idt_index, int reason,
3155 bool has_error_code, u32 error_code)
3157 const struct x86_emulate_ops *ops = ctxt->ops;
3158 struct desc_struct curr_tss_desc, next_tss_desc;
3159 int ret;
3160 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3161 ulong old_tss_base =
3162 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3163 u32 desc_limit;
3164 ulong desc_addr, dr7;
3166 /* FIXME: old_tss_base == ~0 ? */
3168 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3169 if (ret != X86EMUL_CONTINUE)
3170 return ret;
3171 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3172 if (ret != X86EMUL_CONTINUE)
3173 return ret;
3175 /* FIXME: check that next_tss_desc is tss */
3178 * Check privileges. The three cases are task switch caused by...
3180 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3181 * 2. Exception/IRQ/iret: No check is performed
3182 * 3. jmp/call to TSS/task-gate: No check is performed since the
3183 * hardware checks it before exiting.
3185 if (reason == TASK_SWITCH_GATE) {
3186 if (idt_index != -1) {
3187 /* Software interrupts */
3188 struct desc_struct task_gate_desc;
3191 ret = read_interrupt_descriptor(ctxt, idt_index,
3192 &task_gate_desc);
3193 if (ret != X86EMUL_CONTINUE)
3194 return ret;
3196 dpl = task_gate_desc.dpl;
3197 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3198 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3199 }
3200 }
3202 desc_limit = desc_limit_scaled(&next_tss_desc);
3203 if (!next_tss_desc.p ||
3204 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3205 desc_limit < 0x2b)) {
3206 return emulate_ts(ctxt, tss_selector & 0xfffc);
3207 }
3209 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3210 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3211 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3212 }
3214 if (reason == TASK_SWITCH_IRET)
3215 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3217 /* set back link to prev task only if NT bit is set in eflags;
3218 note that old_tss_sel is not used after this point */
3219 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3220 old_tss_sel = 0xffff;
3222 if (next_tss_desc.type & 8)
3223 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3224 old_tss_base, &next_tss_desc);
3226 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3227 old_tss_base, &next_tss_desc);
3228 if (ret != X86EMUL_CONTINUE)
3229 return ret;
3231 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3232 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3234 if (reason != TASK_SWITCH_IRET) {
3235 next_tss_desc.type |= (1 << 1); /* set busy flag */
3236 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3237 }
3239 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3240 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3242 if (has_error_code) {
3243 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3244 ctxt->lock_prefix = 0;
3245 ctxt->src.val = (unsigned long) error_code;
3246 ret = em_push(ctxt);
3247 }
3249 ops->get_dr(ctxt, 7, &dr7);
3250 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3252 return ret;
3255 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3256 u16 tss_selector, int idt_index, int reason,
3257 bool has_error_code, u32 error_code)
3259 int rc;
3261 invalidate_registers(ctxt);
3262 ctxt->_eip = ctxt->eip;
3263 ctxt->dst.type = OP_NONE;
3265 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3266 has_error_code, error_code);
3268 if (rc == X86EMUL_CONTINUE) {
3269 ctxt->eip = ctxt->_eip;
3270 writeback_registers(ctxt);
3271 }
3273 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
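/*
 * Registers are invalidated first and written back only on success, so
 * a task switch that faults part-way leaves the guest's committed
 * register state untouched.
 */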
3276 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3277 struct operand *op)
3279 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3281 register_address_increment(ctxt, reg, df * op->bytes);
3282 op->addr.mem.ea = register_address(ctxt, reg);
3285 static int em_das(struct x86_emulate_ctxt *ctxt)
3287 u8 al, old_al;
3288 bool af, cf, old_cf;
3290 cf = ctxt->eflags & X86_EFLAGS_CF;
3291 al = ctxt->dst.val;
3293 old_al = al;
3294 old_cf = cf;
3295 cf = false;
3296 af = ctxt->eflags & X86_EFLAGS_AF;
3297 if ((al & 0x0f) > 9 || af) {
3298 al -= 6;
3299 cf = old_cf | (al >= 250);
3300 af = true;
3301 } else {
3302 af = false;
3303 }
3304 if (old_al > 0x99 || old_cf) {
3305 al -= 0x60;
3306 cf = true;
3307 }
3309 ctxt->dst.val = al;
3310 /* Set PF, ZF, SF */
3311 ctxt->src.type = OP_IMM;
3312 ctxt->src.val = 0;
3313 ctxt->src.bytes = 1;
3314 fastop(ctxt, em_or);
3315 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3316 if (cf)
3317 ctxt->eflags |= X86_EFLAGS_CF;
3318 if (af)
3319 ctxt->eflags |= X86_EFLAGS_AF;
3320 return X86EMUL_CONTINUE;
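/*
 * The OR with an immediate zero through fastop() above is a trick that
 * em_das(), em_aam() and em_aad() all share: it makes the host compute
 * PF/ZF/SF for the result instead of open-coding the flag logic.
 */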
3323 static int em_aam(struct x86_emulate_ctxt *ctxt)
3325 u8 al, ah;
3327 if (ctxt->src.val == 0)
3328 return emulate_de(ctxt);
3330 al = ctxt->dst.val & 0xff;
3331 ah = al / ctxt->src.val;
3332 al %= ctxt->src.val;
3334 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3336 /* Set PF, ZF, SF */
3337 ctxt->src.type = OP_IMM;
3338 ctxt->src.val = 0;
3339 ctxt->src.bytes = 1;
3340 fastop(ctxt, em_or);
3342 return X86EMUL_CONTINUE;
3345 static int em_aad(struct x86_emulate_ctxt *ctxt)
3347 u8 al = ctxt->dst.val & 0xff;
3348 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3350 al = (al + (ah * ctxt->src.val)) & 0xff;
3352 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3354 /* Set PF, ZF, SF */
3355 ctxt->src.type = OP_IMM;
3356 ctxt->src.val = 0;
3357 ctxt->src.bytes = 1;
3358 fastop(ctxt, em_or);
3360 return X86EMUL_CONTINUE;
3363 static int em_call(struct x86_emulate_ctxt *ctxt)
3365 int rc;
3366 long rel = ctxt->src.val;
3368 ctxt->src.val = (unsigned long)ctxt->_eip;
3369 rc = jmp_rel(ctxt, rel);
3370 if (rc != X86EMUL_CONTINUE)
3371 return rc;
3372 return em_push(ctxt);
3375 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3377 u16 sel, old_cs;
3378 ulong old_eip;
3379 int rc;
3380 struct desc_struct old_desc, new_desc;
3381 const struct x86_emulate_ops *ops = ctxt->ops;
3382 int cpl = ctxt->ops->cpl(ctxt);
3383 enum x86emul_mode prev_mode = ctxt->mode;
3385 old_eip = ctxt->_eip;
3386 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3388 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3389 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3390 X86_TRANSFER_CALL_JMP, &new_desc);
3391 if (rc != X86EMUL_CONTINUE)
3392 return rc;
3394 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3395 if (rc != X86EMUL_CONTINUE)
3396 return rc;
3398 ctxt->src.val = old_cs;
3399 rc = em_push(ctxt);
3400 if (rc != X86EMUL_CONTINUE)
3401 goto fail;
3403 ctxt->src.val = old_eip;
3404 rc = em_push(ctxt);
3405 /* If we failed, we tainted the memory, but the very least we should
3406 restore cs */
3407 if (rc != X86EMUL_CONTINUE) {
3408 pr_warn_once("faulting far call emulation tainted memory\n");
3409 goto fail;
3410 }
3411 return rc;
3412 fail:
3413 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3414 ctxt->mode = prev_mode;
3416 return rc;
3419 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3421 int rc;
3422 unsigned long eip;
3424 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3425 if (rc != X86EMUL_CONTINUE)
3426 return rc;
3427 rc = assign_eip_near(ctxt, eip);
3428 if (rc != X86EMUL_CONTINUE)
3429 return rc;
3430 rsp_increment(ctxt, ctxt->src.val);
3431 return X86EMUL_CONTINUE;
3434 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3436 /* Write back the register source. */
3437 ctxt->src.val = ctxt->dst.val;
3438 write_register_operand(&ctxt->src);
3440 /* Write back the memory destination with implicit LOCK prefix. */
3441 ctxt->dst.val = ctxt->src.orig_val;
3442 ctxt->lock_prefix = 1;
3443 return X86EMUL_CONTINUE;
3446 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3448 ctxt->dst.val = ctxt->src2.val;
3449 return fastop(ctxt, em_imul);
3452 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3454 ctxt->dst.type = OP_REG;
3455 ctxt->dst.bytes = ctxt->src.bytes;
3456 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3457 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3459 return X86EMUL_CONTINUE;
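/*
 * ~((x >> (n*8 - 1)) - 1) broadcasts the sign bit of the accumulator:
 * for CWD, DX becomes 0xffff exactly when bit 15 of AX is set and 0
 * otherwise, and likewise for CDQ/CQO at larger operand sizes.
 */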
3462 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3464 u64 tsc = 0;
3466 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3467 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3468 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3469 return X86EMUL_CONTINUE;
3472 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3474 u64 pmc;
3476 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3477 return emulate_gp(ctxt, 0);
3478 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3479 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3480 return X86EMUL_CONTINUE;
3483 static int em_mov(struct x86_emulate_ctxt *ctxt)
3485 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3486 return X86EMUL_CONTINUE;
3489 #define FFL(x) bit(X86_FEATURE_##x)
3491 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3493 u32 ebx, ecx, edx, eax = 1;
3494 u16 tmp;
3496 /*
3497 * Check that MOVBE is set in the guest-visible CPUID leaf.
3498 */
3499 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3500 if (!(ecx & FFL(MOVBE)))
3501 return emulate_ud(ctxt);
3503 switch (ctxt->op_bytes) {
3504 case 2:
3505 /*
3506 * From MOVBE definition: "...When the operand size is 16 bits,
3507 * the upper word of the destination register remains unchanged
3508 * ..."
3509 *
3510 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3511 * rules so we have to do the operation almost per hand.
3513 tmp = (u16)ctxt->src.val;
3514 ctxt->dst.val &= ~0xffffUL;
3515 ctxt->dst.val |= (unsigned long)swab16(tmp);
3516 break;
3517 case 4:
3518 ctxt->dst.val = swab32((u32)ctxt->src.val);
3519 break;
3520 case 8:
3521 ctxt->dst.val = swab64(ctxt->src.val);
3522 break;
3523 default:
3524 BUG();
3525 }
3526 return X86EMUL_CONTINUE;
3529 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3531 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3532 return emulate_gp(ctxt, 0);
3534 /* Disable writeback. */
3535 ctxt->dst.type = OP_NONE;
3536 return X86EMUL_CONTINUE;
3539 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3543 if (ctxt->mode == X86EMUL_MODE_PROT64)
3544 val = ctxt->src.val & ~0ULL;
3546 val = ctxt->src.val & ~0U;
3548 /* #UD condition is already handled. */
3549 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3550 return emulate_gp(ctxt, 0);
3552 /* Disable writeback. */
3553 ctxt->dst.type = OP_NONE;
3554 return X86EMUL_CONTINUE;
3557 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3559 u64 msr_data;
3561 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3562 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3563 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3564 return emulate_gp(ctxt, 0);
3566 return X86EMUL_CONTINUE;
3569 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3571 u64 msr_data;
3573 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3574 return emulate_gp(ctxt, 0);
3576 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3577 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3578 return X86EMUL_CONTINUE;
3581 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3583 if (ctxt->modrm_reg > VCPU_SREG_GS)
3584 return emulate_ud(ctxt);
3586 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3587 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3588 ctxt->dst.bytes = 2;
3589 return X86EMUL_CONTINUE;
3592 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3594 u16 sel = ctxt->src.val;
3596 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3597 return emulate_ud(ctxt);
3599 if (ctxt->modrm_reg == VCPU_SREG_SS)
3600 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3602 /* Disable writeback. */
3603 ctxt->dst.type = OP_NONE;
3604 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3607 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3609 u16 sel = ctxt->src.val;
3611 /* Disable writeback. */
3612 ctxt->dst.type = OP_NONE;
3613 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3616 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3618 u16 sel = ctxt->src.val;
3620 /* Disable writeback. */
3621 ctxt->dst.type = OP_NONE;
3622 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3625 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3627 int rc;
3628 ulong linear;
3630 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3631 if (rc == X86EMUL_CONTINUE)
3632 ctxt->ops->invlpg(ctxt, linear);
3633 /* Disable writeback. */
3634 ctxt->dst.type = OP_NONE;
3635 return X86EMUL_CONTINUE;
3638 static int em_clts(struct x86_emulate_ctxt *ctxt)
3640 ulong cr0;
3642 cr0 = ctxt->ops->get_cr(ctxt, 0);
3643 cr0 &= ~X86_CR0_TS;
3644 ctxt->ops->set_cr(ctxt, 0, cr0);
3645 return X86EMUL_CONTINUE;
3648 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3650 int rc = ctxt->ops->fix_hypercall(ctxt);
3652 if (rc != X86EMUL_CONTINUE)
3653 return rc;
3655 /* Let the processor re-execute the fixed hypercall */
3656 ctxt->_eip = ctxt->eip;
3657 /* Disable writeback. */
3658 ctxt->dst.type = OP_NONE;
3659 return X86EMUL_CONTINUE;
3662 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3663 void (*get)(struct x86_emulate_ctxt *ctxt,
3664 struct desc_ptr *ptr))
3666 struct desc_ptr desc_ptr;
3668 if (ctxt->mode == X86EMUL_MODE_PROT64)
3669 ctxt->op_bytes = 8;
3670 get(ctxt, &desc_ptr);
3671 if (ctxt->op_bytes == 2) {
3672 ctxt->op_bytes = 4;
3673 desc_ptr.address &= 0x00ffffff;
3674 }
3675 /* Disable writeback. */
3676 ctxt->dst.type = OP_NONE;
3677 return segmented_write(ctxt, ctxt->dst.addr.mem,
3678 &desc_ptr, 2 + ctxt->op_bytes);
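/*
 * Two SGDT/SIDT quirks are handled above: in long mode the stored base
 * is always 8 bytes, and with a 16-bit operand size only the low 24
 * bits of the base are stored, mirroring 286-era behaviour.
 */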
3681 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3683 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3686 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3688 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3691 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3693 struct desc_ptr desc_ptr;
3694 int rc;
3696 if (ctxt->mode == X86EMUL_MODE_PROT64)
3697 ctxt->op_bytes = 8;
3698 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3699 &desc_ptr.size, &desc_ptr.address,
3700 ctxt->op_bytes);
3701 if (rc != X86EMUL_CONTINUE)
3702 return rc;
3703 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3704 is_noncanonical_address(desc_ptr.address))
3705 return emulate_gp(ctxt, 0);
3706 if (lgdt)
3707 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3708 else
3709 ctxt->ops->set_idt(ctxt, &desc_ptr);
3710 /* Disable writeback. */
3711 ctxt->dst.type = OP_NONE;
3712 return X86EMUL_CONTINUE;
3715 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3717 return em_lgdt_lidt(ctxt, true);
3720 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3722 return em_lgdt_lidt(ctxt, false);
3725 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3727 if (ctxt->dst.type == OP_MEM)
3728 ctxt->dst.bytes = 2;
3729 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3730 return X86EMUL_CONTINUE;
3733 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3735 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3736 | (ctxt->src.val & 0x0f));
3737 ctxt->dst.type = OP_NONE;
3738 return X86EMUL_CONTINUE;
3741 static int em_loop(struct x86_emulate_ctxt *ctxt)
3743 int rc = X86EMUL_CONTINUE;
3745 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3746 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3747 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3748 rc = jmp_rel(ctxt, ctxt->src.val);
3753 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3755 int rc = X86EMUL_CONTINUE;
3757 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3758 rc = jmp_rel(ctxt, ctxt->src.val);
3763 static int em_in(struct x86_emulate_ctxt *ctxt)
3765 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3766 &ctxt->dst.val))
3767 return X86EMUL_IO_NEEDED;
3769 return X86EMUL_CONTINUE;
3772 static int em_out(struct x86_emulate_ctxt *ctxt)
3774 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3775 ctxt->src.val);
3776 /* Disable writeback. */
3777 ctxt->dst.type = OP_NONE;
3778 return X86EMUL_CONTINUE;
3781 static int em_cli(struct x86_emulate_ctxt *ctxt)
3783 if (emulator_bad_iopl(ctxt))
3784 return emulate_gp(ctxt, 0);
3786 ctxt->eflags &= ~X86_EFLAGS_IF;
3787 return X86EMUL_CONTINUE;
3790 static int em_sti(struct x86_emulate_ctxt *ctxt)
3792 if (emulator_bad_iopl(ctxt))
3793 return emulate_gp(ctxt, 0);
3795 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3796 ctxt->eflags |= X86_EFLAGS_IF;
3797 return X86EMUL_CONTINUE;
3800 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3802 u32 eax, ebx, ecx, edx;
3804 eax = reg_read(ctxt, VCPU_REGS_RAX);
3805 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3806 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3807 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3808 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3809 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3810 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3811 return X86EMUL_CONTINUE;
3814 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3816 u32 flags;
3818 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3819 X86_EFLAGS_SF;
3820 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3822 ctxt->eflags &= ~0xffUL;
3823 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3824 return X86EMUL_CONTINUE;
3827 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3829 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3830 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3831 return X86EMUL_CONTINUE;
3834 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3836 switch (ctxt->op_bytes) {
3837 #ifdef CONFIG_X86_64
3838 case 8:
3839 asm("bswap %0" : "+r"(ctxt->dst.val));
3840 break;
3841 #endif
3842 default:
3843 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3844 break;
3845 }
3846 return X86EMUL_CONTINUE;
3849 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3851 /* emulating clflush regardless of cpuid */
3852 return X86EMUL_CONTINUE;
3855 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3857 ctxt->dst.val = (s32) ctxt->src.val;
3858 return X86EMUL_CONTINUE;
3861 static bool valid_cr(int nr)
3863 switch (nr) {
3864 case 0:
3865 case 2 ... 4:
3866 case 8:
3867 return true;
3868 default:
3869 return false;
3870 }
3873 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
3875 if (!valid_cr(ctxt->modrm_reg))
3876 return emulate_ud(ctxt);
3878 return X86EMUL_CONTINUE;
3881 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3883 u64 new_val = ctxt->src.val64;
3884 int cr = ctxt->modrm_reg;
3885 u64 efer = 0;
3887 static u64 cr_reserved_bits[] = {
3888 0xffffffff00000000ULL,
3889 0, 0, 0, /* CR3 checked later */
3890 CR4_RESERVED_BITS,
3891 0, 0, 0,
3892 CR8_RESERVED_BITS,
3893 };
3895 if (!valid_cr(cr))
3896 return emulate_ud(ctxt);
3898 if (new_val & cr_reserved_bits[cr])
3899 return emulate_gp(ctxt, 0);
3901 switch (cr) {
3902 case 0: {
3903 u64 cr4;
3904 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
3905 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
3906 return emulate_gp(ctxt, 0);
3908 cr4 = ctxt->ops->get_cr(ctxt, 4);
3909 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3911 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
3912 !(cr4 & X86_CR4_PAE))
3913 return emulate_gp(ctxt, 0);
3915 break;
3916 }
3917 case 3: {
3918 u64 rsvd = 0;
3920 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3921 if (efer & EFER_LMA)
3922 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
3924 if (new_val & rsvd)
3925 return emulate_gp(ctxt, 0);
3927 break;
3928 }
3929 case 4: {
3930 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3932 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
3933 return emulate_gp(ctxt, 0);
3935 break;
3936 }
3937 }
3939 return X86EMUL_CONTINUE;
3942 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3944 u64 dr7;
3946 ctxt->ops->get_dr(ctxt, 7, &dr7);
3948 /* Check if DR7.Global_Enable is set */
3949 return dr7 & (1 << 13);
3952 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3954 int dr = ctxt->modrm_reg;
3955 u64 cr4;
3957 if (dr > 7)
3958 return emulate_ud(ctxt);
3960 cr4 = ctxt->ops->get_cr(ctxt, 4);
3961 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3962 return emulate_ud(ctxt);
3964 if (check_dr7_gd(ctxt)) {
3965 ulong dr6;
3967 ctxt->ops->get_dr(ctxt, 6, &dr6);
3968 dr6 &= ~15;
3969 dr6 |= DR6_BD | DR6_RTM;
3970 ctxt->ops->set_dr(ctxt, 6, dr6);
3971 return emulate_db(ctxt);
3972 }
3974 return X86EMUL_CONTINUE;
3977 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3979 u64 new_val = ctxt->src.val64;
3980 int dr = ctxt->modrm_reg;
3982 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3983 return emulate_gp(ctxt, 0);
3985 return check_dr_read(ctxt);
3988 static int check_svme(struct x86_emulate_ctxt *ctxt)
3990 u64 efer;
3992 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3994 if (!(efer & EFER_SVME))
3995 return emulate_ud(ctxt);
3997 return X86EMUL_CONTINUE;
4000 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4002 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4004 /* Valid physical address? */
4005 if (rax & 0xffff000000000000ULL)
4006 return emulate_gp(ctxt, 0);
4008 return check_svme(ctxt);
4011 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4013 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4015 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4016 return emulate_ud(ctxt);
4018 return X86EMUL_CONTINUE;
4021 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4023 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4024 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4026 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4027 ctxt->ops->check_pmc(ctxt, rcx))
4028 return emulate_gp(ctxt, 0);
4030 return X86EMUL_CONTINUE;
4033 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4035 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4036 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4037 return emulate_gp(ctxt, 0);
4039 return X86EMUL_CONTINUE;
4042 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4044 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4045 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4046 return emulate_gp(ctxt, 0);
4048 return X86EMUL_CONTINUE;
4051 #define D(_y) { .flags = (_y) }
4052 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4053 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4054 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4055 #define N D(NotImpl)
4056 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4057 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4058 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4059 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4060 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4061 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4062 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4063 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4064 #define II(_f, _e, _i) \
4065 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4066 #define IIP(_f, _e, _i, _p) \
4067 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4068 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4069 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4071 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4072 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4073 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4074 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4075 #define I2bvIP(_f, _e, _i, _p) \
4076 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4078 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4079 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4080 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
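/*
 * F6ALU() expands to the six classic encodings of an ALU row: r/m,r
 * and r,r/m in byte and full-size forms, plus the AL,imm8 and eAX,imm
 * forms, i.e. opcodes x0-x5 of the row.
 */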
4082 static const struct opcode group7_rm0[] = {
4083 N,
4084 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4085 N, N, N, N, N, N,
4088 static const struct opcode group7_rm1[] = {
4089 DI(SrcNone | Priv, monitor),
4090 DI(SrcNone | Priv, mwait),
4091 N, N, N, N, N, N,
4094 static const struct opcode group7_rm3[] = {
4095 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4096 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4097 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4098 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4099 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4100 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4101 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4102 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4105 static const struct opcode group7_rm7[] = {
4106 N,
4107 DIP(SrcNone, rdtscp, check_rdtsc),
4108 N, N, N, N, N, N,
4111 static const struct opcode group1[] = {
4112 F(Lock, em_add),
4113 F(Lock | PageTable, em_or),
4114 F(Lock, em_adc),
4115 F(Lock, em_sbb),
4116 F(Lock | PageTable, em_and),
4117 F(Lock, em_sub),
4118 F(Lock, em_xor),
4119 F(NoWrite, em_cmp),
4122 static const struct opcode group1A[] = {
4123 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4126 static const struct opcode group2[] = {
4127 F(DstMem | ModRM, em_rol),
4128 F(DstMem | ModRM, em_ror),
4129 F(DstMem | ModRM, em_rcl),
4130 F(DstMem | ModRM, em_rcr),
4131 F(DstMem | ModRM, em_shl),
4132 F(DstMem | ModRM, em_shr),
4133 F(DstMem | ModRM, em_shl),
4134 F(DstMem | ModRM, em_sar),
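/* Entry /6 repeats em_shl on purpose: the SAL encoding is an alias of SHL. */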
4137 static const struct opcode group3[] = {
4138 F(DstMem | SrcImm | NoWrite, em_test),
4139 F(DstMem | SrcImm | NoWrite, em_test),
4140 F(DstMem | SrcNone | Lock, em_not),
4141 F(DstMem | SrcNone | Lock, em_neg),
4142 F(DstXacc | Src2Mem, em_mul_ex),
4143 F(DstXacc | Src2Mem, em_imul_ex),
4144 F(DstXacc | Src2Mem, em_div_ex),
4145 F(DstXacc | Src2Mem, em_idiv_ex),
4148 static const struct opcode group4[] = {
4149 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4150 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4151 N, N, N, N, N, N,
4154 static const struct opcode group5[] = {
4155 F(DstMem | SrcNone | Lock, em_inc),
4156 F(DstMem | SrcNone | Lock, em_dec),
4157 I(SrcMem | NearBranch, em_call_near_abs),
4158 I(SrcMemFAddr | ImplicitOps, em_call_far),
4159 I(SrcMem | NearBranch, em_jmp_abs),
4160 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4161 I(SrcMem | Stack, em_push), D(Undefined),
4164 static const struct opcode group6[] = {
4165 DI(Prot | DstMem, sldt),
4166 DI(Prot | DstMem, str),
4167 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4168 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4169 N, N, N, N,
4172 static const struct group_dual group7 = { {
4173 II(Mov | DstMem, em_sgdt, sgdt),
4174 II(Mov | DstMem, em_sidt, sidt),
4175 II(SrcMem | Priv, em_lgdt, lgdt),
4176 II(SrcMem | Priv, em_lidt, lidt),
4177 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4178 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4179 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4180 }, {
4181 EXT(0, group7_rm0),
4182 EXT(0, group7_rm1),
4183 N, EXT(0, group7_rm3),
4184 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4185 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4186 EXT(0, group7_rm7),
4187 } };
4189 static const struct opcode group8[] = {
4190 N, N, N, N,
4191 F(DstMem | SrcImmByte | NoWrite, em_bt),
4192 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4193 F(DstMem | SrcImmByte | Lock, em_btr),
4194 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4197 static const struct group_dual group9 = { {
4198 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4199 }, {
4200 N, N, N, N, N, N, N, N,
4201 } };
4203 static const struct opcode group11[] = {
4204 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4205 X7(D(Undefined)),
4208 static const struct gprefix pfx_0f_ae_7 = {
4209 I(SrcMem | ByteOp, em_clflush), N, N, N,
4212 static const struct group_dual group15 = { {
4213 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4214 }, {
4215 N, N, N, N, N, N, N, N,
4216 } };
4218 static const struct gprefix pfx_0f_6f_0f_7f = {
4219 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4222 static const struct instr_dual instr_dual_0f_2b = {
4223 I(0, em_mov), N
4226 static const struct gprefix pfx_0f_2b = {
4227 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4230 static const struct gprefix pfx_0f_28_0f_29 = {
4231 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4234 static const struct gprefix pfx_0f_e7 = {
4235 N, I(Sse, em_mov), N, N,
4238 static const struct escape escape_d9 = { {
4239 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4240 }, {
4241 /* 0xC0 - 0xC7 */
4242 N, N, N, N, N, N, N, N,
4244 N, N, N, N, N, N, N, N,
4246 N, N, N, N, N, N, N, N,
4248 N, N, N, N, N, N, N, N,
4250 N, N, N, N, N, N, N, N,
4252 N, N, N, N, N, N, N, N,
4254 N, N, N, N, N, N, N, N,
4256 N, N, N, N, N, N, N, N,
4257 } };
4259 static const struct escape escape_db = { {
4260 N, N, N, N, N, N, N, N,
4261 }, {
4262 /* 0xC0 - 0xC7 */
4263 N, N, N, N, N, N, N, N,
4265 N, N, N, N, N, N, N, N,
4267 N, N, N, N, N, N, N, N,
4269 N, N, N, N, N, N, N, N,
4271 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4273 N, N, N, N, N, N, N, N,
4275 N, N, N, N, N, N, N, N,
4277 N, N, N, N, N, N, N, N,
4278 } };
4280 static const struct escape escape_dd = { {
4281 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4282 }, {
4283 /* 0xC0 - 0xC7 */
4284 N, N, N, N, N, N, N, N,
4286 N, N, N, N, N, N, N, N,
4288 N, N, N, N, N, N, N, N,
4290 N, N, N, N, N, N, N, N,
4292 N, N, N, N, N, N, N, N,
4294 N, N, N, N, N, N, N, N,
4296 N, N, N, N, N, N, N, N,
4298 N, N, N, N, N, N, N, N,
4299 } };
4301 static const struct instr_dual instr_dual_0f_c3 = {
4302 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4305 static const struct mode_dual mode_dual_63 = {
4306 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4309 static const struct opcode opcode_table[256] = {
4311 F6ALU(Lock, em_add),
4312 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4313 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4315 F6ALU(Lock | PageTable, em_or),
4316 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4319 F6ALU(Lock, em_adc),
4320 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4321 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4323 F6ALU(Lock, em_sbb),
4324 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4325 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4327 F6ALU(Lock | PageTable, em_and), N, N,
4329 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4331 F6ALU(Lock, em_xor), N, N,
4333 F6ALU(NoWrite, em_cmp), N, N,
4335 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4337 X8(I(SrcReg | Stack, em_push)),
4339 X8(I(DstReg | Stack, em_pop)),
4341 I(ImplicitOps | Stack | No64, em_pusha),
4342 I(ImplicitOps | Stack | No64, em_popa),
4343 N, MD(ModRM, &mode_dual_63),
4346 I(SrcImm | Mov | Stack, em_push),
4347 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4348 I(SrcImmByte | Mov | Stack, em_push),
4349 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4350 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4351 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4353 X16(D(SrcImmByte | NearBranch)),
4355 G(ByteOp | DstMem | SrcImm, group1),
4356 G(DstMem | SrcImm, group1),
4357 G(ByteOp | DstMem | SrcImm | No64, group1),
4358 G(DstMem | SrcImmByte, group1),
4359 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4360 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4362 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4363 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4364 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4365 D(ModRM | SrcMem | NoAccess | DstReg),
4366 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4369 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4371 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4372 I(SrcImmFAddr | No64, em_call_far), N,
4373 II(ImplicitOps | Stack, em_pushf, pushf),
4374 II(ImplicitOps | Stack, em_popf, popf),
4375 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4377 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4378 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4379 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4380 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4382 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4383 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4384 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4385 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4387 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4389 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4391 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4392 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4393 I(ImplicitOps | NearBranch, em_ret),
4394 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4395 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4396 G(ByteOp, group11), G(0, group11),
4398 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4399 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4400 I(ImplicitOps, em_ret_far),
4401 D(ImplicitOps), DI(SrcImmByte, intn),
4402 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4404 G(Src2One | ByteOp, group2), G(Src2One, group2),
4405 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4406 I(DstAcc | SrcImmUByte | No64, em_aam),
4407 I(DstAcc | SrcImmUByte | No64, em_aad),
4408 F(DstAcc | ByteOp | No64, em_salc),
4409 I(DstAcc | SrcXLat | ByteOp, em_mov),
4411 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4413 X3(I(SrcImmByte | NearBranch, em_loop)),
4414 I(SrcImmByte | NearBranch, em_jcxz),
4415 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4416 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4418 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4419 I(SrcImmFAddr | No64, em_jmp_far),
4420 D(SrcImmByte | ImplicitOps | NearBranch),
4421 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4422 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4424 N, DI(ImplicitOps, icebp), N, N,
4425 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4426 G(ByteOp, group3), G(0, group3),
4428 D(ImplicitOps), D(ImplicitOps),
4429 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4430 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4433 static const struct opcode twobyte_table[256] = {
4435 G(0, group6), GD(0, &group7), N, N,
4436 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4437 II(ImplicitOps | Priv, em_clts, clts), N,
4438 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4439 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4441 N, N, N, N, N, N, N, N,
4442 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4443 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4445 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
4446 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4447 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4449 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4452 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4453 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4454 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4457 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4458 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4459 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4460 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4461 I(ImplicitOps | EmulateOnUD, em_sysenter),
4462 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4464 N, N, N, N, N, N, N, N,
4466 X16(D(DstReg | SrcMem | ModRM)),
4468 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4473 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4478 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4480 X16(D(SrcImm | NearBranch)),
4482 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4484 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4485 II(ImplicitOps, em_cpuid, cpuid),
4486 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4487 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4488 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4490 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4491 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4492 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4493 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4494 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4495 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4497 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4498 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4499 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4500 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4501 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4502 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4506 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4507 I(DstReg | SrcMem | ModRM, em_bsf_c),
4508 I(DstReg | SrcMem | ModRM, em_bsr_c),
4509 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4511 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4512 N, ID(0, &instr_dual_0f_c3),
4513 N, N, N, GD(0, &group9),
4515 X8(I(DstReg, em_bswap)),
4517 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4519 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4520 N, N, N, N, N, N, N, N,
4522 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4525 static const struct instr_dual instr_dual_0f_38_f0 = {
4526 I(DstReg | SrcMem | Mov, em_movbe), N
4529 static const struct instr_dual instr_dual_0f_38_f1 = {
4530 I(DstMem | SrcReg | Mov, em_movbe), N
4533 static const struct gprefix three_byte_0f_38_f0 = {
4534 ID(0, &instr_dual_0f_38_f0), N, N, N
4537 static const struct gprefix three_byte_0f_38_f1 = {
4538 ID(0, &instr_dual_0f_38_f1), N, N, N
4541 /*
4542 * The instructions below are indexed by the third opcode byte and then
4543 * selected by the mandatory (SIMD) prefix.
4544 */
4545 static const struct opcode opcode_map_0f_38[256] = {
4547 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4549 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4551 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4552 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4553 /* 0xf2 - 0xff */
4554 N, N, X4(N), X8(N),
4555 };
4573 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4575 unsigned size;
4577 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4578 if (size == 8)
4579 size = 4;
4580 return size;
4583 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4584 unsigned size, bool sign_extension)
4586 int rc = X86EMUL_CONTINUE;
4588 op->type = OP_IMM;
4589 op->bytes = size;
4590 op->addr.mem.ea = ctxt->_eip;
4591 /* NB. Immediates are sign-extended as necessary. */
4592 switch (op->bytes) {
4593 case 1:
4594 op->val = insn_fetch(s8, ctxt);
4595 break;
4596 case 2:
4597 op->val = insn_fetch(s16, ctxt);
4598 break;
4599 case 4:
4600 op->val = insn_fetch(s32, ctxt);
4601 break;
4602 case 8:
4603 op->val = insn_fetch(s64, ctxt);
4604 break;
4605 }
4606 if (!sign_extension) {
4607 switch (op->bytes) {
4608 case 1:
4609 op->val &= 0xff;
4610 break;
4611 case 2:
4612 op->val &= 0xffff;
4613 break;
4614 case 4:
4615 op->val &= 0xffffffff;
4616 break;
4617 }
4618 }
4620 done:
4621 return rc;
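/*
 * decode_imm() always fetches sign-extended and masks the value back
 * down when zero extension was requested; this keeps a single
 * insn_fetch() path for all four immediate sizes.
 */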
4623 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4624 unsigned d)
4626 int rc = X86EMUL_CONTINUE;
4628 switch (d) {
4629 case OpReg:
4630 decode_register_operand(ctxt, op);
4631 break;
4632 case OpImmUByte:
4633 rc = decode_imm(ctxt, op, 1, false);
4634 break;
4635 case OpMem:
4636 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4637 mem_common:
4638 *op = ctxt->memop;
4639 ctxt->memopp = op;
4640 if (ctxt->d & BitOp)
4641 fetch_bit_operand(ctxt);
4642 op->orig_val = op->val;
4643 break;
4644 case OpMem64:
4645 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4646 goto mem_common;
4649 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4650 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4651 fetch_register_operand(op);
4652 op->orig_val = op->val;
4656 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4657 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4658 fetch_register_operand(op);
4659 op->orig_val = op->val;
4662 if (ctxt->d & ByteOp) {
4667 op->bytes = ctxt->op_bytes;
4668 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4669 fetch_register_operand(op);
4670 op->orig_val = op->val;
4674 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4676 register_address(ctxt, VCPU_REGS_RDI);
4677 op->addr.mem.seg = VCPU_SREG_ES;
4684 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4685 fetch_register_operand(op);
4690 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4693 rc = decode_imm(ctxt, op, 1, true);
4701 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4704 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4707 ctxt->memop.bytes = 1;
4708 if (ctxt->memop.type == OP_REG) {
4709 ctxt->memop.addr.reg = decode_register(ctxt,
4710 ctxt->modrm_rm, true);
4711 fetch_register_operand(&ctxt->memop);
4715 ctxt->memop.bytes = 2;
4718 ctxt->memop.bytes = 4;
4721 rc = decode_imm(ctxt, op, 2, false);
4724 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4728 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4730 register_address(ctxt, VCPU_REGS_RSI);
4731 op->addr.mem.seg = ctxt->seg_override;
4737 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4740 reg_read(ctxt, VCPU_REGS_RBX) +
4741 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4742 op->addr.mem.seg = ctxt->seg_override;
4747 op->addr.mem.ea = ctxt->_eip;
4748 op->bytes = ctxt->op_bytes + 2;
4749 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4752 ctxt->memop.bytes = ctxt->op_bytes + 2;
4756 op->val = VCPU_SREG_ES;
4760 op->val = VCPU_SREG_CS;
4764 op->val = VCPU_SREG_SS;
4768 op->val = VCPU_SREG_DS;
4772 op->val = VCPU_SREG_FS;
4776 op->val = VCPU_SREG_GS;
4779 /* Special instructions do their own operand decoding. */
4781 op->type = OP_NONE; /* Disable writeback. */
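/*
 * Decode one instruction at ctxt->eip: legacy and REX prefixes, one to
 * three opcode bytes, group/prefix table indirection, ModRM/SIB and
 * operand fetch.  A caller typically drives decode and execution like
 * this (sketch):
 *
 *	rc = x86_decode_insn(ctxt, insn, insn_len);
 *	if (rc == EMULATION_OK)
 *		rc = x86_emulate_insn(ctxt);
 */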
4789 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4791 int rc = X86EMUL_CONTINUE;
4792 int mode = ctxt->mode;
4793 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4794 bool op_prefix = false;
4795 bool has_seg_override = false;
4796 struct opcode opcode;
4798 ctxt->memop.type = OP_NONE;
4799 ctxt->memopp = NULL;
4800 ctxt->_eip = ctxt->eip;
4801 ctxt->fetch.ptr = ctxt->fetch.data;
4802 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4803 ctxt->opcode_len = 1;
4805 memcpy(ctxt->fetch.data, insn, insn_len);
4807 rc = __do_insn_fetch_bytes(ctxt, 1);
4808 if (rc != X86EMUL_CONTINUE)
4813 case X86EMUL_MODE_REAL:
4814 case X86EMUL_MODE_VM86:
4815 case X86EMUL_MODE_PROT16:
4816 def_op_bytes = def_ad_bytes = 2;
4818 case X86EMUL_MODE_PROT32:
4819 def_op_bytes = def_ad_bytes = 4;
4821 #ifdef CONFIG_X86_64
4822 case X86EMUL_MODE_PROT64:
4828 return EMULATION_FAILED;
4831 ctxt->op_bytes = def_op_bytes;
4832 ctxt->ad_bytes = def_ad_bytes;
4834 /* Legacy prefixes. */
4836 switch (ctxt->b = insn_fetch(u8, ctxt)) {
4837 case 0x66: /* operand-size override */
4839 /* switch between 2/4 bytes */
4840 ctxt->op_bytes = def_op_bytes ^ 6;
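/* (2 ^ 6 == 4 and 4 ^ 6 == 2; the 0x67 case below uses ^ 12 to flip 4/8) */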
4842 case 0x67: /* address-size override */
4843 if (mode == X86EMUL_MODE_PROT64)
4844 /* switch between 4/8 bytes */
4845 ctxt->ad_bytes = def_ad_bytes ^ 12;
4847 /* switch between 2/4 bytes */
4848 ctxt->ad_bytes = def_ad_bytes ^ 6;
4850 case 0x26: /* ES override */
4851 case 0x2e: /* CS override */
4852 case 0x36: /* SS override */
4853 case 0x3e: /* DS override */
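/* 0x26/0x2e/0x36/0x3e: segment is encoded in bits 4:3, matching VCPU_SREG_ES..DS */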
4854 has_seg_override = true;
4855 ctxt->seg_override = (ctxt->b >> 3) & 3;
4857 case 0x64: /* FS override */
4858 case 0x65: /* GS override */
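/* 0x64/0x65: the low three bits give VCPU_SREG_FS/VCPU_SREG_GS directly */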
4859 has_seg_override = true;
4860 ctxt->seg_override = ctxt->b & 7;
4862 case 0x40 ... 0x4f: /* REX */
4863 if (mode != X86EMUL_MODE_PROT64)
4865 ctxt->rex_prefix = ctxt->b;
4867 case 0xf0: /* LOCK */
4868 ctxt->lock_prefix = 1;
4870 case 0xf2: /* REPNE/REPNZ */
4871 case 0xf3: /* REP/REPE/REPZ */
4872 ctxt->rep_prefix = ctxt->b;
4878 /* Any legacy prefix after a REX prefix nullifies its effect. */
4880 ctxt->rex_prefix = 0;
4886 if (ctxt->rex_prefix & 8)
4887 ctxt->op_bytes = 8; /* REX.W */
4889 /* Opcode byte(s). */
4890 opcode = opcode_table[ctxt->b];
4891 /* Two-byte opcode? */
4892 if (ctxt->b == 0x0f) {
4893 ctxt->opcode_len = 2;
4894 ctxt->b = insn_fetch(u8, ctxt);
4895 opcode = twobyte_table[ctxt->b];
4897 /* 0F_38 opcode map */
4898 if (ctxt->b == 0x38) {
4899 ctxt->opcode_len = 3;
4900 ctxt->b = insn_fetch(u8, ctxt);
4901 opcode = opcode_map_0f_38[ctxt->b];
4904 ctxt->d = opcode.flags;
4906 if (ctxt->d & ModRM)
4907 ctxt->modrm = insn_fetch(u8, ctxt);
4909 /*
 * vex-prefix instructions are not implemented.  In legacy modes
 * 0xc4/0xc5 with ModRM.mod != 3 decode as LES/LDS instead, so only
 * 64-bit mode or a register-form ModRM byte denotes a VEX prefix.
 */
4910 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
4911 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
4915 while (ctxt->d & GroupMask) {
4916 switch (ctxt->d & GroupMask) {
4918 goffset = (ctxt->modrm >> 3) & 7;
4919 opcode = opcode.u.group[goffset];
4922 goffset = (ctxt->modrm >> 3) & 7;
4923 if ((ctxt->modrm >> 6) == 3)
4924 opcode = opcode.u.gdual->mod3[goffset];
4926 opcode = opcode.u.gdual->mod012[goffset];
4929 goffset = ctxt->modrm & 7;
4930 opcode = opcode.u.group[goffset];
4933 if (ctxt->rep_prefix && op_prefix)
4934 return EMULATION_FAILED;
4935 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
4936 switch (simd_prefix) {
4937 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
4938 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
4939 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
4940 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4944 if (ctxt->modrm > 0xbf)
4945 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
4947 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4950 if ((ctxt->modrm >> 6) == 3)
4951 opcode = opcode.u.idual->mod3;
4953 opcode = opcode.u.idual->mod012;
4956 if (ctxt->mode == X86EMUL_MODE_PROT64)
4957 opcode = opcode.u.mdual->mode64;
4959 opcode = opcode.u.mdual->mode32;
4962 return EMULATION_FAILED;
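/* replace the group selector bits with the flags of the entry just chosen; loop again if that entry is itself a group */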
4965 ctxt->d &= ~(u64)GroupMask;
4966 ctxt->d |= opcode.flags;
4971 return EMULATION_FAILED;
4973 ctxt->execute = opcode.u.execute;
4975 if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
4976 return EMULATION_FAILED;
4978 if (unlikely(ctxt->d &
4979 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
4982 * These are copied unconditionally here, and checked unconditionally
4983 * in x86_emulate_insn.
4985 ctxt->check_perm = opcode.check_perm;
4986 ctxt->intercept = opcode.intercept;
4988 if (ctxt->d & NotImpl)
4989 return EMULATION_FAILED;
4991 if (mode == X86EMUL_MODE_PROT64) {
4992 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
4994 else if (ctxt->d & NearBranch)
4998 if (ctxt->d & Op3264) {
4999 if (mode == X86EMUL_MODE_PROT64)
5005 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5009 ctxt->op_bytes = 16;
5010 else if (ctxt->d & Mmx)
5014 /* ModRM and SIB bytes. */
5015 if (ctxt->d & ModRM) {
5016 rc = decode_modrm(ctxt, &ctxt->memop);
5017 if (!has_seg_override) {
5018 has_seg_override = true;
5019 ctxt->seg_override = ctxt->modrm_seg;
5021 } else if (ctxt->d & MemAbs)
5022 rc = decode_abs(ctxt, &ctxt->memop);
5023 if (rc != X86EMUL_CONTINUE)
5026 if (!has_seg_override)
5027 ctxt->seg_override = VCPU_SREG_DS;
5029 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5032 * Decode and fetch the source operand: register, memory
5035 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5036 if (rc != X86EMUL_CONTINUE)
5040 * Decode and fetch the second source operand: register, memory
5043 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5044 if (rc != X86EMUL_CONTINUE)
5047 /* Decode and fetch the destination operand: register or memory. */
5048 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5050 if (ctxt->rip_relative && likely(ctxt->memopp))
5051 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5052 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5055 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
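/*
 * True if the decoded insn was flagged PageTable in the opcode tables,
 * i.e. it may be writing a guest page table; KVM's MMU consults this
 * when deciding whether to unprotect a shadowed page.
 */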
5058 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5060 return ctxt->d & PageTable;
5063 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5065 /* The second termination condition applies only to REPE/REPZ and
5066 * REPNE/REPNZ.  For cmps/scas (opcodes 0xa6/0xa7/0xae/0xaf) test
5067 * which repeat prefix is in use and check the corresponding
5068 * termination condition:
5069 * - if REPE/REPZ and ZF = 0 then done
5070 * - if REPNE/REPNZ and ZF = 1 then done
5072 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5073 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5074 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5075 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5076 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5077 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5083 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5087 ctxt->ops->get_fpu(ctxt);
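/*
 * fwait delivers any pending x87 exception right now, in host context;
 * the exception-table fixup below turns the resulting trap into
 * fault = 1 so it can be reflected into the guest as #MF instead.
 */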
5088 asm volatile("1: fwait \n\t"
5090 ".pushsection .fixup,\"ax\" \n\t"
5092 "movb $1, %[fault] \n\t"
5095 _ASM_EXTABLE(1b, 3b)
5096 : [fault]"+qm"(fault));
5097 ctxt->ops->put_fpu(ctxt);
5099 if (unlikely(fault))
5100 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5102 return X86EMUL_CONTINUE;
5105 static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
5108 if (op->type == OP_MM)
5109 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
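/*
 * Dispatch to a fastop stub: the per-width variants of each operation
 * are emitted FASTOP_SIZE bytes apart, so log2(dst.bytes) selects the
 * right one.  Guest EFLAGS are installed around the call and read back
 * afterwards; the stub returns NULL in fop to signal #DE.
 */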
5112 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
5114 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5115 if (!(ctxt->d & ByteOp))
5116 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5117 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
5118 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5120 : "c"(ctxt->src2.val));
5121 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5122 if (!fop) /* exception is returned in fop variable */
5123 return emulate_de(ctxt);
5124 return X86EMUL_CONTINUE;
5127 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
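/*
 * Clear the per-instruction decode state in one go: the fields from
 * rip_relative up to (but not including) modrm are laid out
 * contiguously in struct x86_emulate_ctxt for exactly this purpose.
 */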
5129 memset(&ctxt->rip_relative, 0,
5130 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
5132 ctxt->io_read.pos = 0;
5133 ctxt->io_read.end = 0;
5134 ctxt->mem_read.end = 0;
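/*
 * Execute a previously decoded instruction: check LOCK/privilege/mode
 * restrictions and intercepts, fetch memory operands, dispatch to a
 * fastop stub, an ->execute handler or the opcode switch below, then
 * write back the results and advance rip.
 */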
5137 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5139 const struct x86_emulate_ops *ops = ctxt->ops;
5140 int rc = X86EMUL_CONTINUE;
5141 int saved_dst_type = ctxt->dst.type;
5143 ctxt->mem_read.pos = 0;
5145 /* LOCK prefix is allowed only with some instructions */
5146 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5147 rc = emulate_ud(ctxt);
5151 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5152 rc = emulate_ud(ctxt);
5156 if (unlikely(ctxt->d &
5157 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5158 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5159 (ctxt->d & Undefined)) {
5160 rc = emulate_ud(ctxt);
5164 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5165 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5166 rc = emulate_ud(ctxt);
5170 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5171 rc = emulate_nm(ctxt);
5175 if (ctxt->d & Mmx) {
5176 rc = flush_pending_x87_faults(ctxt);
5177 if (rc != X86EMUL_CONTINUE)
5180 * Now that we know the fpu is exception safe, we can fetch
5183 fetch_possible_mmx_operand(ctxt, &ctxt->src);
5184 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
5185 if (!(ctxt->d & Mov))
5186 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
5189 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5190 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5191 X86_ICPT_PRE_EXCEPT);
5192 if (rc != X86EMUL_CONTINUE)
5196 /* Instruction can only be executed in protected mode */
5197 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5198 rc = emulate_ud(ctxt);
5202 /* Privileged instruction can be executed only in CPL=0 */
5203 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5204 if (ctxt->d & PrivUD)
5205 rc = emulate_ud(ctxt);
5207 rc = emulate_gp(ctxt, 0);
5211 /* Do instruction specific permission checks */
5212 if (ctxt->d & CheckPerm) {
5213 rc = ctxt->check_perm(ctxt);
5214 if (rc != X86EMUL_CONTINUE)
5218 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5219 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5220 X86_ICPT_POST_EXCEPT);
5221 if (rc != X86EMUL_CONTINUE)
5225 if (ctxt->rep_prefix && (ctxt->d & String)) {
5226 /* All REP prefixes have the same first termination condition */
5227 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5228 string_registers_quirk(ctxt);
5229 ctxt->eip = ctxt->_eip;
5230 ctxt->eflags &= ~X86_EFLAGS_RF;
5236 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5237 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5238 ctxt->src.valptr, ctxt->src.bytes);
5239 if (rc != X86EMUL_CONTINUE)
5241 ctxt->src.orig_val64 = ctxt->src.val64;
5244 if (ctxt->src2.type == OP_MEM) {
5245 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5246 &ctxt->src2.val, ctxt->src2.bytes);
5247 if (rc != X86EMUL_CONTINUE)
5251 if ((ctxt->d & DstMask) == ImplicitOps)
5255 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5256 /* optimisation - avoid slow emulated read if Mov */
5257 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5258 &ctxt->dst.val, ctxt->dst.bytes);
5259 if (rc != X86EMUL_CONTINUE) {
5260 if (!(ctxt->d & NoWrite) &&
5261 rc == X86EMUL_PROPAGATE_FAULT &&
5262 ctxt->exception.vector == PF_VECTOR)
5263 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5267 /* Copy full 64-bit value for CMPXCHG8B. */
5268 ctxt->dst.orig_val64 = ctxt->dst.val64;
5272 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5273 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5274 X86_ICPT_POST_MEMACCESS);
5275 if (rc != X86EMUL_CONTINUE)
5279 if (ctxt->rep_prefix && (ctxt->d & String))
5280 ctxt->eflags |= X86_EFLAGS_RF;
5282 ctxt->eflags &= ~X86_EFLAGS_RF;
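/*
 * RF stays set while a string insn is being restarted so that
 * instruction breakpoints are not re-triggered on each iteration;
 * everything else clears it.
 */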
5284 if (ctxt->execute) {
5285 if (ctxt->d & Fastop) {
5286 void (*fop)(struct fastop *) = (void *)ctxt->execute;
5287 rc = fastop(ctxt, fop);
5288 if (rc != X86EMUL_CONTINUE)
5292 rc = ctxt->execute(ctxt);
5293 if (rc != X86EMUL_CONTINUE)
5298 if (ctxt->opcode_len == 2)
5300 else if (ctxt->opcode_len == 3)
5301 goto threebyte_insn;
5304 case 0x70 ... 0x7f: /* jcc (short) */
5305 if (test_cc(ctxt->b, ctxt->eflags))
5306 rc = jmp_rel(ctxt, ctxt->src.val);
5308 case 0x8d: /* lea r16/r32, m */
5309 ctxt->dst.val = ctxt->src.addr.mem.ea;
5311 case 0x90 ... 0x97: /* nop / xchg reg, rax */
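/* without REX.B, 0x90 is nop, not "xchg eax, eax" (which would zero-extend) */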
5312 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5313 ctxt->dst.type = OP_NONE;
5317 case 0x98: /* cbw/cwde/cdqe */
5318 switch (ctxt->op_bytes) {
5319 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5320 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5321 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5324 case 0xcc: /* int3 */
5325 rc = emulate_int(ctxt, 3);
5327 case 0xcd: /* int n */
5328 rc = emulate_int(ctxt, ctxt->src.val);
5330 case 0xce: /* into */
5331 if (ctxt->eflags & X86_EFLAGS_OF)
5332 rc = emulate_int(ctxt, 4);
5334 case 0xe9: /* jmp rel */
5335 case 0xeb: /* jmp rel short */
5336 rc = jmp_rel(ctxt, ctxt->src.val);
5337 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5339 case 0xf4: /* hlt */
5340 ctxt->ops->halt(ctxt);
5342 case 0xf5: /* cmc */
5343 /* complement carry flag from eflags reg */
5344 ctxt->eflags ^= X86_EFLAGS_CF;
5346 case 0xf8: /* clc */
5347 ctxt->eflags &= ~X86_EFLAGS_CF;
5349 case 0xf9: /* stc */
5350 ctxt->eflags |= X86_EFLAGS_CF;
5352 case 0xfc: /* cld */
5353 ctxt->eflags &= ~X86_EFLAGS_DF;
5355 case 0xfd: /* std */
5356 ctxt->eflags |= X86_EFLAGS_DF;
5359 goto cannot_emulate;
5362 if (rc != X86EMUL_CONTINUE)
5366 if (ctxt->d & SrcWrite) {
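/* a writable source operand is always a register (xadd, cmpxchg) */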
5367 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5368 rc = writeback(ctxt, &ctxt->src);
5369 if (rc != X86EMUL_CONTINUE)
5372 if (!(ctxt->d & NoWrite)) {
5373 rc = writeback(ctxt, &ctxt->dst);
5374 if (rc != X86EMUL_CONTINUE)
5379 * restore dst type in case the decoding will be reused
5380 * (happens for string instructions)
5382 ctxt->dst.type = saved_dst_type;
5384 if ((ctxt->d & SrcMask) == SrcSI)
5385 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5387 if ((ctxt->d & DstMask) == DstDI)
5388 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5390 if (ctxt->rep_prefix && (ctxt->d & String)) {
5392 struct read_cache *r = &ctxt->io_read;
5393 if ((ctxt->d & SrcMask) == SrcSI)
5394 count = ctxt->src.count;
5396 count = ctxt->dst.count;
5397 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5399 if (!string_insn_completed(ctxt)) {
5401 * Re-enter the guest when the pio read ahead buffer is empty
5402 * or, if it is not used, after every 1024 iterations.
5404 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5405 (r->end == 0 || r->end != r->pos)) {
5407 * Reset the read cache.  This usually happens
5408 * before decode, but since the instruction is
5409 * restarted we have to do it here.
5411 ctxt->mem_read.end = 0;
5412 writeback_registers(ctxt);
5413 return EMULATION_RESTART;
5415 goto done; /* skip rip writeback */
5417 ctxt->eflags &= ~X86_EFLAGS_RF;
5420 ctxt->eip = ctxt->_eip;
5423 if (rc == X86EMUL_PROPAGATE_FAULT) {
5424 WARN_ON(ctxt->exception.vector > 0x1f);
5425 ctxt->have_exception = true;
5427 if (rc == X86EMUL_INTERCEPTED)
5428 return EMULATION_INTERCEPTED;
5430 if (rc == X86EMUL_CONTINUE)
5431 writeback_registers(ctxt);
5433 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5437 case 0x09: /* wbinvd */
5438 (ctxt->ops->wbinvd)(ctxt);
5440 case 0x08: /* invd */
5441 case 0x0d: /* GrpP (prefetch) */
5442 case 0x18: /* Grp16 (prefetch/nop) */
5443 case 0x1f: /* nop */
5445 case 0x20: /* mov cr, reg */
5446 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5448 case 0x21: /* mov from dr to reg */
5449 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5451 case 0x40 ... 0x4f: /* cmov */
5452 if (test_cc(ctxt->b, ctxt->eflags))
5453 ctxt->dst.val = ctxt->src.val;
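/*
 * Even when the condition is false, cmov with a 32-bit operand must
 * still write back, since it zero-extends into the upper half of the
 * destination register in 64-bit mode.
 */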
5454 else if (ctxt->op_bytes != 4)
5455 ctxt->dst.type = OP_NONE; /* no writeback */
5457 case 0x80 ... 0x8f: /* jnz rel, etc */
5458 if (test_cc(ctxt->b, ctxt->eflags))
5459 rc = jmp_rel(ctxt, ctxt->src.val);
5461 case 0x90 ... 0x9f: /* setcc r/m8 */
5462 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5464 case 0xb6 ... 0xb7: /* movzx */
5465 ctxt->dst.bytes = ctxt->op_bytes;
5466 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5467 : (u16) ctxt->src.val;
5469 case 0xbe ... 0xbf: /* movsx */
5470 ctxt->dst.bytes = ctxt->op_bytes;
5471 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5472 (s16) ctxt->src.val;
5475 goto cannot_emulate;
5480 if (rc != X86EMUL_CONTINUE)
5486 return EMULATION_FAILED;
5489 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5491 invalidate_registers(ctxt);
5494 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5496 writeback_registers(ctxt);