/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)	/* Memory operand is absolute displacement */
#define String      (1<<13)	/* String instruction (rep capable) */
#define Stack       (1<<14)	/* Stack instruction (push/pop) */
#define GroupMask   (7<<15)	/* Opcode uses one of the group mechanisms */
#define Group       (1<<15)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)	/* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)	/* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)	/* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)	/* Escape to coprocessor instruction */
#define Sse         (1<<18)	/* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
#define Prot        (1<<21)	/* instruction generates #UD if not in prot-mode */
#define VendorSpecific (1<<22)	/* Vendor specific instruction */
#define NoAccess    (1<<23)	/* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24)	/* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25)	/* No Such Instruction */
#define Lock        (1<<26)	/* lock prefix is allowed for the instruction */
#define Priv        (1<<27)	/* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
#define PageTable   (1 << 29)	/* instruction used to write page table */
#define NotImpl     (1 << 30)	/* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
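/*
 * Illustrative only: the Op* fields above are packed into the 64-bit
 * opcode flags, so an entry declared as DstReg | SrcMem | ModRM decodes
 * as
 *
 *	(d >> DstShift)  & OpMask == OpReg
 *	(d >> SrcShift)  & OpMask == OpMem
 *	(d >> Src2Shift) & OpMask == OpNone
 *
 * OpBits == 5 suffices because the largest operand type (OpXLat == 28)
 * fits in five bits.
 */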
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
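/*
 * Illustrative only: these macros repeat an initializer, so an opcode
 * table row written as, say,
 *
 *	X8(I(SrcReg | Stack, em_push)),
 *
 * expands to eight identical entries (one per register encoded in the
 * low three opcode bits, e.g. push r16/r32/r64 at 0x50..0x57).
 */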
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    [rdx]:rax  (in/out)
 * src:    rbx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */
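/*
 * Sketch of the "reached by calculation" dispatch, illustrative only
 * (fastop() is declared below; its body lives later in this file):
 *
 *	if (!(ctxt->d & ByteOp))
 *		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *
 * so 2-byte ops land at +8, 4-byte at +16 and 8-byte at +24 from the
 * byte-sized entry that FASTOP2() and friends emit first.
 */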
struct fastop;

struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op;
	struct opcode high[64];
};
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}
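/*
 * Illustrative note on the register cache protocol: reads populate the
 * cache lazily, writes mark entries dirty, and only dirty entries are
 * written back. For example,
 *
 *	*reg_write(ctxt, VCPU_REGS_RAX) = 0;
 *	writeback_registers(ctxt);
 *
 * caches and dirties RAX, then issues a single ->write_gpr() call.
 */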
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "					\
	"push %"_tmp"; "						\
	"push %"_tmp"; "						\
	"movl %"_msk",%"_LO32 _tmp"; "					\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"pushf; "							\
	"notl %"_LO32 _tmp"; "						\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "						\
	"orl  %"_LO32 _tmp",("_STK"); "					\
	"popf; "							\
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp)		\
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_LO32 _tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
#define ____emulate_2op(ctxt, _op, _x, _y, _suffix, _dsttype)	\
	do {							\
		__asm__ __volatile__ (				\
			_PRE_EFLAGS("0", "4", "2")		\
			_op _suffix " %"_x"3,%1; "		\
			_POST_EFLAGS("0", "4", "2")		\
			: "=m" ((ctxt)->eflags),		\
			  "+q" (*(_dsttype*)&(ctxt)->dst.val),	\
			  "=&r" (_tmp)				\
			: _y ((ctxt)->src.val), "i" (EFLAGS_MASK)); \
	} while (0)
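/*
 * Illustrative note: the "0", "4", "2" strings above are asm operand
 * numbers, matching the constraint lists: %0 = eflags, %1 = dst.val,
 * %2 = _tmp scratch, %3 = src.val, %4 = EFLAGS_MASK.
 */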
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(ctxt,_op,_wx,_wy,_lx,_ly,_qx,_qy)		\
	do {								\
		unsigned long _tmp;					\
									\
		switch ((ctxt)->dst.bytes) {				\
		case 2:							\
			____emulate_2op(ctxt,_op,_wx,_wy,"w",u16);	\
			break;						\
		case 4:							\
			____emulate_2op(ctxt,_op,_lx,_ly,"l",u32);	\
			break;						\
		case 8:							\
			ON64(____emulate_2op(ctxt,_op,_qx,_qy,"q",u64)); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(ctxt,_op,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)		\
	do {								\
		unsigned long _tmp;					\
		switch ((ctxt)->dst.bytes) {				\
		case 1:							\
			____emulate_2op(ctxt,_op,_bx,_by,"b",u8);	\
			break;						\
		default:						\
			__emulate_2op_nobyte(ctxt, _op,			\
					     _wx, _wy, _lx, _ly, _qx, _qy); \
			break;						\
		}							\
	} while (0)
/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(ctxt, _op) \
	__emulate_2op(ctxt, _op, "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(ctxt, _op) \
	__emulate_2op(ctxt, _op, "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(ctxt, _op) \
	__emulate_2op_nobyte(ctxt, _op, "w", "r", _LO32, "r", "", "r")
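/*
 * Illustrative usage: a handler for a flag-producing two-operand
 * instruction, say ADC, could dispatch with
 *
 *	emulate_2op_SrcV(ctxt, "adc");
 *
 * letting dst.bytes select the b/w/l/q variant at run time.
 */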
/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(ctxt, _op, _suffix, _type)		\
	do {							\
		unsigned long _tmp;				\
		_type _clv  = (ctxt)->src2.val;			\
		_type _srcv = (ctxt)->src.val;			\
		_type _dstv = (ctxt)->dst.val;			\
								\
		__asm__ __volatile__ (				\
			_PRE_EFLAGS("0", "5", "2")		\
			_op _suffix " %4,%1 \n"			\
			_POST_EFLAGS("0", "5", "2")		\
			: "=m" ((ctxt)->eflags), "+r" (_dstv), "=&r" (_tmp) \
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
			);					\
								\
		(ctxt)->src2.val = (unsigned long) _clv;	\
		(ctxt)->src.val  = (unsigned long) _srcv;	\
		(ctxt)->dst.val  = (unsigned long) _dstv;	\
	} while (0)

#define emulate_2op_cl(ctxt, _op)				\
	do {							\
		switch ((ctxt)->dst.bytes) {			\
		case 2:						\
			__emulate_2op_cl(ctxt, _op, "w", u16);	\
			break;					\
		case 4:						\
			__emulate_2op_cl(ctxt, _op, "l", u32);	\
			break;					\
		case 8:						\
			ON64(__emulate_2op_cl(ctxt, _op, "q", ulong)); \
			break;					\
		}						\
	} while (0)
#define __emulate_1op(ctxt, _op, _suffix)		\
	do {						\
		unsigned long _tmp;			\
							\
		__asm__ __volatile__ (			\
			_PRE_EFLAGS("0", "3", "2")	\
			_op _suffix " %1; "		\
			_POST_EFLAGS("0", "3", "2")	\
			: "=m" ((ctxt)->eflags), "+m" ((ctxt)->dst.val), \
			  "=&r" (_tmp)			\
			: "i" (EFLAGS_MASK));		\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(ctxt, _op)						\
	do {								\
		switch ((ctxt)->dst.bytes) {				\
		case 1:	__emulate_1op(ctxt, _op, "b"); break;		\
		case 2:	__emulate_1op(ctxt, _op, "w"); break;		\
		case 4:	__emulate_1op(ctxt, _op, "l"); break;		\
		case 8:	ON64(__emulate_1op(ctxt, _op, "q")); break;	\
		}							\
	} while (0)
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t"
#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    FOP_ALIGN \
	    "em_" #op ": \n\t"

#define FOP_END \
	    ".popsection")

#define FOPNOP() FOP_ALIGN FOP_RET

#define FOP1E(op, dst) \
	FOP_ALIGN #op " %" #dst " \n\t" FOP_RET

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax)) \
	FOP_END

#define FOP2E(op, dst, src) \
	FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET

#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, bl) \
	FOP2E(op##w, ax, bx) \
	FOP2E(op##l, eax, ebx) \
	ON64(FOP2E(op##q, rax, rbx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, bx) \
	FOP2E(op##l, eax, ebx) \
	ON64(FOP2E(op##q, rax, rbx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

#define FOP3E(op, dst, src, src2) \
	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, bx, cl) \
	FOP3E(op##l, eax, ebx, cl) \
	ON64(FOP3E(op##q, rax, rbx, cl)) \
	FOP_END

/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t"
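/*
 * Illustrative note: because each SETcc stub is padded to exactly 4
 * bytes, test_cc() further down can index the em_setcc table directly:
 *
 *	fop = (void *)em_setcc + 4 * (condition & 0xf);
 *
 * avoiding a switch over the sixteen condition codes.
 */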
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;
#define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
	do {								\
		unsigned long _tmp;					\
		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "1")			\
			"1: \n\t"					\
			_op _suffix " %6; "				\
			"2: \n\t"					\
			_POST_EFLAGS("0", "5", "1")			\
			".pushsection .fixup,\"ax\" \n\t"		\
			"3: movb $1, %4 \n\t"				\
			"jmp 2b \n\t"					\
			".popsection \n\t"				\
			_ASM_EXTABLE(1b, 3b)				\
			: "=m" ((ctxt)->eflags), "=&r" (_tmp),		\
			  "+a" (*rax), "+d" (*rdx), "+qm"(_ex)		\
			: "i" (EFLAGS_MASK), "m" ((ctxt)->src.val));	\
	} while (0)

/*
 * Instruction has only one source operand, destination is implicit
 * (e.g. mul, div, imul, idiv).
 */
#define emulate_1op_rax_rdx(ctxt, _op, _ex)				\
	do {								\
		switch((ctxt)->src.bytes) {				\
		case 1:							\
			__emulate_1op_rax_rdx(ctxt, _op, "b", _ex);	\
			break;						\
		case 2:							\
			__emulate_1op_rax_rdx(ctxt, _op, "w", _ex);	\
			break;						\
		case 4:							\
			__emulate_1op_rax_rdx(ctxt, _op, "l", _ex);	\
			break;						\
		case 8: ON64(						\
			__emulate_1op_rax_rdx(ctxt, _op, "q", _ex));	\
			break;						\
		}							\
	} while (0)
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}
static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}
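/*
 * Illustrative: ad_bytes == 2 yields 0xffff and ad_bytes == 4 yields
 * 0xffffffff; ad_bytes == 8 would overflow the shift, which is why
 * address_mask() below special-cases ad_bytes == sizeof(unsigned long).
 */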
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}
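/*
 * Illustrative: a 16-bit SS (ss.d == 0) gives stack_mask() == 0xffff and
 * stack_size() == 2; a 32-bit SS gives 0xffffffff and 4; 64-bit mode
 * gives ~0UL and 8.
 */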
/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	return address_mask(ctxt, reg);
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
{
	ulong mask;

	if (ctxt->ad_bytes == sizeof(unsigned long))
		mask = ~0UL;
	else
		mask = ad_mask(ctxt);
	masked_increment(reg, mask, inc);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
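/*
 * Illustrative: with the granularity bit set, a raw 20-bit limit of
 * 0xfffff scales to (0xfffff << 12) | 0xfff == 0xffffffff, i.e. 4GB.
 */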
static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
{
	ctxt->has_seg_override = true;
	ctxt->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->has_seg_override)
		return 0;

	return ctxt->seg_override;
}
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
				 bool cs_l)
{
	switch (ctxt->op_bytes) {
	case 2:
		ctxt->_eip = (u16)dst;
		break;
	case 4:
		ctxt->_eip = (u32)dst;
		break;
	case 8:
		if ((cs_l && is_noncanonical_address(dst)) ||
		    (!cs_l && (dst & ~(u32)-1)))
			return emulate_gp(ctxt, 0);
		ctxt->_eip = dst;
		break;
	default:
		WARN(1, "unsupported eip assignment size\n");
	}
	return X86EMUL_CONTINUE;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.
 */
static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	if (likely(size < 16))
		return false;

	if (ctxt->d & Aligned)
		return true;
	else if (ctxt->d & Unaligned)
		return false;
	else if (ctxt->d & Avx)
		return false;
	else
		return true;
}
static int __linearize(struct x86_emulate_ctxt *ctxt,
		       struct segmented_address addr,
		       unsigned size, bool write, bool fetch,
		       ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	unsigned cpl;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT64:
		if (((signed long)la << 16) >> 16 != la)
			return emulate_gp(ctxt, 0);
		break;
	default:
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
		     || !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if ((desc.type & 8) || !(desc.type & 4)) {
			/* expand-up segment */
			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
				goto bad;
		} else {
			/* expand-down segment */
			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
				goto bad;
		}
		cpl = ctxt->ops->cpl(ctxt);
		if (!(desc.type & 8)) {
			/* data segment */
			if (cpl > desc.dpl)
				goto bad;
		} else if ((desc.type & 8) && !(desc.type & 4)) {
			/* nonconforming code segment */
			if (cpl != desc.dpl)
				goto bad;
		} else if ((desc.type & 8) && (desc.type & 4)) {
			/* conforming code segment */
			if (cpl < desc.dpl)
				goto bad;
		}
		break;
	}
	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
		la &= (u32)-1;
	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
		return emulate_gp(ctxt, 0);
	*linear = la;
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, sel);
	else
		return emulate_gp(ctxt, sel);
}
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	return __linearize(ctxt, addr, size, write, false, linear);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
}
/*
 * Fetch the next byte of the instruction being emulated which is pointed to
 * by ctxt->_eip, then increment ctxt->_eip.
 *
 * Also prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->fetch;
	int rc;
	int size, cur_size;

	if (ctxt->_eip == fc->end) {
		unsigned long linear;
		struct segmented_address addr = { .seg = VCPU_SREG_CS,
						  .ea  = ctxt->_eip };
		cur_size = fc->end - fc->start;
		size = min(15UL - cur_size,
			   PAGE_SIZE - offset_in_page(ctxt->_eip));
		rc = __linearize(ctxt, addr, size, false, true, &linear);
		if (unlikely(rc != X86EMUL_CONTINUE))
			return rc;
		rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
				      size, &ctxt->exception);
		if (unlikely(rc != X86EMUL_CONTINUE))
			return rc;
		fc->end += size;
	}
	*dest = fc->data[ctxt->_eip - fc->start];
	ctxt->_eip++;
	return X86EMUL_CONTINUE;
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 void *dest, unsigned size)
{
	int rc;

	/* x86 instructions are limited to 15 bytes. */
	if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
		return X86EMUL_UNHANDLEABLE;
	while (size--) {
		rc = do_insn_fetch_byte(ctxt, dest++);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt)					\
({	unsigned long _x;						\
	rc = do_insn_fetch(_ctxt, &_x, sizeof(_type));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_type)_x;							\
})

#define insn_fetch_arr(_arr, _size, _ctxt)				\
({	rc = do_insn_fetch(_ctxt, _arr, (_size));			\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
})
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
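/*
 * Illustrative: with highbyte_regs set, reg values 4..7 select AH, CH,
 * DH and BH, i.e. byte 1 of RAX, RCX, RDX and RBX -- hence the "& 3"
 * plus one-byte offset below.
 */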
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int highbyte_regs)
{
	void *p;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}
static u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; call *%[fastop]"
	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
	return rc;
}
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}
static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
			  int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fninit");
	ctxt->ops->put_fpu(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstcw %0": "+m"(fcw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstsw %0": "+m"(fsw));
	ctxt->ops->put_fpu(ctxt);

	/* force 2 byte destination */
	ctxt->dst.bytes = 2;
	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned reg = ctxt->modrm_reg;
	int highbyte_regs = ctxt->rex_prefix == 0;

	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(ctxt, &op->vec_val, reg);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	if (ctxt->d & ByteOp) {
		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
		op->bytes = 1;
	} else {
		op->addr.reg = decode_register(ctxt, reg, 0);
		op->bytes = ctxt->op_bytes;
	}
	fetch_register_operand(op);
	op->orig_val = op->val;
}
static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	if (ctxt->rex_prefix) {
		ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (ctxt->rex_prefix & 2) << 2;	/* REX.X */
		ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REX.B */
	}

	ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm |= (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3) {
		int highbyte_regs = ctxt->rex_prefix == 0;

		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
					       highbyte_regs && (ctxt->d & ByteOp));
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.xmm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 5)
				modrm_ea += insn_fetch(s32, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
done:
	return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~(ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;

		ctxt->dst.addr.mem.ea += (sv >> 3);
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
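/*
 * Illustrative: for a 16-bit "bt %cx, mem" with %cx == 100, mask is ~15,
 * so sv == 96; the effective address advances 96 >> 3 == 12 bytes and
 * src.val is reduced to bit 4 (100 == 12 * 8 + 4).
 */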
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}

static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & EFLG_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
			count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
	return 1;
}
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
}

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;

	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset(dt, 0, sizeof *dt);
		if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
		ops->get_gdt(ctxt, dt);
}
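/*
 * Illustrative: selector 0x0023 has index 4 (0x23 >> 3), TI == 0 (bit 2
 * clear, so the GDT is used) and RPL == 3 (the low two bits).
 */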
/* allowed just for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	*desc_addr_p = addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
}

/* allowed just for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;
	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
				    &ctxt->exception);
}
/* Does not support long mode */
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc, old_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	ulong desc_addr;
	int ret;
	u16 dummy;

	memset(&seg_desc, 0, sizeof seg_desc);

	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
		set_desc_base(&seg_desc, selector << 4);
		goto load;
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
	}

	rpl = selector & 3;
	cpl = ctxt->ops->cpl(ctxt);

	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
	if ((seg == VCPU_SREG_CS
	     || (seg == VCPU_SREG_SS
		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
	     || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
	return X86EMUL_CONTINUE;
exception:
	emulate_exception(ctxt, err_vec, err_code, true);
	return X86EMUL_PROPAGATE_FAULT;
}
static void write_register_operand(struct operand *op)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (op->bytes) {
	case 1:
		*(u8 *)op->addr.reg = (u8)op->val;
		break;
	case 2:
		*(u16 *)op->addr.reg = (u16)op->val;
		break;
	case 4:
		*op->addr.reg = (u32)op->val;
		break;	/* 64b: zero-extend */
	case 8:
		*op->addr.reg = op->val;
		break;
	}
}
static int writeback(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	if (ctxt->d & NoWrite)
		return X86EMUL_CONTINUE;

	switch (ctxt->dst.type) {
	case OP_REG:
		write_register_operand(&ctxt->dst);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			rc = segmented_cmpxchg(ctxt,
					       ctxt->dst.addr.mem,
					       &ctxt->dst.orig_val,
					       &ctxt->dst.val,
					       ctxt->dst.bytes);
		else
			rc = segmented_write(ctxt,
					     ctxt->dst.addr.mem,
					     &ctxt->dst.val,
					     ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_MEM_STR:
		rc = segmented_write(ctxt,
				     ctxt->dst.addr.mem,
				     ctxt->dst.data,
				     ctxt->dst.bytes * ctxt->dst.count);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_XMM:
		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
		break;
	case OP_MM:
		write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}
static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}

static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);

	return em_push(ctxt);
}
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
	}
	return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return rc;
}
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts are not yet implemented. */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;

	/* TODO: Add stack limit check */

	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);

	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = temp_eip;

	if (ctxt->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (ctxt->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
	}

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;

	return rc;
}
static int em_iret(struct x86_emulate_ctxt *ctxt)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* iret from protected mode is not yet implemented */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = 0;
	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
	return X86EMUL_CONTINUE;
}
static int em_mul_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 ex = 0;

	emulate_1op_rax_rdx(ctxt, "mul", ex);
	return X86EMUL_CONTINUE;
}

static int em_imul_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 ex = 0;

	emulate_1op_rax_rdx(ctxt, "imul", ex);
	return X86EMUL_CONTINUE;
}

static int em_div_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 de = 0;

	emulate_1op_rax_rdx(ctxt, "div", de);
	if (de)
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_idiv_ex(struct x86_emulate_ctxt *ctxt)
{
	u8 de = 0;

	emulate_1op_rax_rdx(ctxt, "idiv", de);
	if (de)
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
static int em_grp45(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;

	switch (ctxt->modrm_reg) {
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = ctxt->_eip;
		rc = assign_eip_near(ctxt, ctxt->src.val);
		if (rc != X86EMUL_CONTINUE)
			break;
		ctxt->src.val = old_eip;
		rc = em_push(ctxt);
		break;
	}
	case 4: /* jmp abs */
		rc = assign_eip_near(ctxt, ctxt->src.val);
		break;
	case 5: /* jmp far */
		rc = em_jmp_far(ctxt);
		break;
	case 6: /* push */
		rc = em_push(ctxt);
		break;
	}
	return rc;
}
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
	u64 old = ctxt->dst.orig_val64;

	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;
	} else {
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);

		ctxt->eflags |= EFLG_ZF;
	}
	return X86EMUL_CONTINUE;
}
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long cs;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (ctxt->op_bytes == 4)
		ctxt->_eip = (u32)ctxt->_eip;
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	/* Outer-privilege level return is not implemented */
	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
		return X86EMUL_UNHANDLEABLE;
	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
	return rc;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & EFLG_ZF) {
		/* Success: write back to memory. */
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->dst.type = OP_REG;
		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
	}
	return X86EMUL_CONTINUE;
}
static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned short sel;
	int rc;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->dst.val = ctxt->src.val;
	return rc;
}
static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct desc_struct *cs, struct desc_struct *ss)
{
	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
}
static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
		&& ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
		&& edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
}
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode, so only become
	 * vendor specific (via cpuid) if other modes are active.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	/*
	 * Intel ("GenuineIntel"): Intel CPUs only support "syscall" in
	 * 64-bit long mode, so a 64-bit guest running a 32-bit compat app
	 * will #UD. While this behaviour could be fixed by emulating the
	 * AMD response, AMD CPUs cannot be made to behave like Intel ones,
	 * so we report syscall as unavailable here.
	 */
	if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
		return false;

	/* AMD ("AuthenticAMD") */
	if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
		return true;

	/* AMD ("AMDisbetter!") */
	if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
	    ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
	    edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
		return true;

	/* default: (not Intel, not AMD), apply Intel's stricter rules... */
	return false;
}
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	setup_syscalls_segments(ctxt, &cs, &ss);

	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
	if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_ud(ctxt);

	setup_syscalls_segments(ctxt, &cs, &ss);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0)
			return emulate_gp(ctxt, 0);
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0)
			return emulate_gp(ctxt, 0);
		break;
	default:
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs_sel = (u16)msr_data;
	cs_sel &= ~SELECTOR_RPL_MASK;
	ss_sel = cs_sel + 8;
	ss_sel &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;

	return X86EMUL_CONTINUE;
}
2501 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2503 const struct x86_emulate_ops *ops = ctxt->ops;
2504 struct desc_struct cs, ss;
2505 u64 msr_data, rcx, rdx;
2507 u16 cs_sel = 0, ss_sel = 0;
2509 /* inject #GP if in real mode or Virtual 8086 mode */
2510 if (ctxt->mode == X86EMUL_MODE_REAL ||
2511 ctxt->mode == X86EMUL_MODE_VM86)
2512 return emulate_gp(ctxt, 0);
2514 setup_syscalls_segments(ctxt, &cs, &ss);
2516 if ((ctxt->rex_prefix & 0x8) != 0x0)
2517 usermode = X86EMUL_MODE_PROT64;
2519 usermode = X86EMUL_MODE_PROT32;
2521 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2522 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2526 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2528 case X86EMUL_MODE_PROT32:
2529 cs_sel = (u16)(msr_data + 16);
2530 if ((msr_data & 0xfffc) == 0x0)
2531 return emulate_gp(ctxt, 0);
2532 ss_sel = (u16)(msr_data + 24);
2534 case X86EMUL_MODE_PROT64:
2535 cs_sel = (u16)(msr_data + 32);
2536 if (msr_data == 0x0)
2537 return emulate_gp(ctxt, 0);
2538 ss_sel = cs_sel + 8;
2541 if (is_noncanonical_address(rcx) ||
2542 is_noncanonical_address(rdx))
2543 return emulate_gp(ctxt, 0);
2546 cs_sel |= SELECTOR_RPL_MASK;
2547 ss_sel |= SELECTOR_RPL_MASK;
2549 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2550 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2553 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2555 return X86EMUL_CONTINUE;
2558 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2561 if (ctxt->mode == X86EMUL_MODE_REAL)
2562 return false;
2563 if (ctxt->mode == X86EMUL_MODE_VM86)
2564 return true;
2565 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
2566 return ctxt->ops->cpl(ctxt) > iopl;
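/*
 * Worked example (illustrative): with EFLAGS.IOPL = 0 and the guest
 * running at CPL 3, cpl(ctxt) > iopl holds, so IN/OUT and friends are
 * not implicitly permitted and the TSS I/O permission bitmap must be
 * consulted instead.
 */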
2569 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2572 const struct x86_emulate_ops *ops = ctxt->ops;
2573 struct desc_struct tr_seg;
2576 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2577 unsigned mask = (1 << len) - 1;
2580 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2583 if (desc_limit_scaled(&tr_seg) < 103)
2585 base = get_desc_base(&tr_seg);
2586 #ifdef CONFIG_X86_64
2587 base |= ((u64)base3) << 32;
2589 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2590 if (r != X86EMUL_CONTINUE)
2592 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2594 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2595 if (r != X86EMUL_CONTINUE)
2597 if ((perm >> bit_idx) & mask)
2598 return false;
2600 return true;
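/*
 * Worked example (illustrative): for a 1-byte access to port 0x3f8,
 * port/8 = 127 and bit_idx = 0, so the check above reads the u16 at
 * base + io_bitmap_ptr + 127 and tests bit 0; the access is allowed
 * only if every bit covering the port range is clear. A 16-bit read
 * (rather than a single byte) is used so that a range straddling a
 * byte boundary is still covered by one fetch.
 */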
2602 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2608 if (emulator_bad_iopl(ctxt))
2609 if (!emulator_io_port_access_allowed(ctxt, port, len))
2610 return false;
2612 ctxt->perm_ok = true;
2614 return true;
2617 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2618 struct tss_segment_16 *tss)
2620 tss->ip = ctxt->_eip;
2621 tss->flag = ctxt->eflags;
2622 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2623 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2624 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2625 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2626 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2627 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2628 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2629 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2631 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2632 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2633 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2634 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2635 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2638 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2639 struct tss_segment_16 *tss)
2643 ctxt->_eip = tss->ip;
2644 ctxt->eflags = tss->flag | 2;
2645 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2646 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2647 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2648 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2649 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2650 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2651 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2652 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2655 * SDM says that segment selectors are loaded before segment
2656 * descriptors
2658 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2659 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2660 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2661 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2662 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2665 * Now load segment descriptors. If fault happens at this stage
2666 * it is handled in the context of the new task
2668 ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
2669 if (ret != X86EMUL_CONTINUE)
2671 ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
2672 if (ret != X86EMUL_CONTINUE)
2674 ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
2675 if (ret != X86EMUL_CONTINUE)
2677 ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
2678 if (ret != X86EMUL_CONTINUE)
2680 ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
2681 if (ret != X86EMUL_CONTINUE)
2684 return X86EMUL_CONTINUE;
2687 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2688 u16 tss_selector, u16 old_tss_sel,
2689 ulong old_tss_base, struct desc_struct *new_desc)
2691 const struct x86_emulate_ops *ops = ctxt->ops;
2692 struct tss_segment_16 tss_seg;
2694 u32 new_tss_base = get_desc_base(new_desc);
2696 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2698 if (ret != X86EMUL_CONTINUE)
2699 /* FIXME: need to provide precise fault address */
2702 save_state_to_tss16(ctxt, &tss_seg);
2704 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2706 if (ret != X86EMUL_CONTINUE)
2707 /* FIXME: need to provide precise fault address */
2710 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2712 if (ret != X86EMUL_CONTINUE)
2713 /* FIXME: need to provide precise fault address */
2716 if (old_tss_sel != 0xffff) {
2717 tss_seg.prev_task_link = old_tss_sel;
2719 ret = ops->write_std(ctxt, new_tss_base,
2720 &tss_seg.prev_task_link,
2721 sizeof tss_seg.prev_task_link,
2723 if (ret != X86EMUL_CONTINUE)
2724 /* FIXME: need to provide precise fault address */
2728 return load_state_from_tss16(ctxt, &tss_seg);
2731 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2732 struct tss_segment_32 *tss)
2734 tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
2735 tss->eip = ctxt->_eip;
2736 tss->eflags = ctxt->eflags;
2737 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2738 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2739 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2740 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2741 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2742 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2743 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2744 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2746 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2747 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2748 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2749 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2750 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2751 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2752 tss->ldt_selector = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2755 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2756 struct tss_segment_32 *tss)
2760 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2761 return emulate_gp(ctxt, 0);
2762 ctxt->_eip = tss->eip;
2763 ctxt->eflags = tss->eflags | 2;
2765 /* General purpose registers */
2766 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2767 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2768 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2769 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2770 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2771 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2772 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2773 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2776 * SDM says that segment selectors are loaded before segment
2777 * descriptors
2779 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2780 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2781 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2782 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2783 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2784 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2785 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2788 * If we're switching between Protected Mode and VM86, we need to make
2789 * sure to update the mode before loading the segment descriptors so
2790 * that the selectors are interpreted correctly.
2792 * Need to get rflags to the vcpu struct immediately because it
2793 * influences the CPL which is checked at least when loading the segment
2794 * descriptors and when pushing an error code to the new kernel stack.
2796 * TODO Introduce a separate ctxt->ops->set_cpl callback
2798 if (ctxt->eflags & X86_EFLAGS_VM)
2799 ctxt->mode = X86EMUL_MODE_VM86;
2801 ctxt->mode = X86EMUL_MODE_PROT32;
2803 ctxt->ops->set_rflags(ctxt, ctxt->eflags);
2806 * Now load segment descriptors. If fault happens at this stage
2807 * it is handled in the context of the new task
2809 ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2810 if (ret != X86EMUL_CONTINUE)
2812 ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
2813 if (ret != X86EMUL_CONTINUE)
2815 ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
2816 if (ret != X86EMUL_CONTINUE)
2818 ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
2819 if (ret != X86EMUL_CONTINUE)
2821 ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
2822 if (ret != X86EMUL_CONTINUE)
2824 ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
2825 if (ret != X86EMUL_CONTINUE)
2827 ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
2828 if (ret != X86EMUL_CONTINUE)
2831 return X86EMUL_CONTINUE;
2834 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2835 u16 tss_selector, u16 old_tss_sel,
2836 ulong old_tss_base, struct desc_struct *new_desc)
2838 const struct x86_emulate_ops *ops = ctxt->ops;
2839 struct tss_segment_32 tss_seg;
2841 u32 new_tss_base = get_desc_base(new_desc);
2843 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2845 if (ret != X86EMUL_CONTINUE)
2846 /* FIXME: need to provide precise fault address */
2849 save_state_to_tss32(ctxt, &tss_seg);
2851 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2853 if (ret != X86EMUL_CONTINUE)
2854 /* FIXME: need to provide precise fault address */
2857 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2859 if (ret != X86EMUL_CONTINUE)
2860 /* FIXME: need to provide precise fault address */
2863 if (old_tss_sel != 0xffff) {
2864 tss_seg.prev_task_link = old_tss_sel;
2866 ret = ops->write_std(ctxt, new_tss_base,
2867 &tss_seg.prev_task_link,
2868 sizeof tss_seg.prev_task_link,
2870 if (ret != X86EMUL_CONTINUE)
2871 /* FIXME: need to provide precise fault address */
2875 return load_state_from_tss32(ctxt, &tss_seg);
2878 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2879 u16 tss_selector, int idt_index, int reason,
2880 bool has_error_code, u32 error_code)
2882 const struct x86_emulate_ops *ops = ctxt->ops;
2883 struct desc_struct curr_tss_desc, next_tss_desc;
2885 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2886 ulong old_tss_base =
2887 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2891 /* FIXME: old_tss_base == ~0 ? */
2893 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2894 if (ret != X86EMUL_CONTINUE)
2896 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2897 if (ret != X86EMUL_CONTINUE)
2900 /* FIXME: check that next_tss_desc is tss */
2903 * Check privileges. The three cases are task switch caused by...
2905 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2906 * 2. Exception/IRQ/iret: No check is performed
2907 * 3. jmp/call to TSS: Check against DPL of the TSS
2909 if (reason == TASK_SWITCH_GATE) {
2910 if (idt_index != -1) {
2911 /* Software interrupts */
2912 struct desc_struct task_gate_desc;
2915 ret = read_interrupt_descriptor(ctxt, idt_index,
2917 if (ret != X86EMUL_CONTINUE)
2920 dpl = task_gate_desc.dpl;
2921 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2922 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2924 } else if (reason != TASK_SWITCH_IRET) {
2925 int dpl = next_tss_desc.dpl;
2926 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2927 return emulate_gp(ctxt, tss_selector);
2931 desc_limit = desc_limit_scaled(&next_tss_desc);
2932 if (!next_tss_desc.p ||
2933 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2934 desc_limit < 0x2b)) {
2935 emulate_ts(ctxt, tss_selector & 0xfffc);
2936 return X86EMUL_PROPAGATE_FAULT;
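/*
 * Illustrative note: the magic limits above follow from the TSS
 * layouts - a 32-bit TSS (type bit 3 set) is 104 bytes, so its limit
 * must be at least 0x67, while a 16-bit TSS is 44 bytes, hence the
 * unconditional 0x2b lower bound.
 */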
2939 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2940 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2941 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2944 if (reason == TASK_SWITCH_IRET)
2945 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2947 /* set back link to the previous task only if the NT bit is set in
2948 eflags; note that old_tss_sel is not used after this point */
2949 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2950 old_tss_sel = 0xffff;
2952 if (next_tss_desc.type & 8)
2953 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
2954 old_tss_base, &next_tss_desc);
2956 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
2957 old_tss_base, &next_tss_desc);
2958 if (ret != X86EMUL_CONTINUE)
2961 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2962 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2964 if (reason != TASK_SWITCH_IRET) {
2965 next_tss_desc.type |= (1 << 1); /* set busy flag */
2966 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
2969 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
2970 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
2972 if (has_error_code) {
2973 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2974 ctxt->lock_prefix = 0;
2975 ctxt->src.val = (unsigned long) error_code;
2976 ret = em_push(ctxt);
2982 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2983 u16 tss_selector, int idt_index, int reason,
2984 bool has_error_code, u32 error_code)
2988 invalidate_registers(ctxt);
2989 ctxt->_eip = ctxt->eip;
2990 ctxt->dst.type = OP_NONE;
2992 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
2993 has_error_code, error_code);
2995 if (rc == X86EMUL_CONTINUE) {
2996 ctxt->eip = ctxt->_eip;
2997 writeback_registers(ctxt);
3000 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3003 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3006 int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
3008 register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
3009 op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
3012 static int em_das(struct x86_emulate_ctxt *ctxt)
3015 bool af, cf, old_cf;
3017 cf = ctxt->eflags & X86_EFLAGS_CF;
3023 af = ctxt->eflags & X86_EFLAGS_AF;
3024 if ((al & 0x0f) > 9 || af) {
3026 cf = old_cf | (al >= 250);
3031 if (old_al > 0x99 || old_cf) {
3037 /* Set PF, ZF, SF */
3038 ctxt->src.type = OP_IMM;
3040 ctxt->src.bytes = 1;
3041 fastop(ctxt, em_or);
3042 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3044 ctxt->eflags |= X86_EFLAGS_CF;
3046 ctxt->eflags |= X86_EFLAGS_AF;
3047 return X86EMUL_CONTINUE;
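/*
 * Worked example (illustrative): DAS after a packed-BCD subtraction
 * that produced al = 0x4b with AF set: (al & 0x0f) = 0xb > 9, so
 * al -= 6 gives 0x45, the correct BCD result of e.g. 0x52 - 0x07.
 */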
3050 static int em_aam(struct x86_emulate_ctxt *ctxt)
3054 if (ctxt->src.val == 0)
3055 return emulate_de(ctxt);
3057 al = ctxt->dst.val & 0xff;
3058 ah = al / ctxt->src.val;
3059 al %= ctxt->src.val;
3061 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3063 /* Set PF, ZF, SF */
3064 ctxt->src.type = OP_IMM;
3066 ctxt->src.bytes = 1;
3067 fastop(ctxt, em_or);
3069 return X86EMUL_CONTINUE;
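/*
 * Worked example (illustrative): AAM with the default base 10 and
 * AL = 123 (0x7b) stores 123 / 10 = 12 in AH and 123 % 10 = 3 in AL,
 * i.e. AX becomes 0x0c03; the immediate operand generalises this to
 * any base, and base 0 faults with #DE as checked above.
 */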
3072 static int em_aad(struct x86_emulate_ctxt *ctxt)
3074 u8 al = ctxt->dst.val & 0xff;
3075 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3077 al = (al + (ah * ctxt->src.val)) & 0xff;
3079 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3081 /* Set PF, ZF, SF */
3082 ctxt->src.type = OP_IMM;
3084 ctxt->src.bytes = 1;
3085 fastop(ctxt, em_or);
3087 return X86EMUL_CONTINUE;
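/*
 * Worked example (illustrative): AAD with base 10, AH = 1 and AL = 2
 * computes al = 2 + 1 * 10 = 12 (0x0c); the mask above also clears
 * AH, so AX becomes 0x000c, ready for a binary divide.
 */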
3090 static int em_call(struct x86_emulate_ctxt *ctxt)
3093 long rel = ctxt->src.val;
3095 ctxt->src.val = (unsigned long)ctxt->_eip;
3096 rc = jmp_rel(ctxt, rel);
3097 if (rc != X86EMUL_CONTINUE)
3099 return em_push(ctxt);
3102 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3108 old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3109 old_eip = ctxt->_eip;
3111 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3112 if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
3113 return X86EMUL_CONTINUE;
3116 memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
3118 ctxt->src.val = old_cs;
3120 if (rc != X86EMUL_CONTINUE)
3123 ctxt->src.val = old_eip;
3124 return em_push(ctxt);
3127 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3132 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3133 if (rc != X86EMUL_CONTINUE)
3135 rc = assign_eip_near(ctxt, eip);
3136 if (rc != X86EMUL_CONTINUE)
3138 rsp_increment(ctxt, ctxt->src.val);
3139 return X86EMUL_CONTINUE;
3142 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3144 /* Write back the register source. */
3145 ctxt->src.val = ctxt->dst.val;
3146 write_register_operand(&ctxt->src);
3148 /* Write back the memory destination with implicit LOCK prefix. */
3149 ctxt->dst.val = ctxt->src.orig_val;
3150 ctxt->lock_prefix = 1;
3151 return X86EMUL_CONTINUE;
3154 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3156 ctxt->dst.val = ctxt->src2.val;
3157 return fastop(ctxt, em_imul);
3160 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3162 ctxt->dst.type = OP_REG;
3163 ctxt->dst.bytes = ctxt->src.bytes;
3164 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3165 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3167 return X86EMUL_CONTINUE;
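/*
 * Illustrative note: the expression above is a branch-free sign fill.
 * (src >> (bits - 1)) isolates the sign bit; subtracting 1 yields all
 * ones for a non-negative source and 0 for a negative one, and the
 * final ~ inverts that, so RDX becomes 0 or ~0 as CWD/CDQ/CQO require.
 */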
3170 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3174 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3175 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3176 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3177 return X86EMUL_CONTINUE;
3180 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3184 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3185 return emulate_gp(ctxt, 0);
3186 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3187 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3188 return X86EMUL_CONTINUE;
3191 static int em_mov(struct x86_emulate_ctxt *ctxt)
3193 memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
3194 return X86EMUL_CONTINUE;
3197 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3199 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3200 return emulate_gp(ctxt, 0);
3202 /* Disable writeback. */
3203 ctxt->dst.type = OP_NONE;
3204 return X86EMUL_CONTINUE;
3207 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3211 if (ctxt->mode == X86EMUL_MODE_PROT64)
3212 val = ctxt->src.val & ~0ULL;
3214 val = ctxt->src.val & ~0U;
3216 /* #UD condition is already handled. */
3217 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3218 return emulate_gp(ctxt, 0);
3220 /* Disable writeback. */
3221 ctxt->dst.type = OP_NONE;
3222 return X86EMUL_CONTINUE;
3225 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3229 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3230 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3231 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3232 return emulate_gp(ctxt, 0);
3234 return X86EMUL_CONTINUE;
3237 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3241 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3242 return emulate_gp(ctxt, 0);
3244 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3245 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3246 return X86EMUL_CONTINUE;
3249 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3251 if (ctxt->modrm_reg > VCPU_SREG_GS)
3252 return emulate_ud(ctxt);
3254 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3255 return X86EMUL_CONTINUE;
3258 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3260 u16 sel = ctxt->src.val;
3262 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3263 return emulate_ud(ctxt);
3265 if (ctxt->modrm_reg == VCPU_SREG_SS)
3266 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3268 /* Disable writeback. */
3269 ctxt->dst.type = OP_NONE;
3270 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3273 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3275 u16 sel = ctxt->src.val;
3277 /* Disable writeback. */
3278 ctxt->dst.type = OP_NONE;
3279 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3282 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3284 u16 sel = ctxt->src.val;
3286 /* Disable writeback. */
3287 ctxt->dst.type = OP_NONE;
3288 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3291 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3296 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3297 if (rc == X86EMUL_CONTINUE)
3298 ctxt->ops->invlpg(ctxt, linear);
3299 /* Disable writeback. */
3300 ctxt->dst.type = OP_NONE;
3301 return X86EMUL_CONTINUE;
3304 static int em_clts(struct x86_emulate_ctxt *ctxt)
3308 cr0 = ctxt->ops->get_cr(ctxt, 0);
3310 ctxt->ops->set_cr(ctxt, 0, cr0);
3311 return X86EMUL_CONTINUE;
3314 static int em_vmcall(struct x86_emulate_ctxt *ctxt)
3318 if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1)
3319 return X86EMUL_UNHANDLEABLE;
3321 rc = ctxt->ops->fix_hypercall(ctxt);
3322 if (rc != X86EMUL_CONTINUE)
3325 /* Let the processor re-execute the fixed hypercall */
3326 ctxt->_eip = ctxt->eip;
3327 /* Disable writeback. */
3328 ctxt->dst.type = OP_NONE;
3329 return X86EMUL_CONTINUE;
3332 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3333 void (*get)(struct x86_emulate_ctxt *ctxt,
3334 struct desc_ptr *ptr))
3336 struct desc_ptr desc_ptr;
3338 if (ctxt->mode == X86EMUL_MODE_PROT64)
3340 get(ctxt, &desc_ptr);
3341 if (ctxt->op_bytes == 2) {
3343 desc_ptr.address &= 0x00ffffff;
3345 /* Disable writeback. */
3346 ctxt->dst.type = OP_NONE;
3347 return segmented_write(ctxt, ctxt->dst.addr.mem,
3348 &desc_ptr, 2 + ctxt->op_bytes);
3351 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3353 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3356 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3358 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3361 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3363 struct desc_ptr desc_ptr;
3366 if (ctxt->mode == X86EMUL_MODE_PROT64)
3368 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3369 &desc_ptr.size, &desc_ptr.address,
3371 if (rc != X86EMUL_CONTINUE)
3373 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3374 /* Disable writeback. */
3375 ctxt->dst.type = OP_NONE;
3376 return X86EMUL_CONTINUE;
3379 static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
3383 rc = ctxt->ops->fix_hypercall(ctxt);
3385 /* Disable writeback. */
3386 ctxt->dst.type = OP_NONE;
3390 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3392 struct desc_ptr desc_ptr;
3395 if (ctxt->mode == X86EMUL_MODE_PROT64)
3397 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3398 &desc_ptr.size, &desc_ptr.address,
3400 if (rc != X86EMUL_CONTINUE)
3402 ctxt->ops->set_idt(ctxt, &desc_ptr);
3403 /* Disable writeback. */
3404 ctxt->dst.type = OP_NONE;
3405 return X86EMUL_CONTINUE;
3408 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3410 ctxt->dst.bytes = 2;
3411 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3412 return X86EMUL_CONTINUE;
3415 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3417 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3418 | (ctxt->src.val & 0x0f));
3419 ctxt->dst.type = OP_NONE;
3420 return X86EMUL_CONTINUE;
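/*
 * Illustrative note: the ~0x0e mask above preserves CR0.PE (bit 0)
 * while clearing MP/EM/TS (bits 1-3), so LMSW can set PE via the
 * source's low nibble but can never clear it - matching the
 * architectural rule that LMSW cannot be used to leave protected mode.
 */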
3423 static int em_loop(struct x86_emulate_ctxt *ctxt)
3425 int rc = X86EMUL_CONTINUE;
3427 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
3428 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3429 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3430 rc = jmp_rel(ctxt, ctxt->src.val);
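/*
 * Illustrative note: 0xe2 is plain LOOP; for LOOPNE (0xe0) and LOOPE
 * (0xe1) the b ^ 0x5 trick maps the opcode's low nibble onto the
 * matching Jcc condition code (0xe0 -> 5 = "ZF clear", 0xe1 -> 4 =
 * "ZF set"), letting test_cc() evaluate the extra ZF condition.
 */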
3435 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3437 int rc = X86EMUL_CONTINUE;
3439 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3440 rc = jmp_rel(ctxt, ctxt->src.val);
3445 static int em_in(struct x86_emulate_ctxt *ctxt)
3447 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3449 return X86EMUL_IO_NEEDED;
3451 return X86EMUL_CONTINUE;
3454 static int em_out(struct x86_emulate_ctxt *ctxt)
3456 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3458 /* Disable writeback. */
3459 ctxt->dst.type = OP_NONE;
3460 return X86EMUL_CONTINUE;
3463 static int em_cli(struct x86_emulate_ctxt *ctxt)
3465 if (emulator_bad_iopl(ctxt))
3466 return emulate_gp(ctxt, 0);
3468 ctxt->eflags &= ~X86_EFLAGS_IF;
3469 return X86EMUL_CONTINUE;
3472 static int em_sti(struct x86_emulate_ctxt *ctxt)
3474 if (emulator_bad_iopl(ctxt))
3475 return emulate_gp(ctxt, 0);
3477 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3478 ctxt->eflags |= X86_EFLAGS_IF;
3479 return X86EMUL_CONTINUE;
3482 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3484 u32 eax, ebx, ecx, edx;
3486 eax = reg_read(ctxt, VCPU_REGS_RAX);
3487 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3488 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3489 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3490 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3491 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3492 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3493 return X86EMUL_CONTINUE;
3496 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3498 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3499 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3500 return X86EMUL_CONTINUE;
3503 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3505 switch (ctxt->op_bytes) {
3506 #ifdef CONFIG_X86_64
3508 asm("bswap %0" : "+r"(ctxt->dst.val));
3512 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3515 return X86EMUL_CONTINUE;
3518 static bool valid_cr(int nr)
3530 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
3532 if (!valid_cr(ctxt->modrm_reg))
3533 return emulate_ud(ctxt);
3535 return X86EMUL_CONTINUE;
3538 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3540 u64 new_val = ctxt->src.val64;
3541 int cr = ctxt->modrm_reg;
3544 static u64 cr_reserved_bits[] = {
3545 0xffffffff00000000ULL,
3546 0, 0, 0, /* CR3 checked later */
3553 return emulate_ud(ctxt);
3555 if (new_val & cr_reserved_bits[cr])
3556 return emulate_gp(ctxt, 0);
3561 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
3562 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
3563 return emulate_gp(ctxt, 0);
3565 cr4 = ctxt->ops->get_cr(ctxt, 4);
3566 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3568 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
3569 !(cr4 & X86_CR4_PAE))
3570 return emulate_gp(ctxt, 0);
3577 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3578 if (efer & EFER_LMA)
3579 rsvd = CR3_L_MODE_RESERVED_BITS;
3580 else if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PAE)
3581 rsvd = CR3_PAE_RESERVED_BITS;
3582 else if (ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PG)
3583 rsvd = CR3_NONPAE_RESERVED_BITS;
3586 return emulate_gp(ctxt, 0);
3591 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3593 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
3594 return emulate_gp(ctxt, 0);
3600 return X86EMUL_CONTINUE;
3603 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3607 ctxt->ops->get_dr(ctxt, 7, &dr7);
3609 /* Check if DR7.GD (general detect, bit 13) is set */
3610 return dr7 & (1 << 13);
3613 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3615 int dr = ctxt->modrm_reg;
3619 return emulate_ud(ctxt);
3621 cr4 = ctxt->ops->get_cr(ctxt, 4);
3622 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3623 return emulate_ud(ctxt);
3625 if (check_dr7_gd(ctxt))
3626 return emulate_db(ctxt);
3628 return X86EMUL_CONTINUE;
3631 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3633 u64 new_val = ctxt->src.val64;
3634 int dr = ctxt->modrm_reg;
3636 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3637 return emulate_gp(ctxt, 0);
3639 return check_dr_read(ctxt);
3642 static int check_svme(struct x86_emulate_ctxt *ctxt)
3646 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3648 if (!(efer & EFER_SVME))
3649 return emulate_ud(ctxt);
3651 return X86EMUL_CONTINUE;
3654 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3656 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3658 /* Valid physical address? */
3659 if (rax & 0xffff000000000000ULL)
3660 return emulate_gp(ctxt, 0);
3662 return check_svme(ctxt);
3665 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3667 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3669 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3670 return emulate_ud(ctxt);
3672 return X86EMUL_CONTINUE;
3675 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3677 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3678 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3680 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3682 return emulate_gp(ctxt, 0);
3684 return X86EMUL_CONTINUE;
3687 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3689 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3690 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
3691 return emulate_gp(ctxt, 0);
3693 return X86EMUL_CONTINUE;
3696 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3698 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3699 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
3700 return emulate_gp(ctxt, 0);
3702 return X86EMUL_CONTINUE;
3705 #define D(_y) { .flags = (_y) }
3706 #define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
3707 #define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
3708 .check_perm = (_p) }
3709 #define N D(NotImpl)
3710 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3711 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3712 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3713 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3714 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
3715 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
3716 #define II(_f, _e, _i) \
3717 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
3718 #define IIP(_f, _e, _i, _p) \
3719 { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
3720 .check_perm = (_p) }
3721 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
3723 #define D2bv(_f) D((_f) | ByteOp), D(_f)
3724 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
3725 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
3726 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
3727 #define I2bvIP(_f, _e, _i, _p) \
3728 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
3730 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
3731 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
3732 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
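/*
 * Illustrative expansion (not in the original source): F6ALU(Lock, em_add)
 * emits the six classic ALU encodings in opcode order, e.g. for ADD
 * (0x00-0x05):
 *
 * F(Lock | ByteOp | DstMem | SrcReg | ModRM, em_add)   00 /r
 * F(Lock | DstMem | SrcReg | ModRM, em_add)            01 /r
 * F(ByteOp | DstReg | SrcMem | ModRM, em_add)          02 /r
 * F(DstReg | SrcMem | ModRM, em_add)                   03 /r
 * F(ByteOp | DstAcc | SrcImm, em_add)                  04 ib
 * F(DstAcc | SrcImm, em_add)                           05 iw/id
 */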
3734 static const struct opcode group7_rm1[] = {
3735 DI(SrcNone | Priv, monitor),
3736 DI(SrcNone | Priv, mwait),
3740 static const struct opcode group7_rm3[] = {
3741 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
3742 II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
3743 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
3744 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
3745 DIP(SrcNone | Prot | Priv, stgi, check_svme),
3746 DIP(SrcNone | Prot | Priv, clgi, check_svme),
3747 DIP(SrcNone | Prot | Priv, skinit, check_svme),
3748 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
3751 static const struct opcode group7_rm7[] = {
3753 DIP(SrcNone, rdtscp, check_rdtsc),
3757 static const struct opcode group1[] = {
3759 F(Lock | PageTable, em_or),
3762 F(Lock | PageTable, em_and),
3768 static const struct opcode group1A[] = {
3769 I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
3772 static const struct opcode group2[] = {
3773 F(DstMem | ModRM, em_rol),
3774 F(DstMem | ModRM, em_ror),
3775 F(DstMem | ModRM, em_rcl),
3776 F(DstMem | ModRM, em_rcr),
3777 F(DstMem | ModRM, em_shl),
3778 F(DstMem | ModRM, em_shr),
3779 F(DstMem | ModRM, em_shl), /* /6 is an undocumented alias of /4 (shl) */
3780 F(DstMem | ModRM, em_sar),
3783 static const struct opcode group3[] = {
3784 F(DstMem | SrcImm | NoWrite, em_test),
3785 F(DstMem | SrcImm | NoWrite, em_test),
3786 F(DstMem | SrcNone | Lock, em_not),
3787 F(DstMem | SrcNone | Lock, em_neg),
3788 I(SrcMem, em_mul_ex),
3789 I(SrcMem, em_imul_ex),
3790 I(SrcMem, em_div_ex),
3791 I(SrcMem, em_idiv_ex),
3794 static const struct opcode group4[] = {
3795 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
3796 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
3800 static const struct opcode group5[] = {
3801 F(DstMem | SrcNone | Lock, em_inc),
3802 F(DstMem | SrcNone | Lock, em_dec),
3803 I(SrcMem | Stack, em_grp45),
3804 I(SrcMemFAddr | ImplicitOps | Stack, em_call_far),
3805 I(SrcMem | Stack, em_grp45),
3806 I(SrcMemFAddr | ImplicitOps, em_grp45),
3807 I(SrcMem | Stack, em_grp45), D(Undefined),
3810 static const struct opcode group6[] = {
3813 II(Prot | Priv | SrcMem16, em_lldt, lldt),
3814 II(Prot | Priv | SrcMem16, em_ltr, ltr),
3818 static const struct group_dual group7 = { {
3819 II(Mov | DstMem | Priv, em_sgdt, sgdt),
3820 II(Mov | DstMem | Priv, em_sidt, sidt),
3821 II(SrcMem | Priv, em_lgdt, lgdt),
3822 II(SrcMem | Priv, em_lidt, lidt),
3823 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
3824 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3825 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
3827 I(SrcNone | Priv | VendorSpecific, em_vmcall),
3829 N, EXT(0, group7_rm3),
3830 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
3831 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
3835 static const struct opcode group8[] = {
3837 F(DstMem | SrcImmByte | NoWrite, em_bt),
3838 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
3839 F(DstMem | SrcImmByte | Lock, em_btr),
3840 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
3843 static const struct group_dual group9 = { {
3844 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
3846 N, N, N, N, N, N, N, N,
3849 static const struct opcode group11[] = {
3850 I(DstMem | SrcImm | Mov | PageTable, em_mov),
3854 static const struct gprefix pfx_0f_6f_0f_7f = {
3855 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3858 static const struct gprefix pfx_vmovntpx = {
3859 I(0, em_mov), N, N, N,
3862 static const struct escape escape_d9 = { {
3863 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3866 N, N, N, N, N, N, N, N,
3868 N, N, N, N, N, N, N, N,
3870 N, N, N, N, N, N, N, N,
3872 N, N, N, N, N, N, N, N,
3874 N, N, N, N, N, N, N, N,
3876 N, N, N, N, N, N, N, N,
3878 N, N, N, N, N, N, N, N,
3880 N, N, N, N, N, N, N, N,
3883 static const struct escape escape_db = { {
3884 N, N, N, N, N, N, N, N,
3887 N, N, N, N, N, N, N, N,
3889 N, N, N, N, N, N, N, N,
3891 N, N, N, N, N, N, N, N,
3893 N, N, N, N, N, N, N, N,
3895 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
3897 N, N, N, N, N, N, N, N,
3899 N, N, N, N, N, N, N, N,
3901 N, N, N, N, N, N, N, N,
3904 static const struct escape escape_dd = { {
3905 N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
3908 N, N, N, N, N, N, N, N,
3910 N, N, N, N, N, N, N, N,
3912 N, N, N, N, N, N, N, N,
3914 N, N, N, N, N, N, N, N,
3916 N, N, N, N, N, N, N, N,
3918 N, N, N, N, N, N, N, N,
3920 N, N, N, N, N, N, N, N,
3922 N, N, N, N, N, N, N, N,
3925 static const struct opcode opcode_table[256] = {
3927 F6ALU(Lock, em_add),
3928 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
3929 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
3931 F6ALU(Lock | PageTable, em_or),
3932 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
3935 F6ALU(Lock, em_adc),
3936 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
3937 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
3939 F6ALU(Lock, em_sbb),
3940 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
3941 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
3943 F6ALU(Lock | PageTable, em_and), N, N,
3945 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
3947 F6ALU(Lock, em_xor), N, N,
3949 F6ALU(NoWrite, em_cmp), N, N,
3951 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
3953 X8(I(SrcReg | Stack, em_push)),
3955 X8(I(DstReg | Stack, em_pop)),
3957 I(ImplicitOps | Stack | No64, em_pusha),
3958 I(ImplicitOps | Stack | No64, em_popa),
3959 N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
3962 I(SrcImm | Mov | Stack, em_push),
3963 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
3964 I(SrcImmByte | Mov | Stack, em_push),
3965 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
3966 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
3967 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
3971 G(ByteOp | DstMem | SrcImm, group1),
3972 G(DstMem | SrcImm, group1),
3973 G(ByteOp | DstMem | SrcImm | No64, group1),
3974 G(DstMem | SrcImmByte, group1),
3975 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
3976 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
3978 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
3979 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
3980 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
3981 D(ModRM | SrcMem | NoAccess | DstReg),
3982 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
3985 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
3987 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
3988 I(SrcImmFAddr | No64, em_call_far), N,
3989 II(ImplicitOps | Stack, em_pushf, pushf),
3990 II(ImplicitOps | Stack, em_popf, popf), N, I(ImplicitOps, em_lahf),
3992 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
3993 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
3994 I2bv(SrcSI | DstDI | Mov | String, em_mov),
3995 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
3997 F2bv(DstAcc | SrcImm | NoWrite, em_test),
3998 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
3999 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4000 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
4002 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4004 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4006 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4007 I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
4008 I(ImplicitOps | Stack, em_ret),
4009 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4010 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4011 G(ByteOp, group11), G(0, group11),
4013 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4014 N, I(ImplicitOps | Stack, em_ret_far),
4015 D(ImplicitOps), DI(SrcImmByte, intn),
4016 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4018 G(Src2One | ByteOp, group2), G(Src2One, group2),
4019 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4020 I(DstAcc | SrcImmUByte | No64, em_aam),
4021 I(DstAcc | SrcImmUByte | No64, em_aad),
4022 F(DstAcc | ByteOp | No64, em_salc),
4023 I(DstAcc | SrcXLat | ByteOp, em_mov),
4025 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4027 X3(I(SrcImmByte, em_loop)),
4028 I(SrcImmByte, em_jcxz),
4029 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4030 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4032 I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
4033 I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
4034 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4035 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4037 N, DI(ImplicitOps, icebp), N, N,
4038 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4039 G(ByteOp, group3), G(0, group3),
4041 D(ImplicitOps), D(ImplicitOps),
4042 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4043 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4046 static const struct opcode twobyte_table[256] = {
4048 G(0, group6), GD(0, &group7), N, N,
4049 N, I(ImplicitOps | VendorSpecific, em_syscall),
4050 II(ImplicitOps | Priv, em_clts, clts), N,
4051 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4052 N, D(ImplicitOps | ModRM), N, N,
4054 N, N, N, N, N, N, N, N,
4055 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
4057 DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read),
4058 DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read),
4059 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
4060 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
4062 N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
4065 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4066 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4067 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4068 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4069 I(ImplicitOps | VendorSpecific, em_sysenter),
4070 I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
4072 N, N, N, N, N, N, N, N,
4074 X16(D(DstReg | SrcMem | ModRM | Mov)),
4076 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4081 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4086 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4090 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4092 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4093 II(ImplicitOps, em_cpuid, cpuid),
4094 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4095 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4096 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4098 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4099 DI(ImplicitOps, rsm),
4100 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4101 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4102 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4103 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
4105 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
4106 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4107 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4108 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4109 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4110 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4114 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4115 F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
4116 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4118 D2bv(DstMem | SrcReg | ModRM | Lock),
4119 N, D(DstMem | SrcReg | ModRM | Mov),
4120 N, N, N, GD(0, &group9),
4122 X8(I(DstReg, em_bswap)),
4124 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4126 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4128 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4145 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4149 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4155 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4156 unsigned size, bool sign_extension)
4158 int rc = X86EMUL_CONTINUE;
4162 op->addr.mem.ea = ctxt->_eip;
4163 /* NB. Immediates are sign-extended as necessary. */
4164 switch (op->bytes) {
4166 op->val = insn_fetch(s8, ctxt);
4169 op->val = insn_fetch(s16, ctxt);
4172 op->val = insn_fetch(s32, ctxt);
4175 op->val = insn_fetch(s64, ctxt);
4178 if (!sign_extension) {
4179 switch (op->bytes) {
4187 op->val &= 0xffffffff;
4195 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4198 int rc = X86EMUL_CONTINUE;
4202 decode_register_operand(ctxt, op);
4205 rc = decode_imm(ctxt, op, 1, false);
4208 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4212 if ((ctxt->d & BitOp) && op == &ctxt->dst)
4213 fetch_bit_operand(ctxt);
4214 op->orig_val = op->val;
4217 ctxt->memop.bytes = 8;
4221 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4222 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4223 fetch_register_operand(op);
4224 op->orig_val = op->val;
4228 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4230 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
4231 op->addr.mem.seg = VCPU_SREG_ES;
4238 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4239 fetch_register_operand(op);
4243 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4246 rc = decode_imm(ctxt, op, 1, true);
4253 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4256 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4259 ctxt->memop.bytes = 1;
4260 if (ctxt->memop.type == OP_REG) {
4261 int highbyte_regs = ctxt->rex_prefix == 0;
4263 ctxt->memop.addr.reg = decode_register(ctxt, ctxt->modrm_rm,
4265 fetch_register_operand(&ctxt->memop);
4269 ctxt->memop.bytes = 2;
4272 ctxt->memop.bytes = 4;
4275 rc = decode_imm(ctxt, op, 2, false);
4278 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4282 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4284 register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
4285 op->addr.mem.seg = seg_override(ctxt);
4291 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4293 register_address(ctxt,
4294 reg_read(ctxt, VCPU_REGS_RBX) +
4295 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4296 op->addr.mem.seg = seg_override(ctxt);
4301 op->addr.mem.ea = ctxt->_eip;
4302 op->bytes = ctxt->op_bytes + 2;
4303 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4306 ctxt->memop.bytes = ctxt->op_bytes + 2;
4309 op->val = VCPU_SREG_ES;
4312 op->val = VCPU_SREG_CS;
4315 op->val = VCPU_SREG_SS;
4318 op->val = VCPU_SREG_DS;
4321 op->val = VCPU_SREG_FS;
4324 op->val = VCPU_SREG_GS;
4327 /* Special instructions do their own operand decoding. */
4329 op->type = OP_NONE; /* Disable writeback. */
4337 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4339 int rc = X86EMUL_CONTINUE;
4340 int mode = ctxt->mode;
4341 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4342 bool op_prefix = false;
4343 struct opcode opcode;
4345 ctxt->memop.type = OP_NONE;
4346 ctxt->memopp = NULL;
4347 ctxt->_eip = ctxt->eip;
4348 ctxt->fetch.start = ctxt->_eip;
4349 ctxt->fetch.end = ctxt->fetch.start + insn_len;
4351 memcpy(ctxt->fetch.data, insn, insn_len);
4354 case X86EMUL_MODE_REAL:
4355 case X86EMUL_MODE_VM86:
4356 case X86EMUL_MODE_PROT16:
4357 def_op_bytes = def_ad_bytes = 2;
4359 case X86EMUL_MODE_PROT32:
4360 def_op_bytes = def_ad_bytes = 4;
4362 #ifdef CONFIG_X86_64
4363 case X86EMUL_MODE_PROT64:
4369 return EMULATION_FAILED;
4372 ctxt->op_bytes = def_op_bytes;
4373 ctxt->ad_bytes = def_ad_bytes;
4375 /* Legacy prefixes. */
4377 switch (ctxt->b = insn_fetch(u8, ctxt)) {
4378 case 0x66: /* operand-size override */
4380 /* switch between 2/4 bytes */
4381 ctxt->op_bytes = def_op_bytes ^ 6;
4383 case 0x67: /* address-size override */
4384 if (mode == X86EMUL_MODE_PROT64)
4385 /* switch between 4/8 bytes */
4386 ctxt->ad_bytes = def_ad_bytes ^ 12;
4388 /* switch between 2/4 bytes */
4389 ctxt->ad_bytes = def_ad_bytes ^ 6;
4391 case 0x26: /* ES override */
4392 case 0x2e: /* CS override */
4393 case 0x36: /* SS override */
4394 case 0x3e: /* DS override */
4395 set_seg_override(ctxt, (ctxt->b >> 3) & 3);
4397 case 0x64: /* FS override */
4398 case 0x65: /* GS override */
4399 set_seg_override(ctxt, ctxt->b & 7);
4401 case 0x40 ... 0x4f: /* REX */
4402 if (mode != X86EMUL_MODE_PROT64)
4404 ctxt->rex_prefix = ctxt->b;
4406 case 0xf0: /* LOCK */
4407 ctxt->lock_prefix = 1;
4409 case 0xf2: /* REPNE/REPNZ */
4410 case 0xf3: /* REP/REPE/REPZ */
4411 ctxt->rep_prefix = ctxt->b;
4417 /* Any legacy prefix after a REX prefix nullifies its effect. */
4419 ctxt->rex_prefix = 0;
4425 if (ctxt->rex_prefix & 8)
4426 ctxt->op_bytes = 8; /* REX.W */
4428 /* Opcode byte(s). */
4429 opcode = opcode_table[ctxt->b];
4430 /* Two-byte opcode? */
4431 if (ctxt->b == 0x0f) {
4433 ctxt->b = insn_fetch(u8, ctxt);
4434 opcode = twobyte_table[ctxt->b];
4436 ctxt->d = opcode.flags;
4438 if (ctxt->d & ModRM)
4439 ctxt->modrm = insn_fetch(u8, ctxt);
4441 while (ctxt->d & GroupMask) {
4442 switch (ctxt->d & GroupMask) {
4444 goffset = (ctxt->modrm >> 3) & 7;
4445 opcode = opcode.u.group[goffset];
4448 goffset = (ctxt->modrm >> 3) & 7;
4449 if ((ctxt->modrm >> 6) == 3)
4450 opcode = opcode.u.gdual->mod3[goffset];
4452 opcode = opcode.u.gdual->mod012[goffset];
4455 goffset = ctxt->modrm & 7;
4456 opcode = opcode.u.group[goffset];
4459 if (ctxt->rep_prefix && op_prefix)
4460 return EMULATION_FAILED;
4461 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
4462 switch (simd_prefix) {
4463 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
4464 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
4465 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
4466 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
4470 if (ctxt->modrm > 0xbf)
4471 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
4473 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
4476 return EMULATION_FAILED;
4479 ctxt->d &= ~(u64)GroupMask;
4480 ctxt->d |= opcode.flags;
4483 ctxt->execute = opcode.u.execute;
4484 ctxt->check_perm = opcode.check_perm;
4485 ctxt->intercept = opcode.intercept;
4488 if (ctxt->d == 0 || (ctxt->d & NotImpl))
4489 return EMULATION_FAILED;
4491 if (!(ctxt->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
4492 return EMULATION_FAILED;
4494 if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
4497 if (ctxt->d & Op3264) {
4498 if (mode == X86EMUL_MODE_PROT64)
4505 ctxt->op_bytes = 16;
4506 else if (ctxt->d & Mmx)
4509 /* ModRM and SIB bytes. */
4510 if (ctxt->d & ModRM) {
4511 rc = decode_modrm(ctxt, &ctxt->memop);
4512 if (!ctxt->has_seg_override)
4513 set_seg_override(ctxt, ctxt->modrm_seg);
4514 } else if (ctxt->d & MemAbs)
4515 rc = decode_abs(ctxt, &ctxt->memop);
4516 if (rc != X86EMUL_CONTINUE)
4519 if (!ctxt->has_seg_override)
4520 set_seg_override(ctxt, VCPU_SREG_DS);
4522 ctxt->memop.addr.mem.seg = seg_override(ctxt);
4524 if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
4525 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
4528 * Decode and fetch the source operand: register, memory
4531 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
4532 if (rc != X86EMUL_CONTINUE)
4536 * Decode and fetch the second source operand: register, memory
4539 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
4540 if (rc != X86EMUL_CONTINUE)
4543 /* Decode and fetch the destination operand: register or memory. */
4544 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4547 if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
4548 ctxt->memopp->addr.mem.ea += ctxt->_eip;
4550 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
4553 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
4555 return ctxt->d & PageTable;
4558 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
4560 /* The second termination condition only applies to REPE
4561 * and REPNE. Test whether the repeat string operation prefix is
4562 * REPE/REPZ or REPNE/REPNZ and, if it is, check the
4563 * corresponding termination condition according to:
4564 * - if REPE/REPZ and ZF = 0 then done
4565 * - if REPNE/REPNZ and ZF = 1 then done
4567 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
4568 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
4569 && (((ctxt->rep_prefix == REPE_PREFIX) &&
4570 ((ctxt->eflags & EFLG_ZF) == 0))
4571 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
4572 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
4573 return true;
4575 return false;
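/*
 * Illustrative note: 0xa6/0xa7 are CMPS and 0xae/0xaf are SCAS - the
 * only string opcodes that write the flags, which is why the ZF-based
 * REPE/REPNE termination test above is restricted to them.
 */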
4578 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
4582 ctxt->ops->get_fpu(ctxt);
4583 asm volatile("1: fwait \n\t"
4585 ".pushsection .fixup,\"ax\" \n\t"
4587 "movb $1, %[fault] \n\t"
4590 _ASM_EXTABLE(1b, 3b)
4591 : [fault]"+qm"(fault));
4592 ctxt->ops->put_fpu(ctxt);
4594 if (unlikely(fault))
4595 return emulate_exception(ctxt, MF_VECTOR, 0, false);
4597 return X86EMUL_CONTINUE;
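/*
 * Illustrative note: the fwait above forces any pending x87 exception
 * in the guest's FPU state to be delivered while the emulator holds
 * the FPU; the _ASM_EXTABLE fixup catches the resulting fault, sets
 * 'fault', and the condition is then re-injected into the guest as a
 * math fault (MF_VECTOR) instead of being taken in host context.
 */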
4600 static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
4603 if (op->type == OP_MM)
4604 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
4607 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
4609 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
4610 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
4611 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
4612 : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
4613 : "c"(ctxt->src2.val), [fastop]"S"(fop));
4614 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
4615 return X86EMUL_CONTINUE;
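/*
 * Illustrative note on the fastop calling convention (as set up by the
 * asm constraints above): dst.val travels in RAX, src.val in RBX,
 * src2.val in RCX and the flags word in RDI, with the target routine
 * in RSI. The __ffs(dst.bytes) * FASTOP_SIZE arithmetic picks the
 * size variant: operand sizes 1/2/4/8 map to variants 0/1/2/3 within
 * each fastop block.
 */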
4618 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
4620 const struct x86_emulate_ops *ops = ctxt->ops;
4621 int rc = X86EMUL_CONTINUE;
4622 int saved_dst_type = ctxt->dst.type;
4624 ctxt->mem_read.pos = 0;
4626 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
4627 (ctxt->d & Undefined)) {
4628 rc = emulate_ud(ctxt);
4632 /* LOCK prefix is allowed only with some instructions */
4633 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
4634 rc = emulate_ud(ctxt);
4638 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
4639 rc = emulate_ud(ctxt);
4643 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
4644 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
4645 rc = emulate_ud(ctxt);
4649 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
4650 rc = emulate_nm(ctxt);
4654 if (ctxt->d & Mmx) {
4655 rc = flush_pending_x87_faults(ctxt);
4656 if (rc != X86EMUL_CONTINUE)
4659 * Now that we know the fpu is exception safe, we can fetch
4660 * operands from it.
4662 fetch_possible_mmx_operand(ctxt, &ctxt->src);
4663 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
4664 if (!(ctxt->d & Mov))
4665 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
4668 if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
4669 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4670 X86_ICPT_PRE_EXCEPT);
4671 if (rc != X86EMUL_CONTINUE)
4675 /* Privileged instruction can be executed only in CPL=0 */
4676 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
4677 rc = emulate_gp(ctxt, 0);
4681 /* Instruction can only be executed in protected mode */
4682 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
4683 rc = emulate_ud(ctxt);
4687 /* Do instruction specific permission checks */
4688 if (ctxt->check_perm) {
4689 rc = ctxt->check_perm(ctxt);
4690 if (rc != X86EMUL_CONTINUE)
4694 if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
4695 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4696 X86_ICPT_POST_EXCEPT);
4697 if (rc != X86EMUL_CONTINUE)
4701 if (ctxt->rep_prefix && (ctxt->d & String)) {
4702 /* All REP prefixes have the same first termination condition */
4703 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
4704 ctxt->eip = ctxt->_eip;
4709 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
4710 rc = segmented_read(ctxt, ctxt->src.addr.mem,
4711 ctxt->src.valptr, ctxt->src.bytes);
4712 if (rc != X86EMUL_CONTINUE)
4714 ctxt->src.orig_val64 = ctxt->src.val64;
4717 if (ctxt->src2.type == OP_MEM) {
4718 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
4719 &ctxt->src2.val, ctxt->src2.bytes);
4720 if (rc != X86EMUL_CONTINUE)
4724 if ((ctxt->d & DstMask) == ImplicitOps)
4728 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
4729 /* optimisation - avoid slow emulated read if Mov */
4730 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
4731 &ctxt->dst.val, ctxt->dst.bytes);
4732 if (rc != X86EMUL_CONTINUE)
4735 ctxt->dst.orig_val = ctxt->dst.val;
4739 if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
4740 rc = emulator_check_intercept(ctxt, ctxt->intercept,
4741 X86_ICPT_POST_MEMACCESS);
4742 if (rc != X86EMUL_CONTINUE)
4746 if (ctxt->execute) {
4747 if (ctxt->d & Fastop) {
4748 void (*fop)(struct fastop *) = (void *)ctxt->execute;
4749 rc = fastop(ctxt, fop);
4750 if (rc != X86EMUL_CONTINUE)
4754 rc = ctxt->execute(ctxt);
4755 if (rc != X86EMUL_CONTINUE)
4764 case 0x63: /* movsxd */
4765 if (ctxt->mode != X86EMUL_MODE_PROT64)
4766 goto cannot_emulate;
4767 ctxt->dst.val = (s32) ctxt->src.val;
        case 0x70 ... 0x7f: /* jcc (short) */
                if (test_cc(ctxt->b, ctxt->eflags))
                        rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x8d: /* lea r16/r32, m */
                ctxt->dst.val = ctxt->src.addr.mem.ea;
                break;
        case 0x90 ... 0x97: /* nop / xchg reg, rax */
                if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
                        break;
                rc = em_xchg(ctxt);
                break;
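        /*
         * cbw/cwde/cdqe sign-extend the low half of the accumulator
         * into the full operand size selected by op_bytes.
         */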
        case 0x98: /* cbw/cwde/cdqe */
                switch (ctxt->op_bytes) {
                case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
                case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
                case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
                }
                break;
        case 0xcc:              /* int3 */
                rc = emulate_int(ctxt, 3);
                break;
        case 0xcd:              /* int n */
                rc = emulate_int(ctxt, ctxt->src.val);
                break;
        case 0xce:              /* into */
                if (ctxt->eflags & EFLG_OF)
                        rc = emulate_int(ctxt, 4);
                break;
        case 0xe9: /* jmp rel */
        case 0xeb: /* jmp rel short */
                rc = jmp_rel(ctxt, ctxt->src.val);
                ctxt->dst.type = OP_NONE; /* Disable writeback. */
                break;
        case 0xf4:              /* hlt */
                ctxt->ops->halt(ctxt);
                break;
        case 0xf5:              /* cmc */
                /* complement carry flag from eflags reg */
                ctxt->eflags ^= EFLG_CF;
                break;
        case 0xf8: /* clc */
                ctxt->eflags &= ~EFLG_CF;
                break;
        case 0xf9: /* stc */
                ctxt->eflags |= EFLG_CF;
                break;
        case 0xfc: /* cld */
                ctxt->eflags &= ~EFLG_DF;
                break;
        case 0xfd: /* std */
                ctxt->eflags |= EFLG_DF;
                break;
        default:
                goto cannot_emulate;
        }

        if (rc != X86EMUL_CONTINUE)
                goto done;
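        /*
         * Writeback stage: commit the destination operand and, for
         * string instructions, advance the index registers and the
         * iteration count.
         */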
writeback:
        rc = writeback(ctxt);
        if (rc != X86EMUL_CONTINUE)
                goto done;

        /*
         * Restore dst type in case the decode will be reused
         * (happens for string instructions).
         */
        ctxt->dst.type = saved_dst_type;
        if ((ctxt->d & SrcMask) == SrcSI)
                string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

        if ((ctxt->d & DstMask) == DstDI)
                string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
        if (ctxt->rep_prefix && (ctxt->d & String)) {
                unsigned int count;
                struct read_cache *r = &ctxt->io_read;

                if ((ctxt->d & SrcMask) == SrcSI)
                        count = ctxt->src.count;
                else
                        count = ctxt->dst.count;
                register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
                                -count);

                if (!string_insn_completed(ctxt)) {
                        /*
                         * Re-enter guest when pio read ahead buffer is
                         * empty or, if it is not used, after every 1024
                         * iterations.
                         */
                        if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
                            (r->end == 0 || r->end != r->pos)) {
                                /*
                                 * Reset read cache. Usually happens before
                                 * decode, but since instruction is restarted
                                 * we have to do it here.
                                 */
                                ctxt->mem_read.end = 0;
                                writeback_registers(ctxt);
                                return EMULATION_RESTART;
                        }
                        goto done; /* skip rip writeback */
                }
        }

        ctxt->eip = ctxt->_eip;
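        /* Common exit path: map internal return codes onto EMULATION_*. */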
done:
        if (rc == X86EMUL_PROPAGATE_FAULT)
                ctxt->have_exception = true;
        if (rc == X86EMUL_INTERCEPTED)
                return EMULATION_INTERCEPTED;

        if (rc == X86EMUL_CONTINUE)
                writeback_registers(ctxt);

        return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
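        /*
         * Two-byte (0x0f-prefixed) opcodes without an ->execute
         * callback are handled here.
         */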
twobyte_insn:
        switch (ctxt->b) {
        case 0x09:              /* wbinvd */
                (ctxt->ops->wbinvd)(ctxt);
                break;
        case 0x08:              /* invd */
        case 0x0d:              /* GrpP (prefetch) */
        case 0x18:              /* Grp16 (prefetch/nop) */
        case 0x1f:              /* nop */
                break;
        case 0x20: /* mov cr, reg */
                ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
                break;
        case 0x21: /* mov from dr to reg */
                ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
                break;
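        /*
         * cmov always fetches the source; when the condition is false,
         * suppressing writeback leaves the destination untouched.
         */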
        case 0x40 ... 0x4f:     /* cmov */
                ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
                if (!test_cc(ctxt->b, ctxt->eflags))
                        ctxt->dst.type = OP_NONE; /* no writeback */
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc. */
                if (test_cc(ctxt->b, ctxt->eflags))
                        rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x90 ... 0x9f:     /* setcc r/m8 */
                ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
                break;
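        /* clflush needs no emulation work here and is treated as a nop. */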
        case 0xae:              /* clflush */
                break;
        case 0xb6 ... 0xb7:     /* movzx */
                ctxt->dst.bytes = ctxt->op_bytes;
                ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
                                                       : (u16) ctxt->src.val;
                break;
        case 0xbe ... 0xbf:     /* movsx */
                ctxt->dst.bytes = ctxt->op_bytes;
                ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
                                                        (s16) ctxt->src.val;
                break;
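        /*
         * xadd: em_add leaves the sum in the destination (committed by
         * the normal writeback), while the original destination value
         * is exchanged into the register source right here.
         */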
        case 0xc0 ... 0xc1:     /* xadd */
                fastop(ctxt, em_add);
                /* Write back the register source. */
                ctxt->src.val = ctxt->dst.orig_val;
                write_register_operand(&ctxt->src);
                break;
        case 0xc3:              /* movnti */
                ctxt->dst.bytes = ctxt->op_bytes;
                ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
                                                        (u64) ctxt->src.val;
                break;
        default:
                goto cannot_emulate;
        }

        if (rc != X86EMUL_CONTINUE)
                goto done;

        goto writeback;

cannot_emulate:
        return EMULATION_FAILED;
}
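/*
 * The emulator works on a local copy of the guest registers; these
 * helpers let callers invalidate or flush that cache explicitly.
 */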
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
        invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
        writeback_registers(ctxt);
}