From 75bb472c0612fe905e15e3ceec558d70dcda9dd5 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Mon, 27 Oct 2014 08:38:54 +0000 Subject: [PATCH] [asan-asm-instrumentation] Added comment describing how asm instrumentation works. Summary: [asan-asm-instrumentation] Added comment describing how asm instrumentation works. Reviewers: eugenis Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D5970 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220670 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/AsmParser/X86AsmInstrumentation.cpp | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index 32c107deb06..9c49a113638 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -30,6 +30,70 @@ #include #include +// The following comment describes how assembly instrumentation works. +// Currently we have only AddressSanitizer instrumentation, but we're +// planning to implement MemorySanitizer for inline assembly too. If +// you're not familiar with the AddressSanitizer algorithm, please read +// https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm. +// +// When inline assembly is parsed by an instance of X86AsmParser, all +// instructions are emitted via the EmitInstruction method. That's the +// place where X86AsmInstrumentation analyzes an instruction and +// decides whether the instruction should be emitted as is or +// instrumentation is required. The latter case happens when an +// instruction reads from or writes to memory. Now the instruction opcode +// is explicitly checked, and if an instruction has a memory operand +// (for instance, movq (%rsi, %rcx, 8), %rax) - it should be +// instrumented. There also exist instructions that modify +// memory but don't have explicit memory operands, for instance, +// movs. 
+// +// Let's first consider 8-byte memory accesses when an instruction +// has an explicit memory operand. In this case we need two registers - +// AddressReg to compute address of the memory cells which are accessed +// and ShadowReg to compute corresponding shadow address. So, we need +// to spill both registers before instrumentation code and restore them +// after instrumentation. Thus, in general, instrumentation code will +// look like this: +// PUSHF # Store flags, otherwise they will be overwritten +// PUSH AddressReg # spill AddressReg +// PUSH ShadowReg # spill ShadowReg +// LEA MemOp, AddressReg # compute address of the memory operand +// MOV AddressReg, ShadowReg +// SHR ShadowReg, 3 +// # ShadowOffset(AddressReg >> 3) contains address of a shadow +// # corresponding to MemOp. +// CMP ShadowOffset(ShadowReg), 0 # test shadow value +// JZ .Done # when shadow equals zero, everything is fine +// MOV AddressReg, RDI +// # Call __asan_report function with AddressReg as an argument +// CALL __asan_report +// .Done: +// POP ShadowReg # Restore ShadowReg +// POP AddressReg # Restore AddressReg +// POPF # Restore flags +// +// Memory accesses of other sizes (1-, 2-, 4- and 16-byte) are +// handled in a similar manner, but small memory accesses (less than 8 +// bytes) require an additional ScratchReg, which is used for shadow value. +// +// If, for example, we're instrumenting an instruction like movs, only +// contents of RDI, RDI + AccessSize * RCX, RSI, RSI + AccessSize * +// RCX are checked. In this case there's no need to spill and restore +// AddressReg, ShadowReg or flags four times; they're saved on the stack +// just once, before instrumentation of these four addresses, and restored +// at the end of the instrumentation. +// +// There exist several things which complicate this simple algorithm. +// * Instrumented memory operand can have RSP as a base or an index +// register. 
So we need to add a constant offset before computation +// of memory address, since flags, AddressReg, ShadowReg, etc. were +// already stored on the stack and RSP was modified. +// * Debug info (usually DWARF) should be adjusted, because sometimes +// RSP is used as a frame register. So, we need to select some +// register as a frame register and temporarily override the current CFA +// register. + namespace llvm { namespace { -- 2.34.1