1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the TypeBasedAliasAnalysis pass, which implements
11 // metadata-based TBAA.
13 // In LLVM IR, memory does not have types, so LLVM's own type system is not
14 // suitable for doing TBAA. Instead, metadata is added to the IR to describe
15 // a type system of a higher level language. This can be used to implement
16 // typical C/C++ TBAA, but it can also be used to implement custom alias
17 // analysis behavior for other languages.
19 // We now support two types of metadata format: scalar TBAA and struct-path
20 // aware TBAA. After all testing cases are upgraded to use struct-path aware
21 // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
// can be dropped.
24 // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
25 // three fields, e.g.:
26 // !0 = metadata !{ metadata !"an example type tree" }
27 // !1 = metadata !{ metadata !"int", metadata !0 }
28 // !2 = metadata !{ metadata !"float", metadata !0 }
29 // !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
31 // The first field is an identity field. It can be any value, usually
32 // an MDString, which uniquely identifies the type. The most important
33 // name in the tree is the name of the root node. Two trees with
34 // different root node names are entirely disjoint, even if they
35 // have leaves with common names.
37 // The second field identifies the type's parent node in the tree, or
38 // is null or omitted for a root node. A type is considered to alias
39 // all of its descendants and all of its ancestors in the tree. Also,
40 // a type is considered to alias all types in other trees, so that
41 // bitcode produced from multiple front-ends is handled conservatively.
43 // If the third field is present, it's an integer which if equal to 1
44 // indicates that the type is "constant" (meaning pointsToConstantMemory
45 // should return true; see
46 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
48 // With struct-path aware TBAA, the MDNodes attached to an instruction using
49 // "!tbaa" are called path tag nodes.
51 // The path tag node has 4 fields with the last field being optional.
53 // The first field is the base type node, it can be a struct type node
54 // or a scalar type node. The second field is the access type node, it
55 // must be a scalar type node. The third field is the offset into the base type.
56 // The last field has the same meaning as the last field of our scalar TBAA:
57 // it's an integer which if equal to 1 indicates that the access is "constant".
59 // The struct type node has a name and a list of pairs, one pair for each member
60 // of the struct. The first element of each pair is a type node (a struct type
61 // node or a scalar type node), specifying the type of the member, the second
62 // element of each pair is the offset of the member.
73 // For an access to B.a.s, we attach !5 (a path tag node) to the load/store
74 // instruction. The base type is !4 (struct B), the access type is !2 (scalar
75 // type short) and the offset is 4.
77 // !0 = metadata !{metadata !"Simple C/C++ TBAA"}
78 // !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
79 // !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
80 // !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
81 // !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
82 // // Struct type node
83 // !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
85 // The struct type nodes and the scalar type nodes form a type DAG.
87 // char (!1) -- edge to Root
88 // short (!2) -- edge to char
89 // A (!3) -- edge with offset 0 to short
90 // B (!4) -- edge with offset 0 to short and edge with offset 4 to A
92 // To check if two tags (tagX and tagY) can alias, we start from the base type
93 // of tagX, follow the edge with the correct offset in the type DAG and adjust
94 // the offset until we reach the base type of tagY or until we reach the Root
// node.
96 // If we reach the base type of tagY, compare the adjusted offset with
97 // offset of tagY, return Alias if the offsets are the same, return NoAlias
// otherwise.
99 // If we reach the Root node, perform the above starting from base type of tagY
100 // to see if we reach base type of tagX.
102 // If they have different roots, they're part of different potentially
103 // unrelated type systems, so we return Alias to be conservative.
104 // If neither node is an ancestor of the other and they have the same root,
105 // then we say NoAlias.
107 // TODO: The current metadata format doesn't support struct
108 // fields. For example:
//   struct X {
//     double d;
//     int i;
//   };
113 //   void foo(struct X *x, struct X *y, double *p) {
//     *x = *y;
//     *p = 0.0;
//   }
117 // Struct X has a double member, so the store to *x can alias the store to *p.
118 // Currently it's not possible to precisely describe all the things struct X
119 // aliases, so struct assignments must use conservative TBAA nodes. There's
120 // no scheme for attaching metadata to @llvm.memcpy yet either.
122 //===----------------------------------------------------------------------===//
124 #include "llvm/Analysis/Passes.h"
125 #include "llvm/Analysis/AliasAnalysis.h"
126 #include "llvm/IR/Constants.h"
127 #include "llvm/IR/LLVMContext.h"
128 #include "llvm/IR/Metadata.h"
129 #include "llvm/IR/Module.h"
130 #include "llvm/Pass.h"
131 #include "llvm/Support/CommandLine.h"
132 #include "llvm/ADT/SetVector.h"
133 using namespace llvm;
135 // Command-line switch "enable-tbaa" (boolean, initialized to true) for turning
136 // TBAA functionality off. Stripping the !tbaa tags from the IR achieves the
// same effect, but flipping a flag is sometimes more convenient.
138 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
141 /// TBAANode - Lightweight view over a scalar-format TBAA MDNode. It exposes
142 /// named accessors so that callers do not need to know which operand slot
143 /// carries which piece of alias-analysis information.
// A default-constructed wrapper holds a null MDNode pointer.
148 TBAANode() : Node(nullptr) {}
// Wrap an existing node; the pointer is stored as given.
149 explicit TBAANode(const MDNode *N) : Node(N) {}
151 /// getNode - Return the wrapped MDNode (null for a default-constructed wrapper).
152 const MDNode *getNode() const { return Node; }
154 /// getParent - Get this TBAANode's parent in the alias tree. The parent link
/// is read from operand 1 when that operand exists.
155 TBAANode getParent() const {
// With fewer than two operands there is no parent slot to read.
156 if (Node->getNumOperands() < 2)
// dyn_cast_or_null yields null if operand 1 is null or is not an MDNode.
158 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
161 // Ok, this node has a valid parent. Return it.
165 /// TypeIsImmutable - Test if this TBAANode represents a type for objects
166 /// which are not modified (by any means) in the context where this
167 /// AliasAnalysis is relevant. The flag is carried by the optional operand 2.
168 bool TypeIsImmutable() const {
// With fewer than three operands the optional flag operand is absent.
169 if (Node->getNumOperands() < 3)
// dyn_extract yields null when operand 2 does not wrap a ConstantInt.
171 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
// The answer is the lowest bit of the constant's value.
174 return CI->getValue()[0];
178 /// Lightweight view over a struct-path TBAA tag MDNode. The accessors below
179 /// read operand 0 as the base type, operand 1 as the access type, operand 2
180 /// as the offset, and the optional operand 3 as the "immutable" flag.
181 class TBAAStructTagNode {
182 /// This node should be created with createTBAAStructTagNode.
// Wrap an existing tag node; the pointer is stored as given.
186 explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
188 /// Get the MDNode for this TBAAStructTagNode.
189 const MDNode *getNode() const { return Node; }
/// Base type node (operand 0), or null if that operand is null or is not an
/// MDNode.
191 const MDNode *getBaseType() const {
192 return dyn_cast_or_null<MDNode>(Node->getOperand(0));
/// Access type node (operand 1), or null if that operand is null or is not
/// an MDNode.
194 const MDNode *getAccessType() const {
195 return dyn_cast_or_null<MDNode>(Node->getOperand(1));
/// Offset (operand 2) as a zero-extended 64-bit value. mdconst::extract is
/// the unchecked form, so operand 2 is expected to wrap a ConstantInt.
197 uint64_t getOffset() const {
198 return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
200 /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
201 /// objects which are not modified (by any means) in the context where this
202 /// AliasAnalysis is relevant. The flag is carried by the optional operand 3.
203 bool TypeIsImmutable() const {
// With fewer than four operands the optional flag operand is absent.
204 if (Node->getNumOperands() < 4)
// dyn_extract yields null when operand 3 does not wrap a ConstantInt.
206 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
// The answer is the lowest bit of the constant's value.
209 return CI->getValue()[0];
213 /// Lightweight view over a TBAA type node (scalar or struct) in the type DAG.
214 /// It hides how the name and the (field type, offset) pairs are laid out in
215 /// the MDNode's operands.
216 class TBAAStructTypeNode {
217 /// This node should be created with createTBAAStructTypeNode.
// A default-constructed wrapper holds a null MDNode pointer.
221 TBAAStructTypeNode() : Node(nullptr) {}
222 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
224 /// Get the MDNode for this TBAAStructTypeNode.
225 const MDNode *getNode() const { return Node; }
227 /// Get this TBAAStructTypeNode's field in the type DAG with
228 /// given offset. Update the offset to be relative to the field type.
/// A wrapper around a null node is returned when there is no such field.
229 TBAAStructTypeNode getParent(uint64_t &Offset) const {
230 // Parent can be omitted for the root node.
231 if (Node->getNumOperands() < 2)
232 return TBAAStructTypeNode();
234 // Fast path for a scalar type node and a struct type node with a single
// field: at most three operands, i.e. a name, one type node and an optional
// offset.
236 if (Node->getNumOperands() <= 3) {
// The field's own offset is read from operand 2 when that operand exists.
237 uint64_t Cur = Node->getNumOperands() == 2
239 : mdconst::extract<ConstantInt>(Node->getOperand(2))
// The single field's type node is operand 1.
242 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
244 return TBAAStructTypeNode();
245 return TBAAStructTypeNode(P);
248 // Assume the offsets are in order. We return the previous field if
249 // the current offset is bigger than the given offset.
// Operands are walked as (type, offset) pairs: Idx is the type node and
// Idx + 1 is that field's offset.
251 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
252 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
256 "TBAAStructTypeNode::getParent should have an offset match!");
261 // Move along the last field.
// The last (type, offset) pair starts two operands before the end.
263 TheIdx = Node->getNumOperands() - 2;
264 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
267 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
269 return TBAAStructTypeNode();
270 return TBAAStructTypeNode(P);
276 /// TypeBasedAliasAnalysis - A simple alias analysis implementation that
277 /// answers queries from type-based (!tbaa) metadata. It is both an
/// ImmutablePass and an AliasAnalysis.
278 class TypeBasedAliasAnalysis : public ImmutablePass,
279 public AliasAnalysis {
281 static char ID; // Class identification, replacement for typeinfo
// The constructor registers the pass with the global PassRegistry.
282 TypeBasedAliasAnalysis() : ImmutablePass(ID) {
283 initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
286 bool doInitialization(Module &M) override;
288 /// getAdjustedAnalysisPointer - This method is used when a pass implements
289 /// an analysis interface through multiple inheritance. If needed, it
290 /// should override this to adjust the this pointer as needed for the
291 /// specified pass info.
292 void *getAdjustedAnalysisPointer(const void *PI) override {
// A request for the AliasAnalysis interface gets the AliasAnalysis subobject.
293 if (PI == &AliasAnalysis::ID)
294 return (AliasAnalysis*)this;
// Helpers that decide whether two TBAA nodes may alias: Aliases is the
// general entry point and PathAliases handles struct-path tags.
298 bool Aliases(const MDNode *A, const MDNode *B) const;
299 bool PathAliases(const MDNode *A, const MDNode *B) const;
// Overrides of the pass and AliasAnalysis interfaces.
302 void getAnalysisUsage(AnalysisUsage &AU) const override;
303 AliasResult alias(const MemoryLocation &LocA,
304 const MemoryLocation &LocB) override;
305 bool pointsToConstantMemory(const MemoryLocation &Loc,
306 bool OrLocal) override;
307 ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
308 ModRefBehavior getModRefBehavior(const Function *F) override;
309 ModRefResult getModRefInfo(ImmutableCallSite CS,
310 const MemoryLocation &Loc) override;
311 ModRefResult getModRefInfo(ImmutableCallSite CS1,
312 ImmutableCallSite CS2) override;
314 } // End of anonymous namespace
316 // Register this pass: define the storage for the static pass identifier, then
// register the pass with the AliasAnalysis analysis group under the argument
// string "tbaa" and the description "Type-Based Alias Analysis".
317 char TypeBasedAliasAnalysis::ID = 0;
318 INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
319 "Type-Based Alias Analysis", false, true, false)
/// Factory for the pass: allocates a new TypeBasedAliasAnalysis with `new` and
/// returns it through its ImmutablePass base pointer.
321 ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
322 return new TypeBasedAliasAnalysis();
/// Per-module setup: initializes the AliasAnalysis base with this pass and the
/// module's DataLayout.
325 bool TypeBasedAliasAnalysis::doInitialization(Module &M) {
326 InitializeAliasAnalysis(this, &M.getDataLayout());
/// Declares that this pass preserves all analyses, then lets the AliasAnalysis
/// base add its own requirements to AU.
331 TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
332 AU.setPreservesAll();
333 AliasAnalysis::getAnalysisUsage(AU);
336 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
337 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
/// format. The node must also have at least three operands to qualify.
339 static bool isStructPathTBAA(const MDNode *MD) {
340 // An anonymous TBAA root also starts with an MDNode (dragonegg emits such
// nodes), so the operand-0 test alone is not enough; require at least three
// operands as well.
// NOTE(review): operand 0 is read before the operand count is checked --
// confirm that callers never pass a node with zero operands.
342 return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
345 /// Aliases - Test whether the type represented by A may alias the
346 /// type represented by B. When both nodes are struct-path tags the question
/// is delegated to PathAliases; otherwise the scalar type tree is walked in
/// both directions looking for an ancestor relationship.
348 TypeBasedAliasAnalysis::Aliases(const MDNode *A,
349 const MDNode *B) const {
350 // Struct-path handling applies only when both MDNodes are struct-path aware.
351 if (isStructPathTBAA(A) && isStructPathTBAA(B))
352 return PathAliases(A, B);
354 // Keep track of the root node for A and B.
355 TBAANode RootA, RootB;
357 // Climb the tree from A to see if we reach B.
358 for (TBAANode T(A); ; ) {
359 if (T.getNode() == B)
360 // B is an ancestor of A.
369 // Climb the tree from B to see if we reach A.
370 for (TBAANode T(B); ; ) {
371 if (T.getNode() == A)
372 // A is an ancestor of B.
381 // Neither node is an ancestor of the other.
383 // If they have different roots, they're part of different potentially
384 // unrelated type systems, so we must be conservative.
385 if (RootA.getNode() != RootB.getNode())
388 // If they have the same root, then we've proved there's no alias.
392 /// Test whether the struct-path tag represented by A may alias the
393 /// struct-path tag represented by B. Both arguments must be struct-path
/// tags (this is asserted). The type DAG is climbed from each tag's base type
/// in turn; if the other tag's base type is reached, the result is whether the
/// adjusted offsets are equal.
395 TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
396 const MDNode *B) const {
397 // Verify that both input nodes are struct-path aware.
398 assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
399 assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
401 // Keep track of the root node for A and B.
402 TBAAStructTypeNode RootA, RootB;
403 TBAAStructTagNode TagA(A), TagB(B);
405 // TODO: We need to check if AccessType of TagA encloses AccessType of
406 // TagB to support aggregate AccessType. If yes, return true.
408 // Start from the base type of A, follow the edge with the correct offset in
409 // the type DAG and adjust the offset until we reach the base type of B or
410 // until we reach the Root node.
411 // Compare the adjusted offset once we have the same base.
413 // Climb the type DAG from base type of A to see if we reach base type of B.
414 const MDNode *BaseA = TagA.getBaseType();
415 const MDNode *BaseB = TagB.getBaseType();
416 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
417 for (TBAAStructTypeNode T(BaseA); ; ) {
418 if (T.getNode() == BaseB)
419 // Base type of A encloses base type of B, check if the offsets match.
420 return OffsetA == OffsetB;
423 // Follow the edge with the correct offset, OffsetA will be adjusted to
424 // be relative to the field type.
425 T = T.getParent(OffsetA);
430 // Reset OffsetA and climb the type DAG from base type of B to see if we reach
// base type of A. The reset is needed because the first climb modified
// OffsetA through getParent's reference parameter.
432 OffsetA = TagA.getOffset();
433 for (TBAAStructTypeNode T(BaseB); ; ) {
434 if (T.getNode() == BaseA)
435 // Base type of B encloses base type of A, check if the offsets match.
436 return OffsetA == OffsetB;
439 // Follow the edge with the correct offset, OffsetB will be adjusted to
440 // be relative to the field type.
441 T = T.getParent(OffsetB);
446 // Neither node is an ancestor of the other.
448 // If they have different roots, they're part of different potentially
449 // unrelated type systems, so we must be conservative.
450 if (RootA.getNode() != RootB.getNode())
453 // If they have the same root, then we've proved there's no alias.
/// Alias query for two memory locations, based on their TBAA tags. Whenever
/// TBAA cannot rule an alias out, the query is forwarded to the chained
/// AliasAnalysis implementation.
457 AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA,
458 const MemoryLocation &LocB) {
460 return AliasAnalysis::alias(LocA, LocB);
462 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
// defer to the chained AliasAnalysis.
464 const MDNode *AM = LocA.AATags.TBAA;
465 if (!AM) return AliasAnalysis::alias(LocA, LocB);
466 const MDNode *BM = LocB.AATags.TBAA;
467 if (!BM) return AliasAnalysis::alias(LocA, LocB);
469 // If they may alias, chain to the next AliasAnalysis.
471 return AliasAnalysis::alias(LocA, LocB);
473 // Otherwise return a definitive result.
/// Constant-memory query driven by the "immutable" flag of the location's TBAA
/// tag. Locations without a tag, and tags without the flag, are forwarded to
/// the chained AliasAnalysis implementation.
477 bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
480 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// No TBAA tag on this location: nothing to add, defer to the chain.
482 const MDNode *M = Loc.AATags.TBAA;
483 if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
485 // If this is an "immutable" type, we can assume the pointer is pointing
486 // to constant memory. The flag is read through the wrapper matching the
// tag's format: TBAANode for scalar tags, TBAAStructTagNode for struct-path
// tags.
487 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
488 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
491 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
/// Mod/ref behavior of a call site, refined by the call instruction's !tbaa
/// tag: an "immutable" tag caps the behavior at OnlyReadsMemory. The cap is
/// combined with the chained implementation's answer using bitwise AND.
494 AliasAnalysis::ModRefBehavior
495 TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
497 return AliasAnalysis::getModRefBehavior(CS);
// Start with no restriction from TBAA.
499 ModRefBehavior Min = UnknownModRefBehavior;
501 // If this is an "immutable" type, we can assume the call doesn't write
// memory. The flag is read through the wrapper matching the tag's format.
503 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
504 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
505 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
506 Min = OnlyReadsMemory;
508 return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
/// Mod/ref behavior of a function. TBAA contributes nothing here, so the query
/// is forwarded unchanged.
511 AliasAnalysis::ModRefBehavior
512 TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
513 // Functions don't have metadata. Just chain to the next implementation.
514 return AliasAnalysis::getModRefBehavior(F);
/// Mod/ref query for a call site against a memory location. The TBAA tag of
/// the location and the !tbaa tag of the call instruction are consulted only
/// when both are present; otherwise the query is forwarded to the chained
/// AliasAnalysis implementation.
517 AliasAnalysis::ModRefResult
518 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
519 const MemoryLocation &Loc) {
521 return AliasAnalysis::getModRefInfo(CS, Loc);
// L is the location's tag and M is the call instruction's tag.
523 if (const MDNode *L = Loc.AATags.TBAA)
524 if (const MDNode *M =
525 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
529 return AliasAnalysis::getModRefInfo(CS, Loc);
/// Mod/ref query between two call sites. The !tbaa tags of the two call
/// instructions are compared with Aliases() only when both are present;
/// otherwise the query is forwarded to the chained AliasAnalysis
/// implementation.
532 AliasAnalysis::ModRefResult
533 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
534 ImmutableCallSite CS2) {
536 return AliasAnalysis::getModRefInfo(CS1, CS2);
// M1 and M2 are the tags of the first and second call instruction.
538 if (const MDNode *M1 =
539 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
540 if (const MDNode *M2 =
541 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
542 if (!Aliases(M1, M2))
545 return AliasAnalysis::getModRefInfo(CS1, CS2);
/// Returns true when this TBAA node describes a vtable-pointer access, i.e.
/// when the relevant type name is the string "vtable pointer". For a scalar
/// tag the name is operand 0 of this node; for a struct-path tag it is operand
/// 0 of the access type (operand 1 of this node).
548 bool MDNode::isTBAAVtableAccess() const {
// Scalar format: the node's own first operand is the type name.
549 if (!isStructPathTBAA(this)) {
// NOTE(review): isStructPathTBAA has already read operand 0 by this point --
// confirm that zero-operand nodes cannot reach this check.
550 if (getNumOperands() < 1) return false;
551 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
552 if (Tag1->getString() == "vtable pointer") return true;
557 // For struct-path aware TBAA, we use the access type of the tag.
558 if (getNumOperands() < 2) return false;
// cast_or_null accepts a null operand; a null access type means "no".
559 MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
560 if (!Tag) return false;
561 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
562 if (Tag1->getString() == "vtable pointer") return true;
/// Computes a TBAA node that is generic enough to describe both A and B. The
/// parent chain (operand 1) of each node is collected into a path, the two
/// paths are compared starting from their last entries, and for struct-path
/// input the resulting type node is wrapped into a fresh tag node.
567 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
574 // For struct-path aware TBAA, we use the access type of the tag.
575 bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
// Replace each tag by its access type (operand 1); a null access type yields
// a null result.
577 A = cast_or_null<MDNode>(A->getOperand(1));
578 if (!A) return nullptr;
579 B = cast_or_null<MDNode>(B->getOperand(1));
580 if (!B) return nullptr;
// Path of nodes collected while climbing from A. A set-vector is used, and
// the fatal error below reports a cycle in the metadata.
583 SmallSetVector<MDNode *, 4> PathA;
587 report_fatal_error("Cycle found in TBAA metadata.");
// Step to the parent (operand 1) when the node has at least two operands.
589 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
// Same walk for B.
593 SmallSetVector<MDNode *, 4> PathB;
597 report_fatal_error("Cycle found in TBAA metadata.");
599 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
// Compare the two paths from their last entries (the ends farthest from A
// and B) toward the front.
603 int IA = PathA.size() - 1;
604 int IB = PathB.size() - 1;
606 MDNode *Ret = nullptr;
607 while (IA >= 0 && IB >=0) {
608 if (PathA[IA] == PathB[IB])
620 // We need to convert from a type node to a tag node: the new tag uses Ret as
// both base type and access type, with a 64-bit offset of 0.
621 Type *Int64 = IntegerType::get(A->getContext(), 64);
622 Metadata *Ops[3] = {Ret, Ret,
623 ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
624 return MDNode::get(A->getContext(), Ops);
/// Fills N with this instruction's alias-analysis metadata: !tbaa into N.TBAA,
/// !alias.scope into N.Scope and !noalias into N.NoAlias. Each field has two
/// visible forms: a combine with the value already in N (getMostGenericTBAA,
/// getMostGenericAliasScope, intersect) and a plain overwrite.
/// NOTE(review): presumably Merge selects the combining form -- confirm.
627 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
// TBAA tag.
630 MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
632 N.TBAA = getMetadata(LLVMContext::MD_tbaa);
// Alias scope list.
635 N.Scope = MDNode::getMostGenericAliasScope(
636 N.Scope, getMetadata(LLVMContext::MD_alias_scope));
638 N.Scope = getMetadata(LLVMContext::MD_alias_scope);
// No-alias scope list.
642 MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
644 N.NoAlias = getMetadata(LLVMContext::MD_noalias);