X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAMDGPU%2FAMDGPUPromoteAlloca.cpp;h=87d50d5870598726b0e86e371d0ad2e3022f9d70;hb=e0d5233e365fe138f0afd9860ca11e0dd5bf0039;hp=4a65bfc57f14916b20883fd9a492a0dbdea0e1f8;hpb=953c6814730951ad9a286d7991e9c8c481433d45;p=oota-llvm.git

diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 4a65bfc57f1..87d50d58705 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -54,7 +54,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
 
 bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
 
-  const FunctionType *FTy = F.getFunctionType();
+  FunctionType *FTy = F.getFunctionType();
 
   LocalMemAvailable = ST.getLocalMemorySize();
 
@@ -63,7 +63,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
   // possible these arguments require the entire local memory space, so
   // we cannot use local memory in the pass.
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
-    const Type *ParamTy = FTy->getParamType(i);
+    Type *ParamTy = FTy->getParamType(i);
     if (ParamTy->isPointerTy() &&
         ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       LocalMemAvailable = 0;
@@ -77,7 +77,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
     // Check how much local memory is being used by global objects
     for (Module::global_iterator I = Mod->global_begin(),
                                  E = Mod->global_end(); I != E; ++I) {
-      GlobalVariable *GV = I;
+      GlobalVariable *GV = &*I;
       PointerType *GVTy = GV->getType();
       if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
         continue;
@@ -101,7 +101,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
   return false;
 }
 
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+static VectorType *arrayTypeToVecType(Type *ArrayTy) {
   return VectorType::get(ArrayTy->getArrayElementType(),
                          ArrayTy->getArrayNumElements());
 }
@@ -134,13 +134,17 @@ static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
 //
 // TODO: Check isTriviallyVectorizable for calls and handle other
 // instructions.
-static bool canVectorizeInst(Instruction *Inst) {
+static bool canVectorizeInst(Instruction *Inst, User *User) {
   switch (Inst->getOpcode()) {
   case Instruction::Load:
-  case Instruction::Store:
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast:
     return true;
+  case Instruction::Store: {
+    // Must be the stored pointer operand, not a stored value.
+    StoreInst *SI = cast<StoreInst>(Inst);
+    return SI->getPointerOperand() == User;
+  }
   default:
     return false;
   }
@@ -166,7 +170,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
   for (User *AllocaUser : Alloca->users()) {
     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
     if (!GEP) {
-      if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
         return false;
 
       WorkList.push_back(AllocaUser);
@@ -184,7 +188,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
 
     GEPVectorIdx[GEP] = Index;
     for (User *GEPUser : AllocaUser->users()) {
-      if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
         return false;
 
       WorkList.push_back(GEPUser);
@@ -240,7 +244,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
   for (User *User : Val->users()) {
     if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
       continue;
-    if (isa<CallInst>(User)) {
+    if (CallInst *CI = dyn_cast<CallInst>(User)) {
+      // TODO: We might be able to handle some cases where the callee is a
+      // constantexpr bitcast of a function.
+      if (!CI->getCalledFunction())
+        return false;
+
       WorkList.push_back(User);
       continue;
     }
@@ -250,6 +259,12 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
     if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
       return false;
 
+    if (StoreInst *SI = dyn_cast_or_null<StoreInst>(UseInst)) {
+      // Reject if the stored value is not the pointer operand.
+      if (SI->getPointerOperand() != Val)
+        return false;
+    }
+
     if (!User->getType()->isPointerTy())
       continue;
 
@@ -261,6 +276,9 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
 }
 
 void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+  if (!I.isStaticAlloca())
+    return;
+
   IRBuilder<> Builder(&I);
 
   // First try to replace the alloca with a vector
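
A minimal sketch, outside the patch itself, of the rule that both the new Instruction::Store case in canVectorizeInst and the StoreInst check in collectUsesWithPtrTypes apply: a store is only safe to follow when the traced pointer is the store's address operand; if the pointer is the value being stored, it escapes into memory. The helper name below is hypothetical and assumes only the standard LLVM StoreInst API:

  #include "llvm/IR/Instructions.h"

  // True when SI merely writes through Val (Val is the address operand);
  // false when Val is the stored value, i.e. the pointer escapes into memory.
  static bool storesOnlyThroughVal(const llvm::StoreInst *SI,
                                   const llvm::Value *Val) {
    return SI->getPointerOperand() == Val;
  }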