* Use the MachineConstantPool for storing constants instead of a hash_set;

[oota-llvm.git] / lib / Target / SparcV9 / SparcV9InstrSelection.cpp
diff --git a/lib/Target/SparcV9/SparcV9InstrSelection.cpp b/lib/Target/SparcV9/SparcV9InstrSelection.cpp

index e6452615dfae04989de586c38c38081c1b9c8283..b377658b9c8113717365c23ed5e2193d49021e60 100644 (file)
--- a/lib/Target/SparcV9/SparcV9InstrSelection.cpp
+++ b/lib/Target/SparcV9/SparcV9InstrSelection.cpp
@@ -1,31 +1,36 @@
  //===-- SparcInstrSelection.cpp -------------------------------------------===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
  //
  //  BURS instruction selection for SPARC V9 architecture.      
  //
  //===----------------------------------------------------------------------===//
  
-#include "SparcInternals.h"
  #include "SparcInstrSelectionSupport.h"
+#include "SparcInternals.h"
  #include "SparcRegClassInfo.h"
-#include "llvm/CodeGen/InstrSelectionSupport.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrAnnot.h"
+#include "llvm/Constants.h"
+#include "llvm/ConstantHandling.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
  #include "llvm/CodeGen/InstrForest.h"
  #include "llvm/CodeGen/InstrSelection.h"
+#include "llvm/CodeGen/InstrSelectionSupport.h"
+#include "llvm/CodeGen/MachineCodeForInstruction.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineCodeForInstruction.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/iTerminators.h"
-#include "llvm/iMemory.h"
-#include "llvm/iOther.h"
-#include "llvm/Function.h"
-#include "llvm/Constants.h"
-#include "llvm/ConstantHandling.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrAnnot.h"
  #include "Support/MathExtras.h"
-#include <math.h>
  #include <algorithm>
+#include <cmath>
  
  static inline void Add3OperandInstr(unsigned Opcode, InstructionNode* Node,
                                      std::vector<MachineInstr*>& mvec) {
@@ -35,25 +40,6 @@ static inline void Add3OperandInstr(unsigned Opcode, InstructionNode* Node,
  }
  
  
-
-//---------------------------------------------------------------------------
-// Function: GetMemInstArgs
-// 
-// Purpose:
-//   Get the pointer value and the index vector for a memory operation
-//   (GetElementPtr, Load, or Store).  If all indices of the given memory
-//   operation are constant, fold in constant indices in a chain of
-//   preceding GetElementPtr instructions (if any), and return the
-//   pointer value of the first instruction in the chain.
-//   All folded instructions are marked so no code is generated for them.
-//
-// Return values:
-//   Returns the pointer Value to use.
-//   Returns the resulting IndexVector in idxVec.
-//   Returns true/false in allConstantIndices if all indices are/aren't const.
-//---------------------------------------------------------------------------
-
-
  //---------------------------------------------------------------------------
  // Function: FoldGetElemChain
  // 
@@ -179,7 +165,7 @@ GetGEPInstArgs(InstructionNode* gepNode,
    Value* ptrVal = gepI->getPointerOperand();
    InstrTreeNode* ptrChild = gepNode->leftChild(); 
  
-  // Extract the index vector of the GEP instructin.
+  // Extract the index vector of the GEP instruction.
    // If all indices are constant and first index is zero, try to fold
    // in preceding GEPs with all constant indices.
    for (User::op_iterator OI=gepI->idx_begin(),  OE=gepI->idx_end();
@@ -493,64 +479,66 @@ ChooseMovpregiForSetCC(const InstructionNode* instrNode)
  
  
  static inline MachineOpCode
-ChooseConvertToFloatInstr(OpLabel vopCode, const Type* opType)
+ChooseConvertToFloatInstr(const TargetMachine& target,
+                          OpLabel vopCode, const Type* opType)
  {
    assert((vopCode == ToFloatTy || vopCode == ToDoubleTy) &&
           "Unrecognized convert-to-float opcode!");
+  assert((opType->isIntegral() || opType->isFloatingPoint() ||
+          isa<PointerType>(opType))
+         && "Trying to convert a non-scalar type to FLOAT/DOUBLE?");
  
    MachineOpCode opCode = V9::INVALID_OPCODE;
-  
-  if (opType == Type::SByteTy || opType == Type::UByteTy ||
-      opType == Type::ShortTy || opType == Type::UShortTy ||
-      opType == Type::IntTy   || opType == Type::UIntTy)
-      opCode = (vopCode == ToFloatTy? V9::FITOS : V9::FITOD);
-  else if (opType == Type::LongTy || opType == Type::ULongTy ||
-           isa<PointerType>(opType))
-      opCode = (vopCode == ToFloatTy? V9::FXTOS : V9::FXTOD);
-  else if (opType == Type::FloatTy)
-      opCode = (vopCode == ToFloatTy? V9::INVALID_OPCODE : V9::FSTOD);
+
+  unsigned opSize = target.getTargetData().getTypeSize(opType);
+
+  if (opType == Type::FloatTy)
+    opCode = (vopCode == ToFloatTy? V9::NOP : V9::FSTOD);
    else if (opType == Type::DoubleTy)
-      opCode = (vopCode == ToFloatTy? V9::FDTOS : V9::INVALID_OPCODE);
-  else
-    assert(0 && "Trying to convert a non-scalar type to DOUBLE?");
+    opCode = (vopCode == ToFloatTy? V9::FDTOS : V9::NOP);
+  else if (opSize <= 4)
+    opCode = (vopCode == ToFloatTy? V9::FITOS : V9::FITOD);
+  else {
+    assert(opSize == 8 && "Unrecognized type size > 4 and < 8!");
+    opCode = (vopCode == ToFloatTy? V9::FXTOS : V9::FXTOD);
+  }
    
    return opCode;
  }
  
  static inline MachineOpCode 
-ChooseConvertFPToIntInstr(Type::PrimitiveID tid, const Type* opType)
+ChooseConvertFPToIntInstr(const TargetMachine& target,
+                          const Type* destType, const Type* opType)
  {
-  MachineOpCode opCode = V9::INVALID_OPCODE;;
-
    assert((opType == Type::FloatTy || opType == Type::DoubleTy)
           && "This function should only be called for FLOAT or DOUBLE");
+  assert((destType->isIntegral() || isa<PointerType>(destType))
+         && "Trying to convert FLOAT/DOUBLE to a non-scalar type?");
  
-  // SPARC does not have a float-to-uint conversion, only a float-to-int.
-  // For converting an FP value to uint32_t, we first need to convert to
-  // uint64_t and then to uint32_t, or we may overflow the signed int
-  // representation even for legal uint32_t values.  This expansion is
-  // done by the Preselection pass.
-  // 
-  if (tid == Type::UIntTyID) {
-    assert(tid != Type::UIntTyID && "FP-to-uint conversions must be expanded"
-           " into FP->long->uint for SPARC v9:  SO RUN PRESELECTION PASS!");
-  } else if (tid == Type::SByteTyID || tid == Type::ShortTyID || 
-             tid == Type::IntTyID   || tid == Type::UByteTyID ||
-             tid == Type::UShortTyID) {
+  MachineOpCode opCode = V9::INVALID_OPCODE;
+
+  unsigned destSize = target.getTargetData().getTypeSize(destType);
+
+  if (destType == Type::UIntTy)
+    assert(destType != Type::UIntTy && "Expand FP-to-uint beforehand.");
+  else if (destSize <= 4)
      opCode = (opType == Type::FloatTy)? V9::FSTOI : V9::FDTOI;
-  } else if (tid == Type::LongTyID || tid == Type::ULongTyID) {
-      opCode = (opType == Type::FloatTy)? V9::FSTOX : V9::FDTOX;
-  } else
-    assert(0 && "Should not get here, Mo!");
+  else {
+    assert(destSize == 8 && "Unrecognized type size > 4 and < 8!");
+    opCode = (opType == Type::FloatTy)? V9::FSTOX : V9::FDTOX;
+  }
  
    return opCode;
  }
  
-MachineInstr*
-CreateConvertFPToIntInstr(Type::PrimitiveID destTID,
-                          Value* srcVal, Value* destVal)
+static MachineInstr*
+CreateConvertFPToIntInstr(const TargetMachine& target,
+                          Value* srcVal,
+                          Value* destVal,
+                          const Type* destType)
  {
-  MachineOpCode opCode = ChooseConvertFPToIntInstr(destTID, srcVal->getType());
+  MachineOpCode opCode = ChooseConvertFPToIntInstr(target, destType,
+                                                   srcVal->getType());
    assert(opCode != V9::INVALID_OPCODE && "Expected to need conversion!");
    return BuildMI(opCode, 2).addReg(srcVal).addRegDef(destVal);
  }
@@ -558,19 +546,11 @@ CreateConvertFPToIntInstr(Type::PrimitiveID destTID,
  // CreateCodeToConvertFloatToInt: Convert FP value to signed or unsigned integer
  // The FP value must be converted to the dest type in an FP register,
  // and the result is then copied from FP to int register via memory.
-//
+// SPARC does not have a float-to-uint conversion, only a float-to-int (fdtoi).
  // Since fdtoi converts to signed integers, any FP value V between MAXINT+1
-// and MAXUNSIGNED (i.e., 2^31 <= V <= 2^32-1) would be converted incorrectly
-// *only* when converting to an unsigned.  (Unsigned byte, short or long
-// don't have this problem.)
-// For unsigned int, we therefore have to generate the code sequence:
-// 
-//      if (V > (float) MAXINT) {
-//        unsigned result = (unsigned) (V  - (float) MAXINT);
-//        result = result + (unsigned) MAXINT;
-//      }
-//      else
-//        result = (unsigned) V;
+// and MAXUNSIGNED (i.e., 2^31 <= V <= 2^32-1) would be converted incorrectly.
+// Therefore, for converting an FP value to uint32_t, we first need to convert
+// to uint64_t and then to uint32_t.
  // 
  static void
  CreateCodeToConvertFloatToInt(const TargetMachine& target,
@@ -579,24 +559,46 @@ CreateCodeToConvertFloatToInt(const TargetMachine& target,
                                std::vector<MachineInstr*>& mvec,
                                MachineCodeForInstruction& mcfi)
  {
+  Function* F = destI->getParent()->getParent();
+
    // Create a temporary to represent the FP register into which the
    // int value will placed after conversion.  The type of this temporary
    // depends on the type of FP register to use: single-prec for a 32-bit
    // int or smaller; double-prec for a 64-bit int.
    // 
    size_t destSize = target.getTargetData().getTypeSize(destI->getType());
-  const Type* destTypeToUse = (destSize > 4)? Type::DoubleTy : Type::FloatTy;
-  TmpInstruction* destForCast = new TmpInstruction(mcfi, destTypeToUse, opVal);
  
-  // Create the fp-to-int conversion code
-  MachineInstr* M =CreateConvertFPToIntInstr(destI->getType()->getPrimitiveID(),
-                                             opVal, destForCast);
-  mvec.push_back(M);
+  const Type* castDestType = destI->getType(); // type for the cast instr result
+  const Type* castDestRegType;          // type for cast instruction result reg
+  TmpInstruction* destForCast;          // dest for cast instruction
+  Instruction* fpToIntCopyDest = destI; // dest for fp-reg-to-int-reg copy instr
+
+  // For converting an FP value to uint32_t, we first need to convert to
+  // uint64_t and then to uint32_t, as explained above.
+  if (destI->getType() == Type::UIntTy) {
+    castDestType    = Type::ULongTy;       // use this instead of type of destI
+    castDestRegType = Type::DoubleTy;      // uint64_t needs 64-bit FP register.
+    destForCast     = new TmpInstruction(mcfi, castDestRegType, opVal);
+    fpToIntCopyDest = new TmpInstruction(mcfi, castDestType, destForCast);
+  }
+  else {
+    castDestRegType = (destSize > 4)? Type::DoubleTy : Type::FloatTy;
+    destForCast = new TmpInstruction(mcfi, castDestRegType, opVal);
+  }
+
+  // Create the fp-to-int conversion instruction (src and dest regs are FP regs)
+  mvec.push_back(CreateConvertFPToIntInstr(target, opVal, destForCast,
+                                           castDestType));
  
    // Create the fpreg-to-intreg copy code
-  target.getInstrInfo().
-    CreateCodeToCopyFloatToInt(target, destI->getParent()->getParent(),
-                               destForCast, destI, mvec, mcfi);
+  target.getInstrInfo().CreateCodeToCopyFloatToInt(target, F, destForCast,
+                                                   fpToIntCopyDest, mvec, mcfi);
+
+  // Create the uint64_t to uint32_t conversion, if needed
+  if (destI->getType() == Type::UIntTy)
+    target.getInstrInfo().
+      CreateZeroExtensionInstructions(target, F, fpToIntCopyDest, destI,
+                                      /*numLowBits*/ 32, mvec, mcfi);
  }
  
  
@@ -992,6 +994,7 @@ CreateDivConstInstruction(TargetMachine &target,
        } else if (isPowerOf2(C, pow)) {
          unsigned opCode;
          Value* shiftOperand;
+        unsigned opSize = target.getTargetData().getTypeSize(resultType);
  
          if (resultType->isSigned()) {
            // For N / 2^k, if the operand N is negative,
@@ -1020,15 +1023,13 @@ CreateDivConstInstruction(TargetMachine &target,
            addTmp = new TmpInstruction(mcfi, resultType, LHS, srlTmp,"incIfNeg");
  
            // Create the SRA or SRAX instruction to get the sign bit
-          mvec.push_back(BuildMI((resultType==Type::LongTy) ?
-                                 V9::SRAXi6 : V9::SRAi5, 3)
+          mvec.push_back(BuildMI((opSize > 4)? V9::SRAXi6 : V9::SRAi5, 3)
                           .addReg(LHS)
                           .addSImm((resultType==Type::LongTy)? pow-1 : 31)
                           .addRegDef(sraTmp));
  
            // Create the SRL or SRLX instruction to get the sign bit
-          mvec.push_back(BuildMI((resultType==Type::LongTy) ?
-                                 V9::SRLXi6 : V9::SRLi5, 3)
+          mvec.push_back(BuildMI((opSize > 4)? V9::SRLXi6 : V9::SRLi5, 3)
                           .addReg(sraTmp)
                           .addSImm((resultType==Type::LongTy)? 64-pow : 32-pow)
                           .addRegDef(srlTmp));
@@ -1039,11 +1040,11 @@ CreateDivConstInstruction(TargetMachine &target,
  
            // Get the shift operand and "right-shift" opcode to do the divide
            shiftOperand = addTmp;
-          opCode = (resultType==Type::LongTy) ? V9::SRAXi6 : V9::SRAi5;
+          opCode = (opSize > 4)? V9::SRAXi6 : V9::SRAi5;
          } else {
            // Get the shift operand and "right-shift" opcode to do the divide
            shiftOperand = LHS;
-          opCode = (resultType==Type::LongTy) ? V9::SRLXi6 : V9::SRLi5;
+          opCode = (opSize > 4)? V9::SRLXi6 : V9::SRLi5;
          }
  
          // Now do the actual shift!
@@ -1395,8 +1396,7 @@ bool CodeGenIntrinsic(LLVMIntrinsic::ID iid, CallInst &callInstr,
  {
    switch (iid) {
    case LLVMIntrinsic::va_start: {
-    // Get the address of the first vararg value on stack and copy it to
-    // the argument of va_start(va_list* ap).
+    // Get the address of the first incoming vararg argument on the stack
      bool ignore;
      Function* func = cast<Function>(callInstr.getParent()->getParent());
      int numFixedArgs   = func->getFunctionType()->getNumParams();
@@ -1405,7 +1405,7 @@ bool CodeGenIntrinsic(LLVMIntrinsic::ID iid, CallInst &callInstr,
      int firstVarArgOff = numFixedArgs * argSize + target.getFrameInfo().
        getFirstIncomingArgOffset(MachineFunction::get(func), ignore);
      mvec.push_back(BuildMI(V9::ADDi, 3).addMReg(fpReg).addSImm(firstVarArgOff).
-                   addReg(callInstr.getOperand(1)));
+                   addRegDef(&callInstr));
      return true;
    }
  
@@ -1413,12 +1413,50 @@ bool CodeGenIntrinsic(LLVMIntrinsic::ID iid, CallInst &callInstr,
      return true;                        // no-op on Sparc
  
    case LLVMIntrinsic::va_copy:
-    // Simple copy of current va_list (arg2) to new va_list (arg1)
+    // Simple copy of current va_list (arg1) to new va_list (result)
      mvec.push_back(BuildMI(V9::ORr, 3).
                     addMReg(target.getRegInfo().getZeroRegNum()).
-                   addReg(callInstr.getOperand(2)).
-                   addReg(callInstr.getOperand(1)));
+                   addReg(callInstr.getOperand(1)).
+                   addRegDef(&callInstr));
+    return true;
+
+  case LLVMIntrinsic::sigsetjmp:
+  case LLVMIntrinsic::setjmp: {
+    // act as if we return 0
+    unsigned g0 = target.getRegInfo().getZeroRegNum();
+    mvec.push_back(BuildMI(V9::ORr,3).addMReg(g0).addMReg(g0)
+                   .addReg(&callInstr, MOTy::Def));
+    return true;
+  }
+
+  case LLVMIntrinsic::siglongjmp:
+  case LLVMIntrinsic::longjmp: {
+    // call abort()
+    Module* M = callInstr.getParent()->getParent()->getParent();
+    const FunctionType *voidvoidFuncTy =
+      FunctionType::get(Type::VoidTy, std::vector<const Type*>(), false);
+    Function *F = M->getOrInsertFunction("abort", voidvoidFuncTy);
+    assert(F && "Unable to get or create `abort' function declaration");
+
+    // Create hidden virtual register for return address with type void*
+    TmpInstruction* retAddrReg =
+      new TmpInstruction(MachineCodeForInstruction::get(&callInstr),
+                         PointerType::get(Type::VoidTy), &callInstr);
+    
+    // Use a descriptor to pass information about call arguments
+    // to the register allocator.  This descriptor will be "owned"
+    // and freed automatically when the MachineCodeForInstruction
+    // object for the callInstr goes away.
+    CallArgsDescriptor* argDesc =
+      new CallArgsDescriptor(&callInstr, retAddrReg, false, false);
+
+    MachineInstr* callMI = BuildMI(V9::CALL, 1).addPCDisp(F);
+    callMI->addImplicitRef(retAddrReg, /*isDef*/ true);
+    
+    mvec.push_back(callMI);
+    mvec.push_back(BuildMI(V9::NOP, 0));
      return true;
+  }
  
    default:
      return false;
@@ -1503,22 +1541,19 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
    // Let's check for chain rules outside the switch so that we don't have
    // to duplicate the list of chain rule production numbers here again
    // 
-  if (ThisIsAChainRule(ruleForNode))
-    {
-      // Chain rules have a single nonterminal on the RHS.
-      // Get the rule that matches the RHS non-terminal and use that instead.
-      // 
-      assert(nts[0] && ! nts[1]
-             && "A chain rule should have only one RHS non-terminal!");
-      nextRule = burm_rule(subtreeRoot->state, nts[0]);
-      nts = burm_nts[nextRule];
-      GetInstructionsByRule(subtreeRoot, nextRule, nts, target, mvec);
-    }
-  else
-    {
-      switch(ruleForNode) {
-      case 1:  // stmt:   Ret
-      case 2:  // stmt:   RetValue(reg)
+  if (ThisIsAChainRule(ruleForNode)) {
+    // Chain rules have a single nonterminal on the RHS.
+    // Get the rule that matches the RHS non-terminal and use that instead.
+    // 
+    assert(nts[0] && ! nts[1]
+           && "A chain rule should have only one RHS non-terminal!");
+    nextRule = burm_rule(subtreeRoot->state, nts[0]);
+    nts = burm_nts[nextRule];
+    GetInstructionsByRule(subtreeRoot, nextRule, nts, target, mvec);
+  } else {
+    switch(ruleForNode) {
+      case 1:   // stmt:   Ret
+      case 2:   // stmt:   RetValue(reg)
        {         // NOTE: Prepass of register allocation is responsible
                  //      for moving return value to appropriate register.
                  // Copy the return value to the required return register.
@@ -1860,7 +1895,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          } else if (opType->isFloatingPoint()) {
  
            CreateCodeToConvertFloatToInt(target, opVal, destI, mvec, mcfi);
-          if (destI->getType()->isUnsigned())
+          if (destI->getType()->isUnsigned() && destI->getType() !=Type::UIntTy)
              maskUnsignedResult = true; // not handled by fp->int code
  
          } else if (isIntegral) {
@@ -1928,9 +1963,9 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          if (forwardOperandNum != 0) {    // we do need the cast
            Value* leftVal = subtreeRoot->leftChild()->getValue();
            const Type* opType = leftVal->getType();
-          MachineOpCode opCode=ChooseConvertToFloatInstr(
+          MachineOpCode opCode=ChooseConvertToFloatInstr(target,
                                         subtreeRoot->getOpLabel(), opType);
-          if (opCode == V9::INVALID_OPCODE) {  // no conversion needed
+          if (opCode == V9::NOP) {      // no conversion needed
              forwardOperandNum = 0;      // forward first operand to user
            } else {
              // If the source operand is a non-FP type it must be
@@ -2154,11 +2189,11 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          mvec.push_back(BuildMI(V9::ANDNr, 3).addReg(lhs).addReg(notArg)
                                         .addReg(dest, MOTy::Def));
  
-        if (notArg->getType() == Type::BoolTy)
-          { // set 1 in result register if result of above is non-zero
-            mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
-                           .addReg(dest, MOTy::UseAndDef));
-          }
+        if (notArg->getType() == Type::BoolTy) {
+          // set 1 in result register if result of above is non-zero
+          mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
+                         .addReg(dest, MOTy::UseAndDef));
+        }
  
          break;
        }
@@ -2185,11 +2220,11 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          mvec.push_back(BuildMI(V9::ORNr, 3).addReg(lhs).addReg(notArg)
                         .addReg(dest, MOTy::Def));
  
-        if (notArg->getType() == Type::BoolTy)
-          { // set 1 in result register if result of above is non-zero
-            mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
-                           .addReg(dest, MOTy::UseAndDef));
-          }
+        if (notArg->getType() == Type::BoolTy) {
+          // set 1 in result register if result of above is non-zero
+          mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
+                         .addReg(dest, MOTy::UseAndDef));
+        }
  
          break;
        }
@@ -2215,11 +2250,11 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          mvec.push_back(BuildMI(V9::XNORr, 3).addReg(lhs).addReg(notArg)
                         .addReg(dest, MOTy::Def));
  
-        if (notArg->getType() == Type::BoolTy)
-          { // set 1 in result register if result of above is non-zero
-            mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
-                           .addReg(dest, MOTy::UseAndDef));
-          }
+        if (notArg->getType() == Type::BoolTy) {
+          // set 1 in result register if result of above is non-zero
+          mvec.push_back(BuildMI(V9::MOVRNZi, 3).addReg(dest).addZImm(1)
+                         .addReg(dest, MOTy::UseAndDef));
+        }
          break;
        }
  
@@ -2240,37 +2275,36 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          bool computeBoolVal = (subtreeRoot->parent() == NULL ||
                                 ! AllUsesAreBranches(setCCInstr));
  
-        if (computeBoolVal)
-          {
-            InstrTreeNode* constNode = subtreeRoot->rightChild();
-            assert(constNode &&
-                   constNode->getNodeType() ==InstrTreeNode::NTConstNode);
-            Constant *constVal = cast<Constant>(constNode->getValue());
-            bool isValidConst;
-            
-            if ((constVal->getType()->isInteger()
-                 || isa<PointerType>(constVal->getType()))
-                && target.getInstrInfo().ConvertConstantToIntType(target,
+        if (computeBoolVal) {
+          InstrTreeNode* constNode = subtreeRoot->rightChild();
+          assert(constNode &&
+                 constNode->getNodeType() ==InstrTreeNode::NTConstNode);
+          Constant *constVal = cast<Constant>(constNode->getValue());
+          bool isValidConst;
+          
+          if ((constVal->getType()->isInteger()
+               || isa<PointerType>(constVal->getType()))
+              && target.getInstrInfo().ConvertConstantToIntType(target,
                               constVal, constVal->getType(), isValidConst) == 0
-                && isValidConst)
-              {
-                // That constant is an integer zero after all...
-                // Use a MOVR[op] to compute the boolean result
-                // Unconditionally set register to 0
-                mvec.push_back(BuildMI(V9::SETHI, 2).addZImm(0)
-                               .addRegDef(setCCInstr));
+              && isValidConst)
+          {
+            // That constant is an integer zero after all...
+            // Use a MOVR[op] to compute the boolean result
+            // Unconditionally set register to 0
+            mvec.push_back(BuildMI(V9::SETHI, 2).addZImm(0)
+                           .addRegDef(setCCInstr));
                  
-                // Now conditionally move 1 into the register.
-                // Mark the register as a use (as well as a def) because the old
-                // value will be retained if the condition is false.
-                MachineOpCode movOpCode = ChooseMovpregiForSetCC(subtreeRoot);
-                mvec.push_back(BuildMI(movOpCode, 3)
-                               .addReg(subtreeRoot->leftChild()->getValue())
-                               .addZImm(1).addReg(setCCInstr, MOTy::UseAndDef));
+            // Now conditionally move 1 into the register.
+            // Mark the register as a use (as well as a def) because the old
+            // value will be retained if the condition is false.
+            MachineOpCode movOpCode = ChooseMovpregiForSetCC(subtreeRoot);
+            mvec.push_back(BuildMI(movOpCode, 3)
+                           .addReg(subtreeRoot->leftChild()->getValue())
+                           .addZImm(1).addReg(setCCInstr, MOTy::UseAndDef));
                  
-                break;
-              }
+            break;
            }
+        }
          // ELSE FALL THROUGH
        }
  
@@ -2753,9 +2787,10 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          const Type* opType = argVal1->getType();
          assert((opType->isInteger() || isa<PointerType>(opType)) &&
                 "Shl unsupported for other types");
+        unsigned opSize = target.getTargetData().getTypeSize(opType);
          
          CreateShiftInstructions(target, shlInstr->getParent()->getParent(),
-                                (opType == Type::LongTy)? V9::SLLXr6:V9::SLLr5,
+                                (opSize > 4)? V9::SLLXr6:V9::SLLr5,
                                  argVal1, argVal2, 0, shlInstr, mvec,
                                  MachineCodeForInstruction::get(shlInstr));
          break;
@@ -2766,9 +2801,10 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
          assert((opType->isInteger() || isa<PointerType>(opType)) &&
                 "Shr unsupported for other types");
+        unsigned opSize = target.getTargetData().getTypeSize(opType);
          Add3OperandInstr(opType->isSigned()
-                         ? (opType == Type::LongTy ? V9::SRAXr6 : V9::SRAr5)
-                         : (opType == Type::ULongTy ? V9::SRLXr6 : V9::SRLr5),
+                         ? (opSize > 4? V9::SRAXr6 : V9::SRAr5)
+                         : (opSize > 4? V9::SRLXr6 : V9::SRLr5),
                           subtreeRoot, mvec);
          break;
        }
@@ -2776,16 +2812,28 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
        case 64: // reg:   Phi(reg,reg)
          break;                          // don't forward the value
  
-      case 65: // reg:   VaArg(reg)
-      {
-        // Use value initialized by va_start as pointer to args on the stack.
-        // Load argument via current pointer value, then increment pointer.
+      case 65: // reg:   VANext(reg):  the va_next(va_list, type) instruction
+      { // Increment the va_list pointer register according to the type.
+        // All LLVM argument types are <= 64 bits, so use one doubleword.
+        Instruction* vaNextI = subtreeRoot->getInstruction();
+        assert(target.getTargetData().getTypeSize(vaNextI->getType()) <= 8 &&
+               "We assumed that all LLVM parameter types <= 8 bytes!");
          int argSize = target.getFrameInfo().getSizeOfEachArgOnStack();
+        mvec.push_back(BuildMI(V9::ADDi, 3).addReg(vaNextI->getOperand(0)).
+                       addSImm(argSize).addRegDef(vaNextI));
+        break;
+      }
+
+      case 66: // reg:   VAArg (reg): the va_arg instruction
+      { // Load argument from stack using current va_list pointer value.
+        // Use 64-bit load (LDX) for all non-FP args, and LDF or LDDF for FP.
          Instruction* vaArgI = subtreeRoot->getInstruction();
-        mvec.push_back(BuildMI(V9::LDXi, 3).addReg(vaArgI->getOperand(0)).
+        MachineOpCode loadOp = (vaArgI->getType()->isFloatingPoint()
+                                ? (vaArgI->getType() == Type::FloatTy
+                                   ? V9::LDFi : V9::LDDFi)
+                                : V9::LDXi);
+        mvec.push_back(BuildMI(loadOp, 3).addReg(vaArgI->getOperand(0)).
                         addSImm(0).addRegDef(vaArgI));
-        mvec.push_back(BuildMI(V9::ADDi, 3).addReg(vaArgI->getOperand(0)).
-                       addSImm(argSize).addRegDef(vaArgI->getOperand(0)));
          break;
        }
        
@@ -2842,9 +2890,19 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
  
          unsigned numSubst = 0;
          for (unsigned i=0, N=mvec.size(); i < N; ++i) {
+
+          // Make sure we substitute all occurrences of dest in these instrs.
+          // Otherwise, we will have bogus code.
            bool someArgsWereIgnored = false;
-          numSubst += mvec[i]->substituteValue(dest, tmpI, /*defsOnly*/ true,
-                                               /*defsAndUses*/ false,
+
+          // Make sure not to substitute an upwards-exposed use -- that would
+          // introduce a use of `tmpI' with no preceding def.  Therefore,
+          // substitute a use or def-and-use operand only if a previous def
+          // operand has already been substituted (i.e., numSubst > 0).
+          // 
+          numSubst += mvec[i]->substituteValue(dest, tmpI,
+                                               /*defsOnly*/ numSubst == 0,
+                                               /*notDefsAndUses*/ numSubst > 0,
                                                 someArgsWereIgnored);
            assert(!someArgsWereIgnored &&
                   "Operand `dest' exists but not replaced: probably bogus!");
@@ -2852,7 +2910,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
          assert(numSubst > 0 && "Operand `dest' not replaced: probably bogus!");
  
          // Left shift 32-N if size (N) is less than 32 bits.
-        // Use another tmp. virtual registe to represent this result.
+        // Use another tmp. virtual register to represent this result.
          if (destSize < 4) {
            srlArgToUse = new TmpInstruction(mcfi, dest->getType(),
                                             tmpI, NULL, "maskHi2");