[DAG] Pass the argument list to the CallLoweringInfo via move semantics. NFCI.

[oota-llvm.git] / lib / Target / Mips / MipsISelLowering.cpp
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp

index 88210a48b8d0bf2a1b981332b678751f7b18b9c4..9b95664c044cbca026838824c6ecee6cb0bf2e81 100644 (file)
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -287,7 +287,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
    setOperationAction(ISD::BR_CC,             MVT::f64,   Expand);
    setOperationAction(ISD::BR_CC,             MVT::i32,   Expand);
    setOperationAction(ISD::BR_CC,             MVT::i64,   Expand);
-  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
+  setOperationAction(ISD::SELECT_CC,         MVT::i32,   Expand);
+  setOperationAction(ISD::SELECT_CC,         MVT::i64,   Expand);
    setOperationAction(ISD::UINT_TO_FP,        MVT::i32,   Expand);
    setOperationAction(ISD::UINT_TO_FP,        MVT::i64,   Expand);
    setOperationAction(ISD::FP_TO_UINT,        MVT::i32,   Expand);
@@ -352,20 +353,21 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
  
    setInsertFencesForAtomic(true);
  
-  if (!Subtarget->hasSEInReg()) {
+  if (!Subtarget->hasMips32r2()) {
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    }
  
-  if (!Subtarget->hasBitCount()) {
+  // MIPS16 lacks MIPS32's clz and clo instructions.
+  if (!Subtarget->hasMips32() || Subtarget->inMips16Mode())
      setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+  if (!Subtarget->hasMips64())
      setOperationAction(ISD::CTLZ, MVT::i64, Expand);
-  }
  
-  if (!Subtarget->hasSwap()) {
+  if (!Subtarget->hasMips32r2())
      setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  if (!Subtarget->hasMips64r2())
      setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-  }
  
    if (isGP64bit()) {
      setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
@@ -814,10 +816,10 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
    return VReg;
  }
  
-static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
-                                          MachineBasicBlock &MBB,
-                                          const TargetInstrInfo &TII,
-                                          bool Is64Bit) {
+static MachineBasicBlock *insertDivByZeroTrap(MachineInstr *MI,
+                                              MachineBasicBlock &MBB,
+                                              const TargetInstrInfo &TII,
+                                              bool Is64Bit) {
    if (NoZeroDivCheck)
      return &MBB;
  
@@ -835,6 +837,10 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
  
    // Clear Divisor's kill flag.
    Divisor.setIsKill(false);
+
+  // We would normally delete the original instruction here but in this case
+  // we only needed to inject an additional instruction rather than replace it.
+
    return &MBB;
  }
  
@@ -917,10 +923,22 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      return emitAtomicCmpSwap(MI, BB, 8);
    case Mips::PseudoSDIV:
    case Mips::PseudoUDIV:
-    return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), false);
+  case Mips::DIV:
+  case Mips::DIVU:
+  case Mips::MOD:
+  case Mips::MODU:
+    return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+                               false);
    case Mips::PseudoDSDIV:
    case Mips::PseudoDUDIV:
-    return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), true);
+  case Mips::DDIV:
+  case Mips::DDIVU:
+  case Mips::DMOD:
+  case Mips::DMODU:
+    return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+                               true);
+  case Mips::SEL_D:
+    return emitSEL_D(MI, BB);
    }
  }
  
@@ -940,16 +958,20 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    unsigned LL, SC, AND, NOR, ZERO, BEQ;
  
    if (Size == 4) {
-    LL = isMicroMips ? Mips::LL_MM : Mips::LL;
-    SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+    if (isMicroMips) {
+      LL = Mips::LL_MM;
+      SC = Mips::SC_MM;
+    } else {
+      LL = Subtarget->hasMips32r6() ? Mips::LL : Mips::LL_R6;
+      SC = Subtarget->hasMips32r6() ? Mips::SC : Mips::SC_R6;
+    }
      AND = Mips::AND;
      NOR = Mips::NOR;
      ZERO = Mips::ZERO;
      BEQ = Mips::BEQ;
-  }
-  else {
-    LL = Mips::LLD;
-    SC = Mips::SCD;
+  } else {
+    LL = Subtarget->hasMips64r6() ? Mips::LLD : Mips::LLD_R6;
+    SC = Subtarget->hasMips64r6() ? Mips::SCD : Mips::SCD_R6;
      AND = Mips::AND64;
      NOR = Mips::NOR64;
      ZERO = Mips::ZERO_64;
@@ -1011,11 +1033,39 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    return exitMBB;
  }
  
-MachineBasicBlock *
-MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
-                                             MachineBasicBlock *BB,
-                                             unsigned Size, unsigned BinOpcode,
-                                             bool Nand) const {
+/// Emit code into \p BB that sign-extends the low \p Size bytes of \p SrcReg
+/// into the 32-bit register \p DstReg: SEB/SEH on MIPS32r2 for 1/2 bytes,
+/// otherwise an SLL/SRA pair through a scratch register. Returns \p BB.
+MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
+    MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
+    unsigned SrcReg) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  if (Subtarget->hasMips32r2() && Size == 1) {
+    BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg);
+    return BB;
+  }
+  if (Subtarget->hasMips32r2() && Size == 2) {
+    BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg);
+    return BB;
+  }
+
+  // Size is a byte count: 0 would need a shift of 32 and anything above 4
+  // would wrap the unsigned subtraction, neither of which SLL/SRA can encode.
+  assert(Size >= 1 && Size <= 4 && "Size must be a byte count in [1, 4]");
+  int64_t ShiftImm = 32 - (Size * 8);
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  unsigned ScrReg = RegInfo.createVirtualRegister(getRegClassFor(MVT::i32));
+
+  BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm);
+  BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm);
+  return BB;
+}
+
+MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
+    MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+    bool Nand) const {
    assert((Size == 1 || Size == 2) &&
           "Unsupported size for EmitAtomicBinaryPartial.");
  
@@ -1045,7 +1095,6 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
    unsigned StoreVal = RegInfo.createVirtualRegister(RC);
    unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
    unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned SllRes = RegInfo.createVirtualRegister(RC);
    unsigned Success = RegInfo.createVirtualRegister(RC);
  
    // insert new blocks after the current block
@@ -1151,19 +1200,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
    //  sinkMBB:
    //    and     maskedoldval1,oldval,mask
    //    srl     srlres,maskedoldval1,shiftamt
-  //    sll     sllres,srlres,24
-  //    sra     dest,sllres,24
+  //    sign_extend dest,srlres
    BB = sinkMBB;
-  int64_t ShiftImm = (Size == 1) ? 24 : 16;
  
    BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
      .addReg(OldVal).addReg(Mask);
    BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
        .addReg(MaskedOldVal1).addReg(ShiftAmt);
-  BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
-      .addReg(SrlRes).addImm(ShiftImm);
-  BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
-      .addReg(SllRes).addImm(ShiftImm);
+  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
  
    MI->eraseFromParent(); // The instruction is gone now.
  
@@ -1284,7 +1328,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
    unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
    unsigned StoreVal = RegInfo.createVirtualRegister(RC);
    unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned SllRes = RegInfo.createVirtualRegister(RC);
    unsigned Success = RegInfo.createVirtualRegister(RC);
  
    // insert new blocks after the current block
@@ -1381,23 +1424,44 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
  
    //  sinkMBB:
    //    srl     srlres,maskedoldval0,shiftamt
-  //    sll     sllres,srlres,24
-  //    sra     dest,sllres,24
+  //    sign_extend dest,srlres
    BB = sinkMBB;
-  int64_t ShiftImm = (Size == 1) ? 24 : 16;
  
    BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
        .addReg(MaskedOldVal0).addReg(ShiftAmt);
-  BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
-      .addReg(SrlRes).addImm(ShiftImm);
-  BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
-      .addReg(SllRes).addImm(ShiftImm);
+  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
  
    MI->eraseFromParent();   // The instruction is gone now.
  
    return exitMBB;
  }
  
+/// Custom inserter for SEL_D: rewrites operand 1 (the condition register) to a
+/// fresh FGR64 virtual register built from the old one with SUBREG_TO_REG.
+/// \p MI itself is kept in place, and \p BB is returned unchanged.
+MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
+                                                 MachineBasicBlock *BB) const {
+  const TargetInstrInfo *Insts = getTargetMachine().getInstrInfo();
+  const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  const unsigned CondReg = MI->getOperand(1).getReg();
+  // Allocate the 64-bit register that will stand in for the condition.
+  const auto &WideRC = RI->getRegClass(Mips::FGR64RegClassID);
+  const unsigned WideCondReg = MRI.createVirtualRegister(WideRC);
+
+  // Build "WideCondReg = SUBREG_TO_REG 0, CondReg, sub_lo" at MI's position.
+  MachineBasicBlock::iterator InsertPt(MI);
+  BuildMI(*BB, InsertPt, MI->getDebugLoc(), Insts->get(Mips::SUBREG_TO_REG),
+          WideCondReg)
+      .addImm(0)
+      .addReg(CondReg)
+      .addImm(Mips::sub_lo);
+
+  // MI is not erased; only its condition operand is redirected.
+  MI->getOperand(1).setReg(WideCondReg);
+  return BB;
+}
+
  //===----------------------------------------------------------------------===//
  //  Misc Lower Operation implementation
  //===----------------------------------------------------------------------===//
@@ -1438,6 +1502,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
    SDValue Dest = Op.getOperand(2);
    SDLoc DL(Op);
  
+  assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
    SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
  
    // Return if flag is not set by a floating point comparison.
@@ -1457,6 +1522,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue MipsTargetLowering::
  lowerSELECT(SDValue Op, SelectionDAG &DAG) const
  {
+  assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
    SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
  
    // Return if flag is not set by a floating point comparison.
@@ -1482,6 +1548,7 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
  }
  
  SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
    SDValue Cond = createFPCmp(DAG, Op);
  
    assert(Cond.getOpcode() == MipsISD::FPCmp &&
@@ -1576,11 +1643,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
      Entry.Ty = PtrTy;
      Args.push_back(Entry);
  
-    TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
-                  false, false, false, false, 0, CallingConv::C,
-                  /*IsTailCall=*/false, /*doesNotRet=*/false,
-                  /*isReturnValueUsed=*/true,
-                  TlsGetAddr, Args, DAG, DL);
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
+      .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args), 0);
      std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  
      SDValue Ret = CallResult.first;
@@ -1876,7 +1941,7 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
    Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
  
    SDValue Ops[2] = {Lo, Hi};
-  return DAG.getMergeValues(Ops, 2, DL);
+  return DAG.getMergeValues(Ops, DL);
  }
  
  SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
@@ -1917,7 +1982,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                     ShiftRightHi);
  
    SDValue Ops[2] = {Lo, Hi};
-  return DAG.getMergeValues(Ops, 2, DL);
+  return DAG.getMergeValues(Ops, DL);
  }
  
  static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
@@ -1942,6 +2007,9 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
  
+  if (Subtarget->systemSupportsUnalignedAccess())
+    return Op;
+
    // Return if load is aligned or if MemVT is neither i32 nor i64.
    if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
        ((MemVT != MVT::i32) && (MemVT != MVT::i64)))
@@ -1996,7 +2064,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32);
    SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32);
    SDValue Ops[] = { SRL, LWR.getValue(1) };
-  return DAG.getMergeValues(Ops, 2, DL);
+  return DAG.getMergeValues(Ops, DL);
  }
  
  static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
@@ -2065,7 +2133,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    EVT MemVT = SD->getMemoryVT();
  
    // Lower unaligned integer stores.
-  if ((SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
+  if (!Subtarget->systemSupportsUnalignedAccess() &&
+      (SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
        ((MemVT == MVT::i32) || (MemVT == MVT::i64)))
      return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle());
  
@@ -2328,7 +2397,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
  
    MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
                                   Subtarget->mipsSEUsesSoftFloat(),
-                                 Callee.getNode(), CLI.Args);
+                                 Callee.getNode(), CLI.getArgs());
  
    // Get a count of how many bytes are to be pushed on the stack.
    unsigned NextStackOffset = CCInfo.getNextStackOffset();
@@ -2661,18 +2730,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
      }
    }
  
-  // The mips ABIs for returning structs by value requires that we copy
-  // the sret argument into $v0 for the return. Save the argument into
-  // a virtual register so that we can access it from the return points.
-  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
-    unsigned Reg = MipsFI->getSRetReturnReg();
-    if (!Reg) {
-      Reg = MF.getRegInfo().createVirtualRegister(
-          getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
-      MipsFI->setSRetReturnReg(Reg);
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    // The MIPS ABIs for returning structs by value require that we copy
+    // the sret argument into $v0 for the return. Save the argument into
+    // a virtual register so that we can access it from the return points.
+    if (Ins[i].Flags.isSRet()) {
+      unsigned Reg = MipsFI->getSRetReturnReg();
+      if (!Reg) {
+        Reg = MF.getRegInfo().createVirtualRegister(
+            getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+        MipsFI->setSRetReturnReg(Reg);
+      }
+      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+      break;
      }
-    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
    }
  
    if (IsVarArg)
@@ -3268,7 +3340,7 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
        dbgs() << "Call operand #" << I << " has unhandled type "
               << EVT(ArgVT).getEVTString();
  #endif
-      llvm_unreachable(0);
+      llvm_unreachable(nullptr);
      }
    }
  }
@@ -3300,7 +3372,7 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
      dbgs() << "Formal Arg #" << I << " has unhandled type "
             << EVT(ArgVT).getEVTString();
  #endif
-    llvm_unreachable(0);
+    llvm_unreachable(nullptr);
    }
  }
  
@@ -3325,7 +3397,7 @@ analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
        dbgs() << "Call result #" << I << " has unhandled type "
               << EVT(VT).getEVTString() << '\n';
  #endif
-      llvm_unreachable(0);
+      llvm_unreachable(nullptr);
      }
    }
  }
@@ -3483,21 +3555,22 @@ passByValArg(SDValue Chain, SDLoc DL,
               MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
               const MipsCC &CC, const ByValArgInfo &ByVal,
               const ISD::ArgFlagsTy &Flags, bool isLittle) const {
-  unsigned ByValSize = Flags.getByValSize();
-  unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
-  unsigned RegSize = CC.regSize();
-  unsigned Alignment = std::min(Flags.getByValAlign(), RegSize);
-  EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8);
+  unsigned ByValSizeInBytes = Flags.getByValSize();
+  unsigned OffsetInBytes = 0; // From beginning of struct
+  unsigned RegSizeInBytes = CC.regSize();
+  unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
+  EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
  
    if (ByVal.NumRegs) {
      const MCPhysReg *ArgRegs = CC.intArgRegs();
-    bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize);
+    bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes);
      unsigned I = 0;
  
      // Copy words to registers.
-    for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) {
+    for (; I < ByVal.NumRegs - LeftoverBytes;
+         ++I, OffsetInBytes += RegSizeInBytes) {
        SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                                    DAG.getConstant(Offset, PtrTy));
+                                    DAG.getConstant(OffsetInBytes, PtrTy));
        SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
                                      MachinePointerInfo(), false, false, false,
                                      Alignment);
@@ -3507,38 +3580,38 @@ passByValArg(SDValue Chain, SDLoc DL,
      }
  
      // Return if the struct has been fully copied.
-    if (ByValSize == Offset)
+    if (ByValSizeInBytes == OffsetInBytes)
        return;
  
      // Copy the remainder of the byval argument with sub-word loads and shifts.
      if (LeftoverBytes) {
-      assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) &&
-             "Size of the remainder should be smaller than RegSize.");
+      assert((ByValSizeInBytes > OffsetInBytes) &&
+             (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) &&
+             "Size of the remainder should be smaller than RegSizeInBytes.");
        SDValue Val;
  
-      for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0;
-           Offset < ByValSize; LoadSize /= 2) {
-        unsigned RemSize = ByValSize - Offset;
+      for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
+           OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) {
+        unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes;
  
-        if (RemSize < LoadSize)
+        if (RemainingSizeInBytes < LoadSizeInBytes)
            continue;
  
          // Load subword.
          SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                                      DAG.getConstant(Offset, PtrTy));
-        SDValue LoadVal =
-          DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr,
-                         MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
-                         false, false, Alignment);
+                                      DAG.getConstant(OffsetInBytes, PtrTy));
+        SDValue LoadVal = DAG.getExtLoad(
+            ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
+            MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment);
          MemOpChains.push_back(LoadVal.getValue(1));
  
          // Shift the loaded value.
          unsigned Shamt;
  
          if (isLittle)
-          Shamt = TotalSizeLoaded;
+          Shamt = TotalBytesLoaded * 8;
          else
-          Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8;
+          Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
  
          SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
                                      DAG.getConstant(Shamt, MVT::i32));
@@ -3548,9 +3621,9 @@ passByValArg(SDValue Chain, SDLoc DL,
          else
            Val = Shift;
  
-        Offset += LoadSize;
-        TotalSizeLoaded += LoadSize;
-        Alignment = std::min(Alignment, LoadSize);
+        OffsetInBytes += LoadSizeInBytes;
+        TotalBytesLoaded += LoadSizeInBytes;
+        Alignment = std::min(Alignment, LoadSizeInBytes);
        }
  
        unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
@@ -3560,9 +3633,9 @@ passByValArg(SDValue Chain, SDLoc DL,
    }
  
    // Copy remainder of byval arg to it with memcpy.
-  unsigned MemCpySize = ByValSize - Offset;
+  unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
    SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                            DAG.getConstant(Offset, PtrTy));
+                            DAG.getConstant(OffsetInBytes, PtrTy));
    SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
                              DAG.getIntPtrConstant(ByVal.Address));
    Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),