rk: move cpu_axi.h from arch/arm/mach-rockchip to include/linux/rockchip
[firefly-linux-kernel-4.4.55.git] / arch / arm / mach-rockchip / ddr_rk32.c
index 31383a5a4976b5546541d3122d4a2cf04f7ef2be..c11c2bbbb8da94acd7f742b4f70c75b53e4c5c40 100755 (executable)
 #include <asm/tlbflush.h>
 #include <linux/cpu.h>
 #include <dt-bindings/clock/ddr.h>
+#include <linux/rockchip/cpu_axi.h>
 #include <linux/rockchip/cru.h>
 #include <linux/rk_fb.h>
-#include "cpu_axi.h"
 
 typedef uint32_t uint32;
 
+#ifdef CONFIG_FB_ROCKCHIP
 #define DDR_CHANGE_FREQ_IN_LCDC_VSYNC
+#endif
 /***********************************
  * Global Control Macro
  ***********************************/
@@ -32,7 +34,7 @@ typedef uint32_t uint32;
 
 #define DDR3_DDR2_ODT_DISABLE_FREQ    (333)
 #define DDR3_DDR2_DLL_DISABLE_FREQ    (333)
-#define SR_IDLE                       (0x1)   //unit:32*DDR clk cycle, and 0 for disable auto self-refresh
+#define SR_IDLE                       (0x3)   //unit:32*DDR clk cycle, and 0 for disable auto self-refresh
 #define PD_IDLE                       (0X40)  //unit:DDR clk cycle, and 0 for disable auto power-down
 
 //#if (DDR3_DDR2_ODT_DISABLE_FREQ > DDR3_DDR2_DLL_DISABLE_FREQ)
@@ -1378,6 +1380,7 @@ typedef struct CHANNEL_INFO_Tag
     DRAM_TYPE     mem_type; // =DRAM_MAX, channel invalid
     uint32        ddr_speed_bin;    // used for ddr3 only
     uint32        ddr_capability_per_die;  // one chip cs capability
+    uint32        dtt_cs;  //data training cs
 }CH_INFO,*pCH_INFO;
 
 struct ddr_freq_t {
@@ -1446,7 +1449,9 @@ static __sramdata uint32 clkr;
 static __sramdata uint32 clkf;
 static __sramdata uint32 clkod;
 uint32 DEFINE_PIE_DATA(ddr_select_gpll_div); // 0-Disable, 1-1:1, 2-2:1, 4-4:1
+#if defined(ENABLE_DDR_CLCOK_GPLL_PATH)
 static uint32 *p_ddr_select_gpll_div;
+#endif
 
 static void __sramfunc ddr_delayus(uint32 us);
 
@@ -1556,18 +1561,18 @@ static uint32 (*p_ddr_set_pll)(uint32 nMHz, uint32 set);
 static void __sramfunc idle_port(void)
 {
     register int i,j;
-    uint32 clk_gate[14];
+    uint32 clk_gate[19];
 
     pPMU_Reg->PMU_IDLE_REQ |= idle_req_core_cfg;
     dsb();
     while( (pPMU_Reg->PMU_IDLE_ST & idle_core) == 0 );
 
     //save clock gate status
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         clk_gate[i]=pCRU_Reg->CRU_CLKGATE_CON[i];
 
     //enable all clock gate for request idle
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         pCRU_Reg->CRU_CLKGATE_CON[i]=0xffff0000;
 
     i = pPMU_Reg->PMU_PWRDN_ST;
@@ -1603,21 +1608,21 @@ static void __sramfunc idle_port(void)
     while( (pPMU_Reg->PMU_IDLE_ST & j) != j );
 
     //resume clock gate status
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         pCRU_Reg->CRU_CLKGATE_CON[i]=  (clk_gate[i] | 0xffff0000);
 }
 
-static void __sramfunc deidle_port(void)
+static void inline deidle_port(void)
 {
     register int i,j;
-    uint32 clk_gate[14];
+    uint32 clk_gate[19];
 
     //save clock gate status
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         clk_gate[i]=pCRU_Reg->CRU_CLKGATE_CON[i];
 
     //enable all clock gate for request idle
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         pCRU_Reg->CRU_CLKGATE_CON[i]=0xffff0000;
 
     i = pPMU_Reg->PMU_PWRDN_ST;
@@ -1657,7 +1662,7 @@ static void __sramfunc deidle_port(void)
     while( (pPMU_Reg->PMU_IDLE_ST & idle_core) != 0 );
 
     //resume clock gate status
-    for(i=0;i<14;i++)
+    for(i=0;i<19;i++)
         pCRU_Reg->CRU_CLKGATE_CON[i]=  (clk_gate[i] | 0xffff0000);
 
 }
@@ -1712,6 +1717,7 @@ static void ddr_get_datatraing_addr(uint32 *pdtar)
     uint32          bank;
     uint32          bw;
     uint32          conf;
+    uint32          cap1;
 
     for(ch=0,chCnt=0;ch<CH_MAX;ch++)
     {
@@ -1719,6 +1725,7 @@ static void ddr_get_datatraing_addr(uint32 *pdtar)
         {
             chCnt++;
         }
+        p_ddr_ch[ch]->dtt_cs = 0;
     }
 
     // caculate aglined physical address
@@ -1748,7 +1755,23 @@ static void ddr_get_datatraing_addr(uint32 *pdtar)
             socAddr[1] = addr + strideSize;
         }
         ddr_print("socAddr[0]=0x%x, socAddr[1]=0x%x\n", socAddr[0], socAddr[1]);
-        if((stride >= 0x10) && (stride <= 0x13))  // 3GB stride
+        if(stride < 4)
+        {
+                cap1 = (1 << (READ_ROW_INFO(1,0)+READ_COL_INFO(1)+READ_BK_INFO(1)+READ_BW_INFO(1)));
+               if(READ_CS_INFO(1) > 1)
+               {
+                   cap1 += cap1 >> (READ_ROW_INFO(1,0)-READ_ROW_INFO(1,1));
+               }
+               if(READ_CH_ROW_INFO(1))
+               {
+                   cap1 = cap1*3/4;
+               }
+                chAddr[0] = addr;
+                chAddr[1] = cap1 - PAGE_SIZE;
+                if(READ_CS_INFO(1) > 1)
+                        p_ddr_ch[1]->dtt_cs = 1;
+        }
+        else if((stride >= 0x10) && (stride <= 0x13))  // 3GB stride
         {
             //conver to ch addr
             if(addr < 0x40000000)
@@ -1786,13 +1809,13 @@ static void ddr_get_datatraing_addr(uint32 *pdtar)
                 chAddr[1] = socAddr[1] - halfCap;
             }
         }
-        ddr_print("chAddr[0]=0x%x, chAddr[1]=0x%x\n", chAddr[0], chAddr[1]);
     }
     else
     {
         chAddr[0] = addr;
         chAddr[1] = addr;
     }
+    ddr_print("chAddr[0]=0x%x, chAddr[1]=0x%x\n", chAddr[0], chAddr[1]);
 
     for(ch=0,chCnt=0;ch<CH_MAX;ch++)
     {
@@ -1990,7 +2013,10 @@ static uint32 __sramfunc ddr_data_training_trigger(uint32 ch)
     // clear DTDONE status
     pPHY_Reg->PIR |= CLRSR;
     cs = ((pPHY_Reg->PGCR>>18) & 0xF);
-    pPHY_Reg->PGCR = (pPHY_Reg->PGCR & (~(0xF<<18))) | (1<<18);  //use cs0 dtt
+    if(DATA(ddr_ch[ch]).dtt_cs == 0)
+            pPHY_Reg->PGCR = (pPHY_Reg->PGCR & (~(0xF<<18))) | (1<<18);  //use cs0 dtt
+    else
+            pPHY_Reg->PGCR = (pPHY_Reg->PGCR & (~(0xF<<18))) | (2<<18);  //use cs1 dtt
     // trigger DTT
     pPHY_Reg->PIR |= INIT | QSTRN | LOCKBYP | ZCALBYP | CLRSR | ICPC;
     return cs;
@@ -2000,25 +2026,39 @@ static uint32 __sramfunc ddr_data_training_trigger(uint32 ch)
 //!0 DTT failed
 static uint32 __sramfunc ddr_data_training(uint32 ch, uint32 cs)
 {
-    uint32        i,byte;
+    uint32        i,byte=2,cs_msk;
     pDDR_REG_T    pDDR_Reg = DATA(ddr_ch[ch]).pDDR_Reg;
     pDDRPHY_REG_T pPHY_Reg = DATA(ddr_ch[ch]).pPHY_Reg;
-    
+
+    if(DATA(ddr_ch[ch]).dtt_cs == 0){
+        cs_msk = 1;
+    }else{
+        cs_msk = 2;
+    }
     // wait echo byte DTDONE
-    while((pPHY_Reg->DATX8[0].DXGSR[0] & 1) != 1);
-    while((pPHY_Reg->DATX8[1].DXGSR[0] & 1) != 1);
+    while((pPHY_Reg->DATX8[0].DXGSR[0] & cs_msk) != cs_msk);
+    while((pPHY_Reg->DATX8[1].DXGSR[0] & cs_msk) != cs_msk);
     if(!(pDDR_Reg->PPCFG & 1))
     {
-        while((pPHY_Reg->DATX8[2].DXGSR[0] & 1) != 1);
-        while((pPHY_Reg->DATX8[3].DXGSR[0] & 1) != 1);
+        while((pPHY_Reg->DATX8[2].DXGSR[0] & cs_msk) != cs_msk);
+        while((pPHY_Reg->DATX8[3].DXGSR[0] & cs_msk) != cs_msk);
         byte=4;
     }
     pPHY_Reg->PGCR = (pPHY_Reg->PGCR & (~(0xF<<18))) | (cs<<18);  //restore cs
-    for(i=0;i<byte;i++)
-    {
-        pPHY_Reg->DATX8[i].DXDQSTR = (pPHY_Reg->DATX8[i].DXDQSTR & (~((0x7<<3)|(0x3<<14))))
-                                      | ((pPHY_Reg->DATX8[i].DXDQSTR & 0x7)<<3)
-                                      | (((pPHY_Reg->DATX8[i].DXDQSTR>>12) & 0x3)<<14);
+    if(DATA(ddr_ch[ch]).dtt_cs == 0){
+        for(i=0;i<byte;i++)
+        {
+            pPHY_Reg->DATX8[i].DXDQSTR = (pPHY_Reg->DATX8[i].DXDQSTR & (~((0x7<<3)|(0x3<<14))))\
+                                          | ((pPHY_Reg->DATX8[i].DXDQSTR & 0x7)<<3)\
+                                          | (((pPHY_Reg->DATX8[i].DXDQSTR>>12) & 0x3)<<14);
+        }
+    }else{
+        for(i=0;i<byte;i++)
+        {
+            pPHY_Reg->DATX8[i].DXDQSTR = (pPHY_Reg->DATX8[i].DXDQSTR & (~((0x7<<0)|(0x3<<12))))\
+                                          | ((pPHY_Reg->DATX8[i].DXDQSTR>>3) & 0x7)\
+                                          | (((pPHY_Reg->DATX8[i].DXDQSTR>>14) & 0x3)<<12);
+        }
     }
     // send some auto refresh to complement the lost while DTT; measured: the DTT of one CS takes at most 10.7us, so at most 2 refreshes need to be made up
     if(cs > 1)
@@ -2052,6 +2092,7 @@ static uint32 __sramfunc ddr_data_training(uint32 ch, uint32 cs)
     }
 }
 
+
 static void __sramfunc ddr_set_dll_bypass(uint32 ch, uint32 freq)
 {
     pDDR_REG_T    pDDR_Reg = DATA(ddr_ch[ch]).pDDR_Reg;
@@ -2102,8 +2143,8 @@ static noinline uint32 ddr_get_parameter(uint32 nMHz)
     uint32 cwl;
     PCTL_TIMING_T *p_pctl_timing=&(p_ddr_reg->pctl.pctl_timing);
     PHY_TIMING_T  *p_publ_timing=&(p_ddr_reg->publ.phy_timing);
-    NOC_TIMING_T  *p_noc_timing=&(p_ddr_reg->noc[0].ddrtiming);
-    NOC_ACTIVATE_T  *p_noc_activate=&(p_ddr_reg->noc[0].activate);
+    volatile NOC_TIMING_T  *p_noc_timing=&(p_ddr_reg->noc[0].ddrtiming);
+    volatile NOC_ACTIVATE_T  *p_noc_activate=&(p_ddr_reg->noc[0].activate);
     uint32 ch;
     uint32 mem_type;
     uint32 ddr_speed_bin=DDR3_DEFAULT;
@@ -2543,14 +2584,7 @@ static noinline uint32 ddr_get_parameter(uint32 nMHz)
         uint32 twr_tmp;
 
         al = 0;
-        if(nMHz>=200)
-        {
-            bl = 4;  //you can change burst here
-        }
-        else
-        {
-            bl = 8;  // freq < 200MHz, BL fixed 8
-        }
+        bl = 8;
         /*     1066 933 800 667 533 400 333
          * RL,   8   7   6   5   4   3   3
          * WL,   4   4   3   2   2   1   1
@@ -3278,8 +3312,8 @@ static uint32 __sramfunc ddr_update_timing(uint32 ch)
     uint32 i,bl_tmp=0;
     PCTL_TIMING_T *p_pctl_timing=&(DATA(ddr_reg).pctl.pctl_timing);
     PHY_TIMING_T  *p_publ_timing=&(DATA(ddr_reg).publ.phy_timing);
-    NOC_TIMING_T  *p_noc_timing=&(DATA(ddr_reg).noc[0].ddrtiming);    
-    NOC_ACTIVATE_T  *p_noc_activate=&(DATA(ddr_reg).noc[0].activate);
+    volatile NOC_TIMING_T  *p_noc_timing=&(DATA(ddr_reg).noc[0].ddrtiming);
+    volatile NOC_ACTIVATE_T  *p_noc_activate=&(DATA(ddr_reg).noc[0].activate);
     pDDR_REG_T    pDDR_Reg = DATA(ddr_ch[ch]).pDDR_Reg;
     pDDRPHY_REG_T pPHY_Reg = DATA(ddr_ch[ch]).pPHY_Reg;
     pMSCH_REG     pMSCH_Reg= DATA(ddr_ch[ch]).pMSCH_Reg;
@@ -3450,101 +3484,6 @@ static void __sramfunc ddr_update_odt(uint32 ch)
     dsb();
 }
 
-void PIE_FUNC(ddr_adjust_config)(void *arg)
-{
-    uint32 value[CH_MAX];
-    uint32 ch;
-    pDDR_REG_T    pDDR_Reg;
-    pDDRPHY_REG_T pPHY_Reg;
-
-    for(ch=0;ch<CH_MAX;ch++)
-    {
-        if(DATA(ddr_ch[ch]).mem_type != DRAM_MAX)
-        {
-            value[ch] = ((uint32 *)arg)[ch];
-            pDDR_Reg = DATA(ddr_ch[ch]).pDDR_Reg;
-            pPHY_Reg = DATA(ddr_ch[ch]).pPHY_Reg;
-            
-            //enter config state
-            ddr_move_to_Config_state(ch);
-
-            //set data training address
-            pPHY_Reg->DTAR = value[ch];
-
-            //set auto power down idle
-            pDDR_Reg->MCFG=(pDDR_Reg->MCFG&0xffff00ff)|(PD_IDLE<<8);
-
-            //CKDV=00
-            pPHY_Reg->PGCR &= ~(0x3<<12);
-
-            //enable the hardware low-power interface
-            pDDR_Reg->SCFG.b.hw_low_power_en = 1;
-
-            if(pDDR_Reg->PPCFG & 1)
-            {
-                pPHY_Reg->DATX8[2].DXGCR &= ~(1);          //disable byte
-                pPHY_Reg->DATX8[3].DXGCR &= ~(1);
-                pPHY_Reg->DATX8[2].DXDLLCR |= 0x80000000;  //disable DLL
-                pPHY_Reg->DATX8[3].DXDLLCR |= 0x80000000;
-            }
-
-            ddr_update_odt(ch);
-
-            //enter access state
-            ddr_move_to_Access_state(ch);
-        }
-    }
-}
-EXPORT_PIE_SYMBOL(FUNC(ddr_adjust_config));
-
-static void ddr_adjust_config(void)
-{
-    uint32 dtar[CH_MAX];
-    uint32 i;
-    volatile uint32 n;
-    volatile unsigned int * temp=(volatile unsigned int *)SRAM_CODE_OFFSET;
-
-    //get data training address before idle port
-    ddr_get_datatraing_addr(dtar);
-
-    /** 1. Make sure there is no host access */
-    flush_cache_all();
-    outer_flush_all();
-    flush_tlb_all();
-    isb();
-
-    for(i=0;i<SRAM_SIZE/4096;i++)
-    {
-        n=temp[1024*i];
-        barrier();
-    }
-    for(i=0;i<CH_MAX;i++)
-    {
-        if(p_ddr_ch[i]->mem_type != DRAM_MAX)
-        {
-            n= p_ddr_ch[i]->pDDR_Reg->SCFG.d32;
-            n= p_ddr_ch[i]->pPHY_Reg->RIDR;
-            n= p_ddr_ch[i]->pMSCH_Reg->ddrconf;
-        }
-    }
-    n= pCRU_Reg->CRU_PLL_CON[0][0];
-    n= pPMU_Reg->PMU_WAKEUP_CFG[0];
-    n= READ_GRF_REG();
-    dsb();
-
-    call_with_stack(fn_to_pie(rockchip_pie_chunk, &FUNC(ddr_adjust_config)),
-                    (void *)dtar,
-                    rockchip_sram_stack);
-    //disable unused channel
-    for(i=0;i<CH_MAX;i++)
-    {
-        if(p_ddr_ch[i]->mem_type != DRAM_MAX)
-        {
-            //FIXME
-        }
-    }
-}
-
 static void __sramfunc ddr_selfrefresh_enter(uint32 nMHz)
 {
     uint32 ch;
@@ -3735,8 +3674,12 @@ void PIE_FUNC(ddr_change_freq_sram)(void *arg)
 }
 EXPORT_PIE_SYMBOL(FUNC(ddr_change_freq_sram));
 
-static int dclk_div;
-static noinline uint32 ddr_change_freq_sram(uint32 nMHz , struct ddr_freq_t ddr_freq_t)
+typedef struct freq_tag{  // argument bundle handed to ddr_change_freq_sram() through its single void * parameter
+    uint32_t nMHz;  // target frequency; ddr_change_freq_sram() copies it into its local nMHz
+    struct ddr_freq_t *p_ddr_freq_t;  // frame-timing record; only dereferenced when DDR_CHANGE_FREQ_IN_LCDC_VSYNC is defined
+}freq_t;
+
+static noinline uint32 ddr_change_freq_sram(void *arg)
 {
     uint32 freq;
     uint32 freq_slew=0;
@@ -3747,6 +3690,14 @@ static noinline uint32 ddr_change_freq_sram(uint32 nMHz , struct ddr_freq_t ddr_
     volatile unsigned int * temp=(volatile unsigned int *)SRAM_CODE_OFFSET;
     uint32 i;
     uint32 gpllvaluel;
+    freq_t *p_freq_t=(freq_t *)arg;    
+    uint32 nMHz=p_freq_t->nMHz;
+       static struct rk_screen screen;
+       static int dclk_div, down_dclk_div;
+
+#if defined (DDR_CHANGE_FREQ_IN_LCDC_VSYNC)
+    struct ddr_freq_t *p_ddr_freq_t=p_freq_t->p_ddr_freq_t;
+#endif
 
 #if defined(CONFIG_ARCH_RK3066B)
     if(dqstr_flag==true)
@@ -3755,9 +3706,14 @@ static noinline uint32 ddr_change_freq_sram(uint32 nMHz , struct ddr_freq_t ddr_
         freq_slew = (nMHz>ddr_freq)? 1 : 0;
     }
 #endif
-
-    dclk_div = (cru_readl(RK3288_CRU_CLKSELS_CON(29)) >> 8) & 0xff;
-
+       if (!screen.mode.pixclock) {
+               rk_fb_get_prmry_screen(&screen);
+               if (screen.lcdc_id == 0)
+                       dclk_div = (cru_readl(RK3288_CRU_CLKSELS_CON(27)) >> 8) & 0xff;
+               else if (screen.lcdc_id == 1)
+                       dclk_div = (cru_readl(RK3288_CRU_CLKSELS_CON(29)) >> 8) & 0xff;
+               down_dclk_div = 64*(dclk_div+1)-1;
+       }
     param.arm_freq = ddr_get_pll_freq(APLL);
     gpllvaluel = ddr_get_pll_freq(GPLL);
     if((200 < gpllvaluel) ||( gpllvaluel <1600))      //GPLL:200MHz~1600MHz
@@ -3786,26 +3742,24 @@ static noinline uint32 ddr_change_freq_sram(uint32 nMHz , struct ddr_freq_t ddr_
     /** 1. Make sure there is no host access */
     local_irq_save(flags);
     local_fiq_disable();
-    flush_cache_all();
-    outer_flush_all();
     flush_tlb_all();
     isb();
 
 #if defined (DDR_CHANGE_FREQ_IN_LCDC_VSYNC)
-    if(ddr_freq_t.screen_ft_us > 0)
+    if(p_ddr_freq_t->screen_ft_us > 0)
     {
-        ddr_freq_t.t1 = cpu_clock(0);
-        ddr_freq_t.t2 = (uint32)(ddr_freq_t.t1 - ddr_freq_t.t0);   //ns
+        p_ddr_freq_t->t1 = cpu_clock(0);
+        p_ddr_freq_t->t2 = (uint32)(p_ddr_freq_t->t1 - p_ddr_freq_t->t0);   //ns
 
         //if test_count exceed maximum test times,ddr_freq_t.screen_ft_us == 0xfefefefe by ddr_freq.c
-        if( (ddr_freq_t.t2 > ddr_freq_t.screen_ft_us*1000) && (ddr_freq_t.screen_ft_us != 0xfefefefe))
+        if( (p_ddr_freq_t->t2 > p_ddr_freq_t->screen_ft_us*1000) && (p_ddr_freq_t->screen_ft_us != 0xfefefefe))
         {
             freq = 0;
             goto end;
         }
         else
         {
-            rk_fb_poll_wait_frame_complete();
+            rk_fb_poll_wait_frame_complete();
         }
     }
 #endif
@@ -3832,11 +3786,26 @@ static noinline uint32 ddr_change_freq_sram(uint32 nMHz , struct ddr_freq_t ddr_
     param.freq = freq;
     param.freq_slew = freq_slew;
     param.dqstr_value = dqstr_value;
-    cru_writel(0 |CRU_W_MSK_SETBITS(0xff,8,0xff), RK3288_CRU_CLKSELS_CON(29));
+       rk_fb_set_prmry_screen_status(SCREEN_PREPARE_DDR_CHANGE);
+       if (screen.lcdc_id == 0)
+               cru_writel(0 | CRU_W_MSK_SETBITS(down_dclk_div, 8, 0xff),
+                          RK3288_CRU_CLKSELS_CON(27));
+       else if (screen.lcdc_id == 1)
+               cru_writel(0 | CRU_W_MSK_SETBITS(down_dclk_div, 8, 0xff),
+                          RK3288_CRU_CLKSELS_CON(29));
+
     call_with_stack(fn_to_pie(rockchip_pie_chunk, &FUNC(ddr_change_freq_sram)),
                     &param,
-                    rockchip_sram_stack-(NR_CPUS-1)*PAUSE_CPU_STACK_SZIE);
-    cru_writel(0 |CRU_W_MSK_SETBITS(dclk_div,8,0xff), RK3288_CRU_CLKSELS_CON(29));
+                    rockchip_sram_stack-(NR_CPUS-1)*PAUSE_CPU_STACK_SIZE);
+
+       if (screen.lcdc_id == 0)
+               cru_writel(0 | CRU_W_MSK_SETBITS(dclk_div, 8, 0xff),
+               RK3288_CRU_CLKSELS_CON(27));
+       else if (screen.lcdc_id == 1)
+               cru_writel(0 | CRU_W_MSK_SETBITS(dclk_div, 8, 0xff),
+               RK3288_CRU_CLKSELS_CON(29));
+       rk_fb_set_prmry_screen_status(SCREEN_UNPREPARE_DDR_CHANGE);
+
 #if defined (DDR_CHANGE_FREQ_IN_LCDC_VSYNC)
 end:
 #endif
@@ -3893,37 +3862,259 @@ static uint32 ddr_change_freq_gpll_dpll(uint32 nMHz)
 }
 #endif
 
+bool DEFINE_PIE_DATA(cpu_pause[NR_CPUS]);  // one pause flag per CPU, declared through DEFINE_PIE_DATA
+volatile bool *DATA(p_cpu_pause);  // second view of the same flags; ddr_init() sets it to kern_to_pie(rockchip_pie_chunk, &DATA(cpu_pause[0]))
+static inline bool is_cpu0_paused(unsigned int cpu) { smp_rmb(); return DATA(cpu_pause)[0]; }  // direct-array read of flag 0 (used by _pause_cpu); the cpu argument is unused
+static inline void set_cpuX_paused(unsigned int cpu, bool pause) { DATA(cpu_pause)[cpu] = pause; smp_wmb(); }  // direct-array write of this CPU's flag (used by _pause_cpu), followed by a write barrier
+static inline bool is_cpuX_paused(unsigned int cpu) { smp_rmb(); return DATA(p_cpu_pause)[cpu]; }  // pointer-view read of a CPU's flag (used by call_with_single_cpu)
+static inline void set_cpu0_paused(bool pause) { DATA(p_cpu_pause)[0] = pause; smp_wmb();}  // pointer-view write of flag 0 (used by call_with_single_cpu); NOTE(review): flag 0 doubles as the "hold" flag for all CPUs — confirm the caller always runs on CPU 0
+
+/* Do not use stack, safe on SMP */
+void PIE_FUNC(_pause_cpu)(void *arg)  // parks the calling CPU; arg is the CPU number cast to a pointer (see pause_cpu)
+{       
+    unsigned int cpu = (unsigned int)arg;
+    
+    set_cpuX_paused(cpu, true);  // announce that this CPU has reached the pause loop
+    while (is_cpu0_paused(cpu));  // busy-wait until flag 0 reads false
+    set_cpuX_paused(cpu, false);  // clear this CPU's flag again on the way out
+}
+
+static void pause_cpu(void *info)  // cross-call callback issued by call_with_single_cpu(); info is unused
+{
+    unsigned int cpu = raw_smp_processor_id();  // the CPU this callback is executing on
+
+    call_with_stack(fn_to_pie(rockchip_pie_chunk, &FUNC(_pause_cpu)),  // run the PIE copy of _pause_cpu on a stack taken from rockchip_sram_stack
+            (void *)cpu,
+            rockchip_sram_stack-(cpu-1)*PAUSE_CPU_STACK_SIZE);  // per-CPU slot below rockchip_sram_stack; NOTE(review): cpu is unsigned, so (cpu-1) wraps when cpu == 0 — confirm this never runs on CPU 0
+}
+
+static void wait_cpu(void *info)  // intentionally empty callback; call_with_single_cpu() issues it with wait == true
+{
+}
+
+static int call_with_single_cpu(u32 (*fn)(void *arg), void *arg)  // run fn(arg) on this CPU while every other online CPU spins in pause_cpu(); returns fn's result, or -EPERM if a CPU failed to pause in time
+{
+       s64 now_ns, timeout_ns;
+       unsigned int cpu;
+       unsigned int this_cpu = smp_processor_id();
+       int ret = 0;  // later overwritten by fn()'s u32 return value (converted to int) or by -EPERM
+
+       cpu_maps_update_begin();
+       local_bh_disable();
+
+       /* It should take much less than 1s to pause the cpus. It typically
+       * takes around 20us. */
+       timeout_ns = ktime_to_ns(ktime_add_ns(ktime_get(), NSEC_PER_SEC));  // deadline: now + NSEC_PER_SEC
+       now_ns = ktime_to_ns(ktime_get());
+       set_cpu0_paused(true);  // raise flag 0 before the cross-call: _pause_cpu() spins for as long as it reads true
+       smp_call_function((smp_call_func_t)pause_cpu, NULL, 0);  // last argument 0: do not wait for completion (the callbacks spin until released)
+       for_each_online_cpu(cpu) {
+               if (cpu == this_cpu)
+                       continue;
+               while (!is_cpuX_paused(cpu) && (now_ns < timeout_ns))  // poll until this CPU reports paused or the shared deadline passes
+                       now_ns = ktime_to_ns(ktime_get());
+               if (now_ns >= timeout_ns) {
+                       pr_err("pause cpu %d timeout\n", cpu);
+                       ret = -EPERM;  // fn is not called on this path
+                       goto out;
+               }
+       }
+       ret = fn(arg);  // all other online CPUs have reported paused
+out:
+       set_cpu0_paused(false);  // release the CPUs spinning in _pause_cpu()
+       local_bh_enable();
+       smp_call_function(wait_cpu, NULL, true);  // wait == true: returns after every other CPU has run the empty callback; presumably confirms they have left pause_cpu() — TODO confirm
+       cpu_maps_update_done();
+
+       return ret;
+}
+
+void PIE_FUNC(ddr_adjust_config)(void *arg)  // PIE routine: applies per-channel controller/PHY settings; arg is read as a uint32 array indexed by channel (data training addresses)
+{
+    uint32 value[CH_MAX];
+    uint32 ch;
+    pDDR_REG_T    pDDR_Reg;
+    pDDRPHY_REG_T pPHY_Reg;
+
+    for(ch=0;ch<CH_MAX;ch++)
+    {
+        if(DATA(ddr_ch[ch]).mem_type != DRAM_MAX)  // channels whose mem_type is DRAM_MAX are skipped
+        {
+            value[ch] = ((uint32 *)arg)[ch];  // this channel's entry from the caller's array
+            pDDR_Reg = DATA(ddr_ch[ch]).pDDR_Reg;
+            pPHY_Reg = DATA(ddr_ch[ch]).pPHY_Reg;
+            
+            //enter config state
+            ddr_move_to_Config_state(ch);
+
+            //set data training address
+            pPHY_Reg->DTAR = value[ch];
+
+            //set auto power down idle
+            pDDR_Reg->MCFG=(pDDR_Reg->MCFG&0xffff00ff)|(PD_IDLE<<8);  // replaces MCFG bits 15:8 with PD_IDLE
+
+            //CKDV=00
+            pPHY_Reg->PGCR &= ~(0x3<<12);  // clears PGCR bits 13:12
+
+            //enable the hardware low-power interface
+            pDDR_Reg->SCFG.b.hw_low_power_en = 1;
+
+            if(pDDR_Reg->PPCFG & 1)  // PPCFG bit 0 set: byte lanes 2 and 3 are switched off below
+            {
+                pPHY_Reg->DATX8[2].DXGCR &= ~(1);          //disable byte
+                pPHY_Reg->DATX8[3].DXGCR &= ~(1);
+                pPHY_Reg->DATX8[2].DXDLLCR |= 0x80000000;  //disable DLL
+                pPHY_Reg->DATX8[3].DXDLLCR |= 0x80000000;
+            }
+
+            ddr_update_odt(ch);
+
+            //enter access state
+            ddr_move_to_Access_state(ch);
+        }
+    }
+}
+EXPORT_PIE_SYMBOL(FUNC(ddr_adjust_config));
+
+static uint32 _ddr_adjust_config(void *dtar)  // runs the PIE ddr_adjust_config with IRQ/FIQ disabled; dtar is passed through unchanged; always returns 0
+{
+    uint32 i;
+    unsigned long flags;
+    volatile uint32 n;  // sink for the dummy reads below; the values are never used
+    volatile unsigned int * temp=(volatile unsigned int *)SRAM_CODE_OFFSET;
+    
+     /** 1. Make sure there is no host access */
+    local_irq_save(flags);
+    local_fiq_disable();
+    flush_tlb_all();
+    isb();
+
+    for(i=0;i<SRAM_SIZE/4096;i++)  // one dummy read every 1024 unsigned ints from SRAM_CODE_OFFSET; presumably reloads TLB entries after the flush — TODO confirm
+    {
+        n=temp[1024*i];
+        barrier();
+    }
+    for(i=0;i<CH_MAX;i++)
+    {
+        if(p_ddr_ch[i]->mem_type != DRAM_MAX)
+        {
+            n= p_ddr_ch[i]->pDDR_Reg->SCFG.d32;  // dummy reads of one register in each of the channel's register blocks
+            n= p_ddr_ch[i]->pPHY_Reg->RIDR;
+            n= p_ddr_ch[i]->pMSCH_Reg->ddrconf;
+        }
+    }
+    n= pCRU_Reg->CRU_PLL_CON[0][0];  // dummy reads of CRU, PMU and GRF registers
+    n= pPMU_Reg->PMU_WAKEUP_CFG[0];
+    n= READ_GRF_REG();
+    dsb();
+
+    call_with_stack(fn_to_pie(rockchip_pie_chunk, &FUNC(ddr_adjust_config)),
+                    (void *)dtar,
+                    rockchip_sram_stack-(NR_CPUS-1)*PAUSE_CPU_STACK_SIZE);  // stack slot below the (NR_CPUS-1) pause slots
+    local_fiq_enable();  // NOTE(review): FIQ is enabled unconditionally, not restored to its prior state
+    local_irq_restore(flags);
+    return 0;
+}
+
+static void ddr_adjust_config(void)  // computes the data training addresses, then applies the configuration with the other CPUs paused
+{
+    uint32 dtar[CH_MAX];
+    uint32 i;
+
+    //get data training address before idle port
+    ddr_get_datatraing_addr(dtar);
+
+    call_with_single_cpu(&_ddr_adjust_config, (void*)dtar);  // return value (including a possible -EPERM timeout) is ignored here
+    //_ddr_adjust_config(dtar);
+    //disable unused channel
+    for(i=0;i<CH_MAX;i++)  // placeholder loop: the body contains no statements yet
+    {
+        if(p_ddr_ch[i]->mem_type != DRAM_MAX)
+        {
+            //FIXME
+        }
+    }
+}
+
+static int __ddr_change_freq(uint32_t nMHz, struct ddr_freq_t ddr_freq_t)  // packs the arguments into a freq_t and runs ddr_change_freq_sram() via call_with_single_cpu(); returns that call's result
+{
+    freq_t freq;
+    int ret = 0;
+
+    freq.nMHz = nMHz;
+    freq.p_ddr_freq_t = &ddr_freq_t;  // points at this function's by-value copy, so t1/t2 written by ddr_change_freq_sram() do not reach the caller's struct
+    ret = call_with_single_cpu(&ddr_change_freq_sram, 
+                               (void*)&freq);
+
+    return ret;
+}
+
 static int _ddr_change_freq(uint32 nMHz)  // changes DDR to nMHz; with DDR_CHANGE_FREQ_IN_LCDC_VSYNC the attempt is timed against the primary screen's frame period
 {
        struct ddr_freq_t ddr_freq_t;
+        #if defined (DDR_CHANGE_FREQ_IN_LCDC_VSYNC)
+       unsigned long remain_t, vblank_t, pass_t;
+       static unsigned long reserve_t = 800;//us; static, so the adapted value carries over between calls
+       unsigned long long tmp;
        int test_count=0;
+        #endif
+        int ret;
+
+       memset(&ddr_freq_t, 0x00, sizeof(ddr_freq_t));  // zero every field of the timing record
 
-       ddr_freq_t.screen_ft_us = 0;
-       ddr_freq_t.t0 = 0;
-       ddr_freq_t.t1 = 0;
 #if defined (DDR_CHANGE_FREQ_IN_LCDC_VSYNC)
        do
        {
-               if(rk_fb_poll_wait_frame_complete() == true)
-               {
-                       ddr_freq_t.t0 = cpu_clock(0);
-                       ddr_freq_t.screen_ft_us = rk_fb_get_prmry_screen_ft();
-
-                       test_count++;
-                        if(test_count > 10) //test 10 times
-                        {
-                               ddr_freq_t.screen_ft_us = 0xfefefefe;
-                        }
-                       usleep_range(ddr_freq_t.screen_ft_us-test_count*1000,ddr_freq_t.screen_ft_us-test_count*1000);
-
-                       flush_cache_all();
-                       outer_flush_all();
-                       flush_tlb_all();
+               ddr_freq_t.screen_ft_us = rk_fb_get_prmry_screen_ft();
+               ddr_freq_t.t0 = rk_fb_get_prmry_screen_framedone_t();
+               if (!ddr_freq_t.screen_ft_us)  // frame time reported as 0: change frequency immediately, without frame timing
+                       return __ddr_change_freq(nMHz, ddr_freq_t);
+
+               tmp = cpu_clock(0) - ddr_freq_t.t0;  // time elapsed since t0
+               do_div(tmp, 1000);  // divide by 1000 (ns -> us, given the us comparisons below)
+               pass_t = tmp;
+               //lost frame interrupt
+               while (pass_t > ddr_freq_t.screen_ft_us){  // more than one frame period has elapsed: advance t0 by whole periods
+                       int n = pass_t/ddr_freq_t.screen_ft_us;
+
+                       //printk("lost frame int, pass_t:%lu\n", pass_t);
+                       pass_t -= n*ddr_freq_t.screen_ft_us;
+                       ddr_freq_t.t0 += n*ddr_freq_t.screen_ft_us*1000;
                }
-       }while(ddr_change_freq_sram(nMHz, ddr_freq_t)==0);
+
+               remain_t = ddr_freq_t.screen_ft_us - pass_t;  // time left in the current frame period
+               if (remain_t < reserve_t) {  // less than the reserve is left: sleep past this frame (plus vblank time) and start over
+                       //printk("remain_t(%lu) < reserve_t(%lu)\n", remain_t, reserve_t);
+                       vblank_t = rk_fb_get_prmry_screen_vbt();
+                       usleep_range(remain_t+vblank_t, remain_t+vblank_t);
+                       continue;
+               }
+
+               //test 10 times
+               test_count++;
+                if(test_count > 10)
+                {
+                       ddr_freq_t.screen_ft_us = 0xfefefefe;  // sentinel: ddr_change_freq_sram() skips its "too late" bail-out for this value
+                }
+               //printk("ft:%lu, pass_t:%lu, remaint_t:%lu, reservet_t:%lu\n",
+               //      ddr_freq_t.screen_ft_us, (unsigned long)pass_t, remain_t, reserve_t);
+               usleep_range(remain_t-reserve_t, remain_t-reserve_t);  // sleep until reserve_t before the end of the frame period
+               flush_tlb_all();
+
+               ret = __ddr_change_freq(nMHz, ddr_freq_t);
+               if (ret) {  // non-zero result ends the loop and resets the reserve to its initial 800
+                       reserve_t = 800;
+                       return ret;
+               } else {  // zero result: widen the reserve by 200 (no further once it reaches 3000) and retry
+                       if (reserve_t < 3000)
+                               reserve_t += 200;
+               }
+       }while(1);  // the loop is only left through the return statements above
 #else
-       return ddr_change_freq_sram(nMHz, ddr_freq_t);
+       ret = __ddr_change_freq(nMHz, ddr_freq_t);
 #endif
+
+       return ret;  // reached only in the #else build; the VSYNC build returns from inside the loop
 }
 
 static long _ddr_round_rate(uint32 nMHz)
@@ -3945,10 +4136,10 @@ static void _ddr_set_auto_self_refresh(bool en)
 
 #define PERI_PCLK_DIV_MASK 0x3
 #define PERI_PCLK_DIV_OFF 12
+#if 0
 static __sramdata u32 cru_sel32_sram;
 static void __sramfunc ddr_suspend(void)
 {
-#if 0
     u32 i;
     volatile u32 n;
     volatile unsigned int * temp=(volatile unsigned int *)SRAM_CODE_OFFSET;
@@ -3991,12 +4182,10 @@ static void __sramfunc ddr_suspend(void)
                                   |CRU_W_MSK_SETBITS(0, PERI_PCLK_DIV_OFF, PERI_PCLK_DIV_MASK);
     }
     pPHY_Reg->DSGCR = pPHY_Reg->DSGCR&(~((0x1<<28)|(0x1<<29)));  //CKOE
-#endif
 }
 
 static void __sramfunc ddr_resume(void)
 {
-#if 0
     int delay=1000;
     int pll_id;
 
@@ -4021,8 +4210,8 @@ static void __sramfunc ddr_resume(void)
     dsb();
 
     ddr_selfrefresh_exit();
-#endif
 }
+#endif
 
 //pArg: the pointed-to value indicates whether the PLL is powered down (pll pd) or not.
 void ddr_reg_save(uint32 *pArg)
@@ -4221,6 +4410,123 @@ char * ddr_get_resume_data_info(u32 *size)
 }
 EXPORT_SYMBOL(ddr_get_resume_data_info);
 
+/**********************ddr bandwidth calc*********************/
+enum ddr_bandwidth_id {  // column indices into ddr_bw_val[][] in _ddr_bandwidth_get()
+       ddrbw_wr_num = 0,  // the four entries below ddrbw_eff are read from consecutive GRF status words (offset i*4)
+       ddrbw_rd_num,
+       ddrbw_act_num,
+       ddrbw_time_num,  // used as the divisor for every rate computed in _ddr_bandwidth_get()
+       ddrbw_eff,  // not read from hardware: computed from the write and read counts
+       ddrbw_id_end  // number of entries; sizes the second dimension of ddr_bw_val
+};
+
+/*
+ * Relaxed MMIO accessors for the GRF block (base RK_GRF_VIRT) and the service
+ * bus / NoC block (base RK3288_SERVICE_BUS_VIRT). The write variants issue a
+ * dsb() after the store. Macro arguments are parenthesized in the expansion
+ * so that any expression can be passed as the value or offset without
+ * operator-precedence surprises.
+ */
+#define grf_readl(offset)      readl_relaxed(RK_GRF_VIRT + (offset))
+#define grf_writel(v, offset) \
+       do { writel_relaxed((v), RK_GRF_VIRT + (offset)); dsb(); } while (0)
+
+#define noc_readl(offset)       readl_relaxed(RK3288_SERVICE_BUS_VIRT + (offset))
+#define noc_writel(v, offset) \
+       do { writel_relaxed((v), RK3288_SERVICE_BUS_VIRT + (offset)); \
+               dsb(); } while (0)
+
+static void ddr_monitor_start(void)  // programs seven service-bus register blocks and writes GRF_SOC_CON4; called at the end of _ddr_bandwidth_get() to restart counting
+{
+       int i;
+
+       for (i = 1; i < 8; i++) {  // blocks at 0x400*i for i = 1..7 (the same bases _ddr_bandwidth_get() reads counters from)
+               noc_writel(0x8, (0x400*i+0x8));  // fixed setup values; register meanings are not visible in this file — see the NoC documentation
+               noc_writel(0x1, (0x400*i+0xc));
+               noc_writel(0x6, (0x400*i+0x138));
+               noc_writel(0x10, (0x400*i+0x14c));
+               noc_writel(0x8, (0x400*i+0x160));
+               noc_writel(0x10, (0x400*i+0x174));
+       }
+
+       grf_writel((((readl_relaxed(RK_PMU_VIRT+0x9c)>>13)&7) == 3) ?  // value chosen from bits 15:13 of the PMU word at 0x9c; presumably a DRAM-type field — TODO confirm
+                       0xc000c000 : 0xe000e000, RK3288_GRF_SOC_CON4);
+
+       for (i = 1; i < 8; i++)
+               noc_writel(0x1, (0x400*i+0x28));  // final write of 1 to offset 0x28 of each block; presumably starts the counters — TODO confirm
+}
+
+static void ddr_monitor_stop(void)  // called by _ddr_bandwidth_get() before it reads the counters
+{
+       grf_writel(0xc0000000, RK3288_GRF_SOC_CON4);  // low half is 0 where ddr_monitor_start() writes 0xc000/0xe000; presumably the high half is a write-enable mask — TODO confirm
+}
+
+/*
+ * Return (count * mul) / div with the product held in 64 bits, so that large
+ * counter values cannot wrap a 32-bit intermediate. div must be non-zero.
+ */
+static u32 ddr_bw_scale(u32 count, u32 mul, u32 div)
+{
+       u64 scaled = (u64)count * mul;
+
+       do_div(scaled, div);
+       return scaled;
+}
+
+/*
+ * Read the counter pair of the service-bus block at @base: the word at
+ * base+0x178 shifted left by 16, plus the word at base+0x164.
+ */
+static u32 ddr_bw_noc_count(u32 base)
+{
+       return (noc_readl(base + 0x178)<<16) + (noc_readl(base + 0x164));
+}
+
+/*
+ * Snapshot the DDR bandwidth counters and publish channel-0 statistics.
+ *
+ * The monitor is stopped, the write/read/activate/time counters of both
+ * channels are read from the GRF status registers, and the monitor is
+ * restarted before returning on every path.
+ *
+ * @ddr_bw_ch0: filled with the channel-0 results; may be NULL, in which case
+ *              nothing is reported.
+ * @ddr_bw_ch1: never written. The channel-1 counters are read into
+ *              ddr_bw_val[1] but are not used.
+ *              NOTE(review): confirm whether channel-1 reporting is intended.
+ *
+ * Nothing is written when the channel-0 time counter is zero or when the
+ * frequency word read from RK_DDR_VIRT + 0xc0 is zero, because both are used
+ * as divisors.
+ *
+ * All rate computations go through ddr_bw_scale() so that count * frequency
+ * is evaluated in 64 bits; the results equal the previous 32-bit expressions
+ * whenever those did not overflow.
+ * NOTE(review): assumes the struct ddr_bw_info fields are 32 bits wide.
+ */
+static void _ddr_bandwidth_get(struct ddr_bw_info *ddr_bw_ch0,
+                       struct ddr_bw_info *ddr_bw_ch1)
+{
+       u32 ddr_bw_val[2][ddrbw_id_end], ddr_freq, time_num;
+       u64 temp64;
+       int i, j;
+
+       ddr_monitor_stop();
+       /* Four consecutive status words per channel, channels 16 bytes apart. */
+       for (j = 0; j < 2; j++) {
+               for (i = 0; i < ddrbw_eff; i++)
+                       ddr_bw_val[j][i] =
+                               grf_readl(RK3288_GRF_SOC_STATUS11+i*4+j*16);
+       }
+       time_num = ddr_bw_val[0][ddrbw_time_num];
+       if (!time_num)
+               goto end;
+
+       if (ddr_bw_ch0) {
+               ddr_freq = readl_relaxed(RK_DDR_VIRT + 0xc0);
+               /* ddr_freq is a divisor below; skip reporting if it reads 0. */
+               if (!ddr_freq)
+                       goto end;
+
+               /* Percentage: (writes + reads) * 4 * 100 / elapsed time count. */
+               temp64 = ((u64)ddr_bw_val[0][0]+ddr_bw_val[0][1])*4*100;
+               do_div(temp64, time_num);
+               ddr_bw_val[0][ddrbw_eff] = temp64;
+
+               ddr_bw_ch0->ddr_percent = temp64;
+               ddr_bw_ch0->ddr_time = time_num/(ddr_freq*1000);
+               ddr_bw_ch0->ddr_wr =
+                       ddr_bw_scale(ddr_bw_val[0][ddrbw_wr_num],
+                                    8*4*ddr_freq, time_num);
+               ddr_bw_ch0->ddr_rd =
+                       ddr_bw_scale(ddr_bw_val[0][ddrbw_rd_num],
+                                    8*4*ddr_freq, time_num);
+               ddr_bw_ch0->ddr_act =
+                       ddr_bw_val[0][ddrbw_act_num];
+               ddr_bw_ch0->ddr_total =
+                       ddr_freq*2*4;
+
+               /* Per-master counters, scaled by frequency / elapsed time. */
+               ddr_bw_ch0->cpum =
+                       ddr_bw_scale(ddr_bw_noc_count(0x400), ddr_freq, time_num);
+               ddr_bw_ch0->gpu =
+                       ddr_bw_scale(ddr_bw_noc_count(0x800), ddr_freq, time_num);
+               ddr_bw_ch0->peri =
+                       ddr_bw_scale(ddr_bw_noc_count(0xc00), ddr_freq, time_num);
+               ddr_bw_ch0->video =
+                       ddr_bw_scale(ddr_bw_noc_count(0x1000), ddr_freq, time_num);
+               ddr_bw_ch0->vio0 =
+                       ddr_bw_scale(ddr_bw_noc_count(0x1400), ddr_freq, time_num);
+               ddr_bw_ch0->vio1 =
+                       ddr_bw_scale(ddr_bw_noc_count(0x1800), ddr_freq, time_num);
+               ddr_bw_ch0->vio2 =
+                       ddr_bw_scale(ddr_bw_noc_count(0x1c00), ddr_freq, time_num);
+       }
+end:
+       ddr_monitor_start();
+}
+
+/******************************************************************/
 
 static int ddr_init(uint32 dram_speed_bin, uint32 freq)
 {
@@ -4230,11 +4536,11 @@ static int ddr_init(uint32 dram_speed_bin, uint32 freq)
     struct clk *clk;
     uint32 ch,cap=0,cs_cap;
 
-    ddr_print("version 1.00 20140404 \n");
+    ddr_print("version 1.00 20150126 \n");
 
     p_ddr_reg = kern_to_pie(rockchip_pie_chunk, &DATA(ddr_reg));
     p_ddr_set_pll = fn_to_pie(rockchip_pie_chunk, &FUNC(ddr_set_pll));
-    //p_cpu_pause = kern_to_pie(rockchip_pie_chunk, &DATA(cpu_pause[0]));
+    DATA(p_cpu_pause) = kern_to_pie(rockchip_pie_chunk, &DATA(cpu_pause[0]));
 
     tmp = clk_get_rate(clk_get(NULL, "clk_ddr"))/1000000;
     *kern_to_pie(rockchip_pie_chunk, &DATA(ddr_freq)) = tmp;