25684d32c233b99f818b74d3c2ce79618bdc5161
[firefly-linux-kernel-4.4.55.git] / drivers / media / platform / rk3288-vpu / rk3288_vpu_hw_vp8e.c
1 /*
2  * Rockchip RK3288 VPU codec driver
3  *
4  * Copyright (C) 2014 Rockchip Electronics Co., Ltd.
5  *      Alpha Lin <Alpha.Lin@rock-chips.com>
6  *      Jeffy Chen <jeffy.chen@rock-chips.com>
7  *
8  * Copyright (C) 2014 Google, Inc.
9  *      Tomasz Figa <tfiga@chromium.org>
10  *
11  * This software is licensed under the terms of the GNU General Public
12  * License version 2, as published by the Free Software Foundation, and
13  * may be copied, distributed, and modified under those terms.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  */
20
21 #include "rk3288_vpu_common.h"
22
23 #include <linux/types.h>
24 #include <linux/sort.h>
25
26 #include "rk3288_vpu_regs.h"
27 #include "rk3288_vpu_hw.h"
28
/* Various parameters specific to VP8 encoder. */

/* Byte offset and byte size of the CABAC context table in the priv buffer. */
#define VP8_CABAC_CTX_OFFSET                    192
#define VP8_CABAC_CTX_SIZE                      ((55 + 96) * 8)

/* Frame header sizes subtracted when computing the frame tag length field. */
#define VP8_KEY_FRAME_HDR_SIZE                  10
#define VP8_INTER_FRAME_HDR_SIZE                3

/* Fields of the first 32-bit word of the frame (the frame tag). */
#define VP8_FRAME_TAG_KEY_FRAME_BIT             BIT(0)
#define VP8_FRAME_TAG_LENGTH_SHIFT              5
#define VP8_FRAME_TAG_LENGTH_MASK               \
        (0x7ffff << VP8_FRAME_TAG_LENGTH_SHIFT)
39
40 /**
41  * struct rk3288_vpu_vp8e_ctrl_buf - hardware control buffer layout
42  * @ext_hdr_size:       Ext header size in bytes (written by hardware).
43  * @dct_size:           DCT partition size (written by hardware).
44  * @rsvd:               Reserved for hardware.
45  */
46 struct rk3288_vpu_vp8e_ctrl_buf {
47         u32 ext_hdr_size;
48         u32 dct_size;
49         u8 rsvd[1016];
50 };
51
52 /*
53  * The hardware takes care only of ext hdr and dct partition. The software
54  * must take care of frame header.
55  *
56  * Buffer layout as received from hardware:
57  *   |<--gap-->|<--ext hdr-->|<-gap->|<---dct part---
58  *   |<-------dct part offset------->|
59  *
60  * Required buffer layout:
61  *   |<--hdr-->|<--ext hdr-->|<---dct part---
62  */
63 void rk3288_vpu_vp8e_assemble_bitstream(struct rk3288_vpu_ctx *ctx,
64                                         struct rk3288_vpu_buf *dst_buf)
65 {
66         size_t ext_hdr_size = dst_buf->vp8e.ext_hdr_size;
67         size_t dct_size = dst_buf->vp8e.dct_size;
68         size_t hdr_size = dst_buf->vp8e.hdr_size;
69         size_t dst_size;
70         size_t tag_size;
71         void *dst;
72         u32 *tag;
73
74         dst_size = vb2_plane_size(&dst_buf->b, 0);
75         dst = vb2_plane_vaddr(&dst_buf->b, 0);
76         tag = dst; /* To access frame tag words. */
77
78         if (WARN_ON(hdr_size + ext_hdr_size + dct_size > dst_size))
79                 return;
80         if (WARN_ON(dst_buf->vp8e.dct_offset + dct_size > dst_size))
81                 return;
82
83         vpu_debug(1, "%s: hdr_size = %u, ext_hdr_size = %u, dct_size = %u\n",
84                         __func__, hdr_size, ext_hdr_size, dct_size);
85
86         memmove(dst + hdr_size + ext_hdr_size,
87                 dst + dst_buf->vp8e.dct_offset, dct_size);
88         memcpy(dst, dst_buf->vp8e.header, hdr_size);
89
90         /* Patch frame tag at first 32-bit word of the frame. */
91         if (dst_buf->b.v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) {
92                 tag_size = VP8_KEY_FRAME_HDR_SIZE;
93                 tag[0] &= ~VP8_FRAME_TAG_KEY_FRAME_BIT;
94         } else {
95                 tag_size = VP8_INTER_FRAME_HDR_SIZE;
96                 tag[0] |= VP8_FRAME_TAG_KEY_FRAME_BIT;
97         }
98
99         tag[0] &= ~VP8_FRAME_TAG_LENGTH_MASK;
100         tag[0] |= (hdr_size + ext_hdr_size - tag_size)
101                                                 << VP8_FRAME_TAG_LENGTH_SHIFT;
102
103         vb2_set_plane_payload(&dst_buf->b, 0,
104                                 hdr_size + ext_hdr_size + dct_size);
105 }
106
107 static inline unsigned int ref_luma_size(unsigned int w, unsigned int h)
108 {
109         return round_up(w, MB_DIM) * round_up(h, MB_DIM);
110 }
111
112 int rk3288_vpu_vp8e_init(struct rk3288_vpu_ctx *ctx)
113 {
114         struct rk3288_vpu_dev *vpu = ctx->dev;
115         size_t height = ctx->src_fmt.height;
116         size_t width = ctx->src_fmt.width;
117         size_t ref_buf_size;
118         size_t mv_size;
119         int ret;
120
121         ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ctrl_buf,
122                                 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
123         if (ret) {
124                 vpu_err("failed to allocate ctrl buffer\n");
125                 return ret;
126         }
127
128         mv_size = DIV_ROUND_UP(width, 16) * DIV_ROUND_UP(height, 16) / 4;
129         ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.mv_buf, mv_size);
130         if (ret) {
131                 vpu_err("failed to allocate MV buffer\n");
132                 goto err_ctrl_buf;
133         }
134
135         ref_buf_size = ref_luma_size(width, height) * 3 / 2;
136         ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ext_buf,
137                                         2 * ref_buf_size);
138         if (ret) {
139                 vpu_err("failed to allocate ext buffer\n");
140                 goto err_mv_buf;
141         }
142
143         return 0;
144
145 err_mv_buf:
146         rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
147 err_ctrl_buf:
148         rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
149
150         return ret;
151 }
152
153 void rk3288_vpu_vp8e_exit(struct rk3288_vpu_ctx *ctx)
154 {
155         struct rk3288_vpu_dev *vpu = ctx->dev;
156
157         rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ext_buf);
158         rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
159         rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
160 }
161
162 static inline u32 enc_in_img_ctrl(struct rk3288_vpu_ctx *ctx)
163 {
164         struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
165         struct v4l2_rect *crop = &ctx->src_crop;
166         unsigned bytes_per_line, overfill_r, overfill_b;
167
168         /*
169          * The hardware needs only the value for luma plane, because
170          * values of other planes are calculated internally based on
171          * format setting.
172          */
173         bytes_per_line = pix_fmt->plane_fmt[0].bytesperline;
174         overfill_r = (pix_fmt->width - crop->width) / 4;
175         overfill_b = pix_fmt->height - crop->height;
176
177         return VEPU_REG_IN_IMG_CTRL_ROW_LEN(bytes_per_line)
178                         | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
179                         | VEPU_REG_IN_IMG_CTRL_OVRFLB_D4(overfill_b)
180                         | VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
181 }
182
183 static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu,
184                                         struct rk3288_vpu_ctx *ctx)
185 {
186         const struct rk3288_vp8e_reg_params *params = ctx->run.vp8e.reg_params;
187         dma_addr_t ref_buf_dma, rec_buf_dma;
188         dma_addr_t stream_dma;
189         size_t rounded_size;
190         dma_addr_t dst_dma;
191         u32 start_offset;
192         size_t dst_size;
193
194         rounded_size = ref_luma_size(ctx->src_fmt.width,
195                                                 ctx->src_fmt.height);
196
197         ref_buf_dma = rec_buf_dma = ctx->hw.vp8e.ext_buf.dma;
198         if (ctx->hw.vp8e.ref_rec_ptr)
199                 ref_buf_dma += rounded_size * 3 / 2;
200         else
201                 rec_buf_dma += rounded_size * 3 / 2;
202         ctx->hw.vp8e.ref_rec_ptr ^= 1;
203
204         dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0);
205         dst_size = vb2_plane_size(&ctx->run.dst->b, 0);
206
207         /*
208          * stream addr-->|
209          * align 64bits->|<-start offset->|
210          * |<---------header size-------->|<---dst buf---
211          */
212         start_offset = (params->rlc_ctrl & VEPU_REG_RLC_CTRL_STR_OFFS_MASK)
213                                         >> VEPU_REG_RLC_CTRL_STR_OFFS_SHIFT;
214         stream_dma = dst_dma + params->hdr_len;
215
216         /**
217          * Userspace will pass 8 bytes aligned size(round_down) to us,
218          * so we need to plus start offset to get real header size.
219          *
220          * |<-aligned size->|<-start offset->|
221          * |<----------header size---------->|
222          */
223         ctx->run.dst->vp8e.hdr_size = params->hdr_len + (start_offset >> 3);
224
225         if (params->enc_ctrl & VEPU_REG_ENC_CTRL_KEYFRAME_BIT)
226                 ctx->run.dst->b.v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
227         else
228                 ctx->run.dst->b.v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
229
230         /*
231          * We assume here that 1/10 of the buffer is enough for headers.
232          * DCT partition will be placed in remaining 9/10 of the buffer.
233          */
234         ctx->run.dst->vp8e.dct_offset = round_up(dst_size / 10, 8);
235
236         /* Destination buffer. */
237         vepu_write_relaxed(vpu, stream_dma, VEPU_REG_ADDR_OUTPUT_STREAM);
238         vepu_write_relaxed(vpu, dst_dma + ctx->run.dst->vp8e.dct_offset,
239                                 VEPU_REG_ADDR_VP8_DCT_PART(0));
240         vepu_write_relaxed(vpu, dst_size - ctx->run.dst->vp8e.dct_offset,
241                                 VEPU_REG_STR_BUF_LIMIT);
242
243         /* Auxilliary buffers. */
244         vepu_write_relaxed(vpu, ctx->hw.vp8e.ctrl_buf.dma,
245                                 VEPU_REG_ADDR_OUTPUT_CTRL);
246         vepu_write_relaxed(vpu, ctx->hw.vp8e.mv_buf.dma,
247                                 VEPU_REG_ADDR_MV_OUT);
248         vepu_write_relaxed(vpu, ctx->run.priv_dst.dma,
249                                 VEPU_REG_ADDR_VP8_PROB_CNT);
250         vepu_write_relaxed(vpu, ctx->run.priv_src.dma + VP8_CABAC_CTX_OFFSET,
251                                 VEPU_REG_ADDR_CABAC_TBL);
252         vepu_write_relaxed(vpu, ctx->run.priv_src.dma
253                                 + VP8_CABAC_CTX_OFFSET + VP8_CABAC_CTX_SIZE,
254                                 VEPU_REG_ADDR_VP8_SEG_MAP);
255
256         /* Reference buffers. */
257         vepu_write_relaxed(vpu, ref_buf_dma,
258                                 VEPU_REG_ADDR_REF_LUMA);
259         vepu_write_relaxed(vpu, ref_buf_dma + rounded_size,
260                                 VEPU_REG_ADDR_REF_CHROMA);
261
262         /* Reconstruction buffers. */
263         vepu_write_relaxed(vpu, rec_buf_dma,
264                                 VEPU_REG_ADDR_REC_LUMA);
265         vepu_write_relaxed(vpu, rec_buf_dma + rounded_size,
266                                 VEPU_REG_ADDR_REC_CHROMA);
267
268         /* Source buffer. */
269         vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
270                                 PLANE_Y), VEPU_REG_ADDR_IN_LUMA);
271         vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
272                                 PLANE_CB), VEPU_REG_ADDR_IN_CB);
273         vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
274                                 PLANE_CR), VEPU_REG_ADDR_IN_CR);
275
276         /* Source parameters. */
277         vepu_write_relaxed(vpu, enc_in_img_ctrl(ctx), VEPU_REG_IN_IMG_CTRL);
278 }
279
280 static void rk3288_vpu_vp8e_set_params(struct rk3288_vpu_dev *vpu,
281                                        struct rk3288_vpu_ctx *ctx)
282 {
283         const struct rk3288_vp8e_reg_params *params = ctx->run.vp8e.reg_params;
284         int i;
285
286         vepu_write_relaxed(vpu, params->enc_ctrl0, VEPU_REG_ENC_CTRL0);
287         vepu_write_relaxed(vpu, params->enc_ctrl1, VEPU_REG_ENC_CTRL1);
288         vepu_write_relaxed(vpu, params->enc_ctrl2, VEPU_REG_ENC_CTRL2);
289         vepu_write_relaxed(vpu, params->enc_ctrl3, VEPU_REG_ENC_CTRL3);
290         vepu_write_relaxed(vpu, params->enc_ctrl5, VEPU_REG_ENC_CTRL5);
291         vepu_write_relaxed(vpu, params->enc_ctrl4, VEPU_REG_ENC_CTRL4);
292         vepu_write_relaxed(vpu, params->str_hdr_rem_msb,
293                                 VEPU_REG_STR_HDR_REM_MSB);
294         vepu_write_relaxed(vpu, params->str_hdr_rem_lsb,
295                                 VEPU_REG_STR_HDR_REM_LSB);
296         vepu_write_relaxed(vpu, params->mad_ctrl, VEPU_REG_MAD_CTRL);
297
298         for (i = 0; i < ARRAY_SIZE(params->qp_val); ++i)
299                 vepu_write_relaxed(vpu, params->qp_val[i],
300                                         VEPU_REG_VP8_QP_VAL(i));
301
302         vepu_write_relaxed(vpu, params->bool_enc, VEPU_REG_VP8_BOOL_ENC);
303         vepu_write_relaxed(vpu, params->vp8_ctrl0, VEPU_REG_VP8_CTRL0);
304         vepu_write_relaxed(vpu, params->rlc_ctrl, VEPU_REG_RLC_CTRL);
305         vepu_write_relaxed(vpu, params->mb_ctrl, VEPU_REG_MB_CTRL);
306
307         for (i = 0; i < ARRAY_SIZE(params->rgb_yuv_coeff); ++i)
308                 vepu_write_relaxed(vpu, params->rgb_yuv_coeff[i],
309                                         VEPU_REG_RGB_YUV_COEFF(i));
310
311         vepu_write_relaxed(vpu, params->rgb_mask_msb,
312                                 VEPU_REG_RGB_MASK_MSB);
313         vepu_write_relaxed(vpu, params->intra_area_ctrl,
314                                 VEPU_REG_INTRA_AREA_CTRL);
315         vepu_write_relaxed(vpu, params->cir_intra_ctrl,
316                                 VEPU_REG_CIR_INTRA_CTRL);
317         vepu_write_relaxed(vpu, params->first_roi_area,
318                                 VEPU_REG_FIRST_ROI_AREA);
319         vepu_write_relaxed(vpu, params->second_roi_area,
320                                 VEPU_REG_SECOND_ROI_AREA);
321         vepu_write_relaxed(vpu, params->mvc_ctrl,
322                                 VEPU_REG_MVC_CTRL);
323
324         for (i = 0; i < ARRAY_SIZE(params->intra_penalty); ++i)
325                 vepu_write_relaxed(vpu, params->intra_penalty[i],
326                                         VEPU_REG_VP8_INTRA_PENALTY(i));
327
328         for (i = 0; i < ARRAY_SIZE(params->seg_qp); ++i)
329                 vepu_write_relaxed(vpu, params->seg_qp[i],
330                                         VEPU_REG_VP8_SEG_QP(i));
331
332         for (i = 0; i < ARRAY_SIZE(params->dmv_4p_1p_penalty); ++i)
333                 vepu_write_relaxed(vpu, params->dmv_4p_1p_penalty[i],
334                                         VEPU_REG_DMV_4P_1P_PENALTY(i));
335
336         for (i = 0; i < ARRAY_SIZE(params->dmv_qpel_penalty); ++i)
337                 vepu_write_relaxed(vpu, params->dmv_qpel_penalty[i],
338                                         VEPU_REG_DMV_QPEL_PENALTY(i));
339
340         vepu_write_relaxed(vpu, params->vp8_ctrl1, VEPU_REG_VP8_CTRL1);
341         vepu_write_relaxed(vpu, params->bit_cost_golden,
342                                 VEPU_REG_VP8_BIT_COST_GOLDEN);
343
344         for (i = 0; i < ARRAY_SIZE(params->loop_flt_delta); ++i)
345                 vepu_write_relaxed(vpu, params->loop_flt_delta[i],
346                                         VEPU_REG_VP8_LOOP_FLT_DELTA(i));
347 }
348
349 void rk3288_vpu_vp8e_run(struct rk3288_vpu_ctx *ctx)
350 {
351         struct rk3288_vpu_dev *vpu = ctx->dev;
352         u32 reg;
353
354         /* The hardware expects the control buffer to be zeroed. */
355         memset(ctx->hw.vp8e.ctrl_buf.cpu, 0,
356                 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
357
358         /*
359          * Program the hardware.
360          */
361         rk3288_vpu_power_on(vpu);
362
363         vepu_write_relaxed(vpu, VEPU_REG_ENC_CTRL_ENC_MODE_VP8,
364                                 VEPU_REG_ENC_CTRL);
365
366         rk3288_vpu_vp8e_set_params(vpu, ctx);
367         rk3288_vpu_vp8e_set_buffers(vpu, ctx);
368
369         /* Make sure that all registers are written at this point. */
370         wmb();
371
372         /* Set the watchdog. */
373         schedule_delayed_work(&vpu->watchdog_work, msecs_to_jiffies(2000));
374
375         /* Start the hardware. */
376         reg = VEPU_REG_AXI_CTRL_OUTPUT_SWAP16
377                 | VEPU_REG_AXI_CTRL_INPUT_SWAP16
378                 | VEPU_REG_AXI_CTRL_BURST_LEN(16)
379                 | VEPU_REG_AXI_CTRL_GATE_BIT
380                 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP32
381                 | VEPU_REG_AXI_CTRL_INPUT_SWAP32
382                 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP8
383                 | VEPU_REG_AXI_CTRL_INPUT_SWAP8;
384         vepu_write(vpu, reg, VEPU_REG_AXI_CTRL);
385
386         vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
387
388         reg = VEPU_REG_ENC_CTRL_NAL_MODE_BIT
389                 | VEPU_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width))
390                 | VEPU_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
391                 | VEPU_REG_ENC_CTRL_ENC_MODE_VP8
392                 | VEPU_REG_ENC_CTRL_EN_BIT;
393
394         if (ctx->run.dst->b.v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME)
395                 reg |= VEPU_REG_ENC_CTRL_KEYFRAME_BIT;
396
397         vepu_write(vpu, reg, VEPU_REG_ENC_CTRL);
398 }
399
400 void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx,
401                           enum vb2_buffer_state result)
402 {
403         struct rk3288_vpu_vp8e_ctrl_buf *ctrl_buf = ctx->hw.vp8e.ctrl_buf.cpu;
404
405         /* Read length information of this run from utility buffer. */
406         ctx->run.dst->vp8e.ext_hdr_size = ctrl_buf->ext_hdr_size;
407         ctx->run.dst->vp8e.dct_size = ctrl_buf->dct_size;
408
409         rk3288_vpu_run_done(ctx, result);
410 }