2 * Rockchip RK3288 VPU codec driver
4 * Copyright (C) 2014 Rockchip Electronics Co., Ltd.
5 * Alpha Lin <Alpha.Lin@rock-chips.com>
6 * Jeffy Chen <jeffy.chen@rock-chips.com>
8 * Copyright (C) 2014 Google, Inc.
9 * Tomasz Figa <tfiga@chromium.org>
11 * This software is licensed under the terms of the GNU General Public
12 * License version 2, as published by the Free Software Foundation, and
13 * may be copied, distributed, and modified under those terms.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
21 #include "rockchip_vpu_common.h"
23 #include <linux/types.h>
24 #include <linux/sort.h>
26 #include "rk3288_vpu_regs.h"
27 #include "rockchip_vpu_hw.h"
29 /* Various parameters specific to VP8 encoder. */
/*
 * Offset added to ctx->run.priv_src.dma to form the address programmed into
 * VEPU_REG_ADDR_CABAC_TBL.
 */
30 #define VP8_CABAC_CTX_OFFSET 192
/*
 * Distance from that table to the address programmed into
 * VEPU_REG_ADDR_VP8_SEG_MAP (presumably the size of the table itself, as the
 * name suggests -- confirm against the hardware documentation).
 */
31 #define VP8_CABAC_CTX_SIZE ((55 + 96) << 3)
/*
 * NOTE(review): the member declarations of this structure are not visible in
 * this excerpt; only its kernel-doc text and header line are. Within this
 * file a buffer of sizeof(struct rk3288_vpu_vp8e_ctrl_buf) is allocated in
 * rk3288_vpu_vp8e_init(), cleared at the start of rk3288_vpu_vp8e_run(), and
 * its ext_hdr_size and dct_size members are read in rk3288_vpu_vp8e_done().
 */
34 * struct rk3288_vpu_vp8e_ctrl_buf - hardware control buffer layout
35 * @ext_hdr_size: Ext header size in bytes (written by hardware).
36 * @dct_size: DCT partition size (written by hardware).
37 * @rsvd: Reserved for hardware.
39 struct rk3288_vpu_vp8e_ctrl_buf {
45 static inline unsigned int ref_luma_size(unsigned int w, unsigned int h)
47 return round_up(w, MB_DIM) * round_up(h, MB_DIM);
/*
 * rk3288_vpu_vp8e_init() - allocate the auxiliary buffers for VP8 encoding.
 * @ctx: encoder context; ctx->src_fmt supplies the frame width and height.
 *
 * Requests three auxiliary buffers, in this order, and stores them in
 * ctx->hw.vp8e: the control buffer, the MV buffer and the "ext" buffer.
 * The two rockchip_vpu_aux_buf_free() calls at the end release the MV buffer
 * and then the control buffer.
 *
 * NOTE(review): several short lines of this function are not visible in this
 * excerpt (the declarations of mv_size, ref_buf_size and ret, the checks made
 * after each allocation, the labels in front of the free calls and the return
 * statements), so the exact return-value contract cannot be confirmed here.
 */
50 int rk3288_vpu_vp8e_init(struct rockchip_vpu_ctx *ctx)
52 struct rockchip_vpu_dev *vpu = ctx->dev;
53 size_t height = ctx->src_fmt.height;
54 size_t width = ctx->src_fmt.width;
/* Control buffer: exactly one struct rk3288_vpu_vp8e_ctrl_buf. */
59 ret = rockchip_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ctrl_buf,
60 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
62 vpu_err("failed to allocate ctrl buffer\n");
/*
 * MV buffer: the count of 16-wide columns times the count of 16-high rows
 * needed to cover the frame, divided by 4 (integer division).
 */
66 mv_size = DIV_ROUND_UP(width, 16) * DIV_ROUND_UP(height, 16) / 4;
67 ret = rockchip_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.mv_buf, mv_size);
69 vpu_err("failed to allocate MV buffer\n");
/*
 * One frame in the ext buffer is 3/2 of the aligned luma size:
 * rk3288_vpu_vp8e_set_buffers() places the chroma data directly after the
 * luma plane and uses a second frame at an offset of this same size.
 * NOTE(review): the size argument of the allocation below is on a line that
 * is not visible in this excerpt.
 */
73 ref_buf_size = ref_luma_size(width, height) * 3 / 2;
74 ret = rockchip_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ext_buf,
77 vpu_err("failed to allocate ext buffer\n");
/* Release path: MV buffer first, then the control buffer. */
84 rockchip_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
86 rockchip_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
91 void rk3288_vpu_vp8e_exit(struct rockchip_vpu_ctx *ctx)
93 struct rockchip_vpu_dev *vpu = ctx->dev;
95 rockchip_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ext_buf);
96 rockchip_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
97 rockchip_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
100 static inline u32 enc_in_img_ctrl(struct rockchip_vpu_ctx *ctx)
102 struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
103 struct v4l2_rect *crop = &ctx->src_crop;
104 unsigned bytes_per_line, overfill_r, overfill_b;
107 * The hardware needs only the value for luma plane, because
108 * values of other planes are calculated internally based on
111 bytes_per_line = pix_fmt->plane_fmt[0].bytesperline;
112 overfill_r = (pix_fmt->width - crop->width) / 4;
113 overfill_b = pix_fmt->height - crop->height;
115 return VEPU_REG_IN_IMG_CTRL_ROW_LEN(bytes_per_line)
116 | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
117 | VEPU_REG_IN_IMG_CTRL_OVRFLB_D4(overfill_b)
118 | VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
/*
 * rk3288_vpu_vp8e_set_buffers() - program all buffer addresses for one run.
 * @vpu: device whose encoder registers are written
 * @ctx: context supplying the source, destination and auxiliary buffers
 *
 * Writes the output stream, DCT partition, auxiliary, reference,
 * reconstruction and source plane addresses, followed by the input image
 * control word. Besides the register writes it also updates state: it toggles
 * ctx->hw.vp8e.ref_rec_ptr and stores hdr_size, dct_offset and the KEYFRAME
 * flag on the destination buffer.
 *
 * NOTE(review): several short lines of this function (local declarations,
 * else branches, comment delimiters) are not visible in this excerpt.
 */
121 static void rk3288_vpu_vp8e_set_buffers(struct rockchip_vpu_dev *vpu,
122 struct rockchip_vpu_ctx *ctx)
124 struct vb2_v4l2_buffer *vb2_dst = to_vb2_v4l2_buffer(&ctx->run.dst->vb.vb2_buf);
125 const struct rk3288_vp8e_reg_params *params =
126 (struct rk3288_vp8e_reg_params *)ctx->run.vp8e.reg_params;
127 dma_addr_t ref_buf_dma, rec_buf_dma;
128 dma_addr_t stream_dma;
/* Luma plane size with both dimensions rounded up to MB_DIM. */
134 rounded_size = ref_luma_size(ctx->src_fmt.width,
135 ctx->src_fmt.height);
/*
 * The ext buffer holds two frames of rounded_size * 3 / 2 each. Depending on
 * ref_rec_ptr one of the two addresses is moved to the second frame (the
 * line between the two additions is not visible here, presumably "else"),
 * and the flag is then flipped so the roles change on the next run.
 */
137 ref_buf_dma = rec_buf_dma = ctx->hw.vp8e.ext_buf.dma;
138 if (ctx->hw.vp8e.ref_rec_ptr)
139 ref_buf_dma += rounded_size * 3 / 2;
141 rec_buf_dma += rounded_size * 3 / 2;
142 ctx->hw.vp8e.ref_rec_ptr ^= 1;
/*
 * A dummy encode writes into the device-wide dummy destination buffer; the
 * other pair of assignments uses plane 0 of the run's destination buffer.
 */
144 if (rockchip_vpu_ctx_is_dummy_encode(ctx)) {
145 dst_dma = vpu->dummy_encode_dst.dma;
146 dst_size = vpu->dummy_encode_dst.size;
148 dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->vb.vb2_buf, 0);
149 dst_size = vb2_plane_size(&ctx->run.dst->vb.vb2_buf, 0);
154 * align 64bits->|<-start offset->|
155 * |<---------header size-------->|<---dst buf---
/*
 * The start offset is the STR_OFFS field of the supplied rlc_ctrl value; the
 * stream address is the destination address advanced by hdr_len.
 */
157 start_offset = (params->rlc_ctrl & VEPU_REG_RLC_CTRL_STR_OFFS_MASK)
158 >> VEPU_REG_RLC_CTRL_STR_OFFS_SHIFT;
159 stream_dma = dst_dma + params->hdr_len;
162 * Userspace will pass 8 bytes aligned size(round_down) to us,
163 * so we need to plus start offset to get real header size.
165 * |<-aligned size->|<-start offset->|
166 * |<----------header size---------->|
/*
 * NOTE(review): the ">> 3" presumably converts start_offset from bits to
 * bytes -- confirm against the STR_OFFS field definition.
 */
168 ctx->run.dst->vp8e.hdr_size = params->hdr_len + (start_offset >> 3);
/*
 * Mirror the keyframe bit of the supplied enc_ctrl value into the destination
 * buffer flags; rk3288_vpu_vp8e_run() reads the flag back from there.
 */
170 if (params->enc_ctrl & VEPU_REG_ENC_CTRL_KEYFRAME_BIT)
171 vb2_dst->flags |= V4L2_BUF_FLAG_KEYFRAME;
173 vb2_dst->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
176 * We assume here that 1/10 of the buffer is enough for headers.
177 * DCT partition will be placed in remaining 9/10 of the buffer.
/* A tenth of the destination size, rounded up to a multiple of 8. */
179 ctx->run.dst->vp8e.dct_offset = round_up(dst_size / 10, 8);
181 /* Destination buffer. */
182 vepu_write_relaxed(vpu, stream_dma, VEPU_REG_ADDR_OUTPUT_STREAM);
183 vepu_write_relaxed(vpu, dst_dma + ctx->run.dst->vp8e.dct_offset,
184 VEPU_REG_ADDR_VP8_DCT_PART(0));
/* The limit is the space left between dct_offset and the end of the buffer. */
185 vepu_write_relaxed(vpu, dst_size - ctx->run.dst->vp8e.dct_offset,
186 VEPU_REG_STR_BUF_LIMIT);
188 /* Auxiliary buffers. */
189 vepu_write_relaxed(vpu, ctx->hw.vp8e.ctrl_buf.dma,
190 VEPU_REG_ADDR_OUTPUT_CTRL);
191 vepu_write_relaxed(vpu, ctx->hw.vp8e.mv_buf.dma,
192 VEPU_REG_ADDR_MV_OUT);
193 vepu_write_relaxed(vpu, ctx->run.priv_dst.dma,
194 VEPU_REG_ADDR_VP8_PROB_CNT);
/*
 * Both the CABAC table and the segment map addresses point into the run's
 * private source buffer, at fixed offsets from its start.
 */
195 vepu_write_relaxed(vpu, ctx->run.priv_src.dma + VP8_CABAC_CTX_OFFSET,
196 VEPU_REG_ADDR_CABAC_TBL);
197 vepu_write_relaxed(vpu, ctx->run.priv_src.dma
198 + VP8_CABAC_CTX_OFFSET + VP8_CABAC_CTX_SIZE,
199 VEPU_REG_ADDR_VP8_SEG_MAP);
/* In both frames below, chroma starts rounded_size after luma. */
201 /* Reference buffers. */
202 vepu_write_relaxed(vpu, ref_buf_dma,
203 VEPU_REG_ADDR_REF_LUMA);
204 vepu_write_relaxed(vpu, ref_buf_dma + rounded_size,
205 VEPU_REG_ADDR_REF_CHROMA);
207 /* Reconstruction buffers. */
208 vepu_write_relaxed(vpu, rec_buf_dma,
209 VEPU_REG_ADDR_REC_LUMA);
210 vepu_write_relaxed(vpu, rec_buf_dma + rounded_size,
211 VEPU_REG_ADDR_REC_CHROMA);
/*
 * Source planes: a dummy encode reads the device-wide dummy source buffers;
 * the second group of writes uses the planes of the run's source buffer.
 */
214 if (rockchip_vpu_ctx_is_dummy_encode(ctx)) {
215 vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_Y].dma,
216 VEPU_REG_ADDR_IN_LUMA);
217 vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CB].dma,
218 VEPU_REG_ADDR_IN_CB);
219 vepu_write_relaxed(vpu, vpu->dummy_encode_src[PLANE_CR].dma,
220 VEPU_REG_ADDR_IN_CR);
222 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
223 &ctx->run.src->vb.vb2_buf, PLANE_Y),
224 VEPU_REG_ADDR_IN_LUMA);
225 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
226 &ctx->run.src->vb.vb2_buf, PLANE_CB),
227 VEPU_REG_ADDR_IN_CB);
228 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(
229 &ctx->run.src->vb.vb2_buf, PLANE_CR),
230 VEPU_REG_ADDR_IN_CR);
233 /* Source parameters. */
234 vepu_write_relaxed(vpu, enc_in_img_ctrl(ctx), VEPU_REG_IN_IMG_CTRL);
/*
 * rk3288_vpu_vp8e_set_params() - copy the per-run register values to hardware.
 * @vpu: device whose encoder registers are written
 * @ctx: context whose ctx->run.vp8e.reg_params holds the values to program
 *
 * Every value is taken from the parameter block as-is and written with
 * vepu_write_relaxed(); scalar fields go to a single register, array fields
 * are written element by element to the indexed register of the same name.
 *
 * NOTE(review): the declaration of the loop counter and the register argument
 * of the mvc_ctrl write are on lines not visible in this excerpt.
 */
237 static void rk3288_vpu_vp8e_set_params(struct rockchip_vpu_dev *vpu,
238 struct rockchip_vpu_ctx *ctx)
240 const struct rk3288_vp8e_reg_params *params =
241 (struct rk3288_vp8e_reg_params *)ctx->run.vp8e.reg_params;
/*
 * enc_ctrl5 is written before enc_ctrl4; the dummy parameter table in this
 * file lists the two fields in the same order.
 */
244 vepu_write_relaxed(vpu, params->enc_ctrl0, VEPU_REG_ENC_CTRL0);
245 vepu_write_relaxed(vpu, params->enc_ctrl1, VEPU_REG_ENC_CTRL1);
246 vepu_write_relaxed(vpu, params->enc_ctrl2, VEPU_REG_ENC_CTRL2);
247 vepu_write_relaxed(vpu, params->enc_ctrl3, VEPU_REG_ENC_CTRL3);
248 vepu_write_relaxed(vpu, params->enc_ctrl5, VEPU_REG_ENC_CTRL5);
249 vepu_write_relaxed(vpu, params->enc_ctrl4, VEPU_REG_ENC_CTRL4);
250 vepu_write_relaxed(vpu, params->str_hdr_rem_msb,
251 VEPU_REG_STR_HDR_REM_MSB);
252 vepu_write_relaxed(vpu, params->str_hdr_rem_lsb,
253 VEPU_REG_STR_HDR_REM_LSB);
254 vepu_write_relaxed(vpu, params->mad_ctrl, VEPU_REG_MAD_CTRL);
/* One register per element of qp_val. */
256 for (i = 0; i < ARRAY_SIZE(params->qp_val); ++i)
257 vepu_write_relaxed(vpu, params->qp_val[i],
258 VEPU_REG_VP8_QP_VAL(i));
260 vepu_write_relaxed(vpu, params->bool_enc, VEPU_REG_VP8_BOOL_ENC);
261 vepu_write_relaxed(vpu, params->vp8_ctrl0, VEPU_REG_VP8_CTRL0);
262 vepu_write_relaxed(vpu, params->rlc_ctrl, VEPU_REG_RLC_CTRL);
263 vepu_write_relaxed(vpu, params->mb_ctrl, VEPU_REG_MB_CTRL);
/* One register per element of rgb_yuv_coeff. */
265 for (i = 0; i < ARRAY_SIZE(params->rgb_yuv_coeff); ++i)
266 vepu_write_relaxed(vpu, params->rgb_yuv_coeff[i],
267 VEPU_REG_RGB_YUV_COEFF(i));
269 vepu_write_relaxed(vpu, params->rgb_mask_msb,
270 VEPU_REG_RGB_MASK_MSB);
271 vepu_write_relaxed(vpu, params->intra_area_ctrl,
272 VEPU_REG_INTRA_AREA_CTRL);
273 vepu_write_relaxed(vpu, params->cir_intra_ctrl,
274 VEPU_REG_CIR_INTRA_CTRL);
275 vepu_write_relaxed(vpu, params->first_roi_area,
276 VEPU_REG_FIRST_ROI_AREA);
277 vepu_write_relaxed(vpu, params->second_roi_area,
278 VEPU_REG_SECOND_ROI_AREA);
279 vepu_write_relaxed(vpu, params->mvc_ctrl,
/* The remaining array fields follow the same element-per-register pattern. */
282 for (i = 0; i < ARRAY_SIZE(params->intra_penalty); ++i)
283 vepu_write_relaxed(vpu, params->intra_penalty[i],
284 VEPU_REG_VP8_INTRA_PENALTY(i));
286 for (i = 0; i < ARRAY_SIZE(params->seg_qp); ++i)
287 vepu_write_relaxed(vpu, params->seg_qp[i],
288 VEPU_REG_VP8_SEG_QP(i));
290 for (i = 0; i < ARRAY_SIZE(params->dmv_4p_1p_penalty); ++i)
291 vepu_write_relaxed(vpu, params->dmv_4p_1p_penalty[i],
292 VEPU_REG_DMV_4P_1P_PENALTY(i));
294 for (i = 0; i < ARRAY_SIZE(params->dmv_qpel_penalty); ++i)
295 vepu_write_relaxed(vpu, params->dmv_qpel_penalty[i],
296 VEPU_REG_DMV_QPEL_PENALTY(i));
298 vepu_write_relaxed(vpu, params->vp8_ctrl1, VEPU_REG_VP8_CTRL1);
299 vepu_write_relaxed(vpu, params->bit_cost_golden,
300 VEPU_REG_VP8_BIT_COST_GOLDEN);
302 for (i = 0; i < ARRAY_SIZE(params->loop_flt_delta); ++i)
303 vepu_write_relaxed(vpu, params->loop_flt_delta[i],
304 VEPU_REG_VP8_LOOP_FLT_DELTA(i));
/*
 * rk3288_vpu_vp8e_run() - program the encoder and start one VP8 encode run.
 * @ctx: context describing the run
 *
 * Sequence: clear the control buffer, power the device on, write the
 * parameter and buffer registers with relaxed writes, schedule the watchdog
 * work, then use non-relaxed writes for the AXI control, interrupt and
 * encoder control registers. The last write carries the enable bit.
 *
 * NOTE(review): the declaration of reg, the register argument of the first
 * write and the statement that follows the "all registers are written"
 * comment are on lines not visible in this excerpt.
 */
307 void rk3288_vpu_vp8e_run(struct rockchip_vpu_ctx *ctx)
309 struct vb2_v4l2_buffer *vb2_dst = to_vb2_v4l2_buffer(&ctx->run.dst->vb.vb2_buf);
310 struct rockchip_vpu_dev *vpu = ctx->dev;
313 /* The hardware expects the control buffer to be zeroed. */
314 memset(ctx->hw.vp8e.ctrl_buf.cpu, 0,
315 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
318 * Program the hardware.
320 rockchip_vpu_power_on(vpu);
322 vepu_write_relaxed(vpu, VEPU_REG_ENC_CTRL_ENC_MODE_VP8,
/*
 * Parameters are written before buffers; set_buffers also updates the
 * KEYFRAME flag on the destination buffer that is tested further down.
 */
325 rk3288_vpu_vp8e_set_params(vpu, ctx);
326 rk3288_vpu_vp8e_set_buffers(vpu, ctx);
328 /* Make sure that all registers are written at this point. */
/* The watchdog work is scheduled to fire 2000 ms from now. */
331 /* Set the watchdog. */
332 schedule_delayed_work(&vpu->watchdog_work, msecs_to_jiffies(2000));
334 /* Start the hardware. */
/* AXI control word: swap flags for input and output, burst length, gate bit. */
335 reg = VEPU_REG_AXI_CTRL_OUTPUT_SWAP16
336 | VEPU_REG_AXI_CTRL_INPUT_SWAP16
337 | VEPU_REG_AXI_CTRL_BURST_LEN(16)
338 | VEPU_REG_AXI_CTRL_GATE_BIT
339 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP32
340 | VEPU_REG_AXI_CTRL_INPUT_SWAP32
341 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP8
342 | VEPU_REG_AXI_CTRL_INPUT_SWAP8;
343 vepu_write(vpu, reg, VEPU_REG_AXI_CTRL);
/* The interrupt register is written with zero before the encoder is enabled. */
345 vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
/* Encoder control word: frame size via MB_WIDTH/MB_HEIGHT, VP8 mode, enable. */
347 reg = VEPU_REG_ENC_CTRL_NAL_MODE_BIT
348 | VEPU_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width))
349 | VEPU_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
350 | VEPU_REG_ENC_CTRL_ENC_MODE_VP8
351 | VEPU_REG_ENC_CTRL_EN_BIT;
/* The keyframe request is taken from the destination buffer's flags. */
353 if (vb2_dst->flags & V4L2_BUF_FLAG_KEYFRAME)
354 reg |= VEPU_REG_ENC_CTRL_KEYFRAME_BIT;
356 vepu_write(vpu, reg, VEPU_REG_ENC_CTRL);
359 void rk3288_vpu_vp8e_done(struct rockchip_vpu_ctx *ctx,
360 enum vb2_buffer_state result)
362 struct rk3288_vpu_vp8e_ctrl_buf *ctrl_buf = ctx->hw.vp8e.ctrl_buf.cpu;
364 /* Read length information of this run from utility buffer. */
365 ctx->run.dst->vp8e.ext_hdr_size = ctrl_buf->ext_hdr_size;
366 ctx->run.dst->vp8e.dct_size = ctrl_buf->dct_size;
368 rockchip_vpu_run_done(ctx, result);
/*
 * Fixed register parameter set handed out by
 * rk3288_vpu_vp8e_get_dummy_params(). The field names match those that
 * rk3288_vpu_vp8e_set_params() and rk3288_vpu_vp8e_set_buffers() read from a
 * run's parameter block.
 *
 * NOTE(review): the hexadecimal number in the comment in front of each entry
 * is presumably the offset of the corresponding register -- verify against
 * rk3288_vpu_regs.h. The lines that open and close the nested initializers
 * are not visible in this excerpt.
 */
372 * WAR for encoder state corruption after decoding
375 static const struct rockchip_reg_params dummy_encode_reg_params = {
/* hdr_len is zero and enc_ctrl carries the keyframe bit. */
377 /* 00000014 */ .hdr_len = 0x00000000,
378 /* 00000038 */ .enc_ctrl = VEPU_REG_ENC_CTRL_KEYFRAME_BIT,
379 /* 00000040 */ .enc_ctrl0 = 0x00000000,
380 /* 00000044 */ .enc_ctrl1 = 0x00000000,
381 /* 00000048 */ .enc_ctrl2 = 0x00040014,
382 /* 0000004c */ .enc_ctrl3 = 0x404083c0,
383 /* 00000050 */ .enc_ctrl5 = 0x01006bff,
384 /* 00000054 */ .enc_ctrl4 = 0x00000039,
385 /* 00000058 */ .str_hdr_rem_msb = 0x85848805,
386 /* 0000005c */ .str_hdr_rem_lsb = 0x02000000,
387 /* 00000064 */ .mad_ctrl = 0x00000000,
388 /* 0000006c */ .qp_val = {
389 /* 0000006c */ 0x020213b1,
390 /* 00000070 */ 0x02825249,
391 /* 00000074 */ 0x048409d8,
392 /* 00000078 */ 0x03834c30,
393 /* 0000007c */ 0x020213b1,
394 /* 00000080 */ 0x02825249,
395 /* 00000084 */ 0x00340e0d,
396 /* 00000088 */ 0x401c1a15,
398 /* 0000008c */ .bool_enc = 0x00018140,
399 /* 00000090 */ .vp8_ctrl0 = 0x000695c0,
400 /* 00000094 */ .rlc_ctrl = 0x14000000,
401 /* 00000098 */ .mb_ctrl = 0x00000000,
402 /* 000000d4 */ .rgb_yuv_coeff = {
403 /* 000000d4 */ 0x962b4c85,
404 /* 000000d8 */ 0x90901d50,
406 /* 000000dc */ .rgb_mask_msb = 0x0000b694,
407 /* 000000e0 */ .intra_area_ctrl = 0xffffffff,
408 /* 000000e4 */ .cir_intra_ctrl = 0x00000000,
409 /* 000000f0 */ .first_roi_area = 0xffffffff,
410 /* 000000f4 */ .second_roi_area = 0xffffffff,
411 /* 000000f8 */ .mvc_ctrl = 0x01780000,
412 /* 00000100 */ .intra_penalty = {
413 /* 00000100 */ 0x00010005,
414 /* 00000104 */ 0x00015011,
415 /* 00000108 */ 0x0000c005,
416 /* 0000010c */ 0x00016010,
417 /* 00000110 */ 0x0001a018,
418 /* 00000114 */ 0x00018015,
419 /* 00000118 */ 0x0001d01a,
/* The same eight-value pattern appears three times in seg_qp. */
421 /* 00000120 */ .seg_qp = {
422 /* 00000120 */ 0x020213b1,
423 /* 00000124 */ 0x02825249,
424 /* 00000128 */ 0x048409d8,
425 /* 0000012c */ 0x03834c30,
426 /* 00000130 */ 0x020213b1,
427 /* 00000134 */ 0x02825249,
428 /* 00000138 */ 0x00340e0d,
429 /* 0000013c */ 0x341c1a15,
430 /* 00000140 */ 0x020213b1,
431 /* 00000144 */ 0x02825249,
432 /* 00000148 */ 0x048409d8,
433 /* 0000014c */ 0x03834c30,
434 /* 00000150 */ 0x020213b1,
435 /* 00000154 */ 0x02825249,
436 /* 00000158 */ 0x00340e0d,
437 /* 0000015c */ 0x341c1a15,
438 /* 00000160 */ 0x020213b1,
439 /* 00000164 */ 0x02825249,
440 /* 00000168 */ 0x048409d8,
441 /* 0000016c */ 0x03834c30,
442 /* 00000170 */ 0x020213b1,
443 /* 00000174 */ 0x02825249,
444 /* 00000178 */ 0x00340e0d,
445 /* 0000017c */ 0x341c1a15,
/* Each byte of these words is 2 greater than the byte before it. */
447 /* 00000180 */ .dmv_4p_1p_penalty = {
448 /* 00000180 */ 0x00020406,
449 /* 00000184 */ 0x080a0c0e,
450 /* 00000188 */ 0x10121416,
451 /* 0000018c */ 0x181a1c1e,
452 /* 00000190 */ 0x20222426,
453 /* 00000194 */ 0x282a2c2e,
454 /* 00000198 */ 0x30323436,
455 /* 0000019c */ 0x383a3c3e,
456 /* 000001a0 */ 0x40424446,
457 /* 000001a4 */ 0x484a4c4e,
458 /* 000001a8 */ 0x50525456,
459 /* 000001ac */ 0x585a5c5e,
460 /* 000001b0 */ 0x60626466,
461 /* 000001b4 */ 0x686a6c6e,
462 /* 000001b8 */ 0x70727476,
463 /* NOTE: Further 17 registers set to 0. */
466 * NOTE: Following registers all set to 0:
467 * - dmv_qpel_penalty,
475 const struct rockchip_reg_params *rk3288_vpu_vp8e_get_dummy_params(void)
477 return &dummy_encode_reg_params;