/*
 * Rockchip RK3288 VPU codec driver
 *
 * Copyright (C) 2014 Rockchip Electronics Co., Ltd.
 *	Alpha Lin <Alpha.Lin@rock-chips.com>
 *	Jeffy Chen <jeffy.chen@rock-chips.com>
 *
 * Copyright (C) 2014 Google, Inc.
 *	Tomasz Figa <tfiga@chromium.org>
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
#include "rk3288_vpu_common.h"

#include <linux/types.h>
#include <linux/sort.h>

#include "rk3288_vpu_regs.h"
#include "rk3288_vpu_hw.h"
/* Various parameters specific to VP8 encoder. */

/* Offset of the CABAC table inside the private source buffer. */
#define VP8_CABAC_CTX_OFFSET			192
/* Distance from the CABAC table to the segment map in the same buffer. */
#define VP8_CABAC_CTX_SIZE			((55 + 96) << 3)

/* Sizes subtracted from the header length before it is stored in the tag. */
#define VP8_KEY_FRAME_HDR_SIZE			10
#define VP8_INTER_FRAME_HDR_SIZE		3

/*
 * Fields of the frame tag, i.e. the first 32-bit word of the frame.
 * The key frame bit is cleared for key frames and set for all other frames.
 */
#define VP8_FRAME_TAG_KEY_FRAME_BIT		BIT(0)
#define VP8_FRAME_TAG_LENGTH_SHIFT		5
#define VP8_FRAME_TAG_LENGTH_MASK		(0x7ffff << VP8_FRAME_TAG_LENGTH_SHIFT)
41 * struct rk3288_vpu_vp8e_ctrl_buf - hardware control buffer layout
42 * @ext_hdr_size: Ext header size in bytes (written by hardware).
43 * @dct_size: DCT partition size (written by hardware).
44 * @rsvd: Reserved for hardware.
46 struct rk3288_vpu_vp8e_ctrl_buf {
53 * The hardware takes care only of ext hdr and dct partition. The software
54 * must take care of frame header.
56 * Buffer layout as received from hardware:
57 * |<--gap-->|<--ext hdr-->|<-gap->|<---dct part---
58 * |<-------dct part offset------->|
60 * Required buffer layout:
61 * |<--hdr-->|<--ext hdr-->|<---dct part---
63 void rk3288_vpu_vp8e_assemble_bitstream(struct rk3288_vpu_ctx *ctx,
64 struct rk3288_vpu_buf *dst_buf)
66 size_t ext_hdr_size = dst_buf->vp8e.ext_hdr_size;
67 size_t dct_size = dst_buf->vp8e.dct_size;
68 size_t hdr_size = dst_buf->vp8e.hdr_size;
74 dst_size = vb2_plane_size(&dst_buf->b, 0);
75 dst = vb2_plane_vaddr(&dst_buf->b, 0);
76 tag = dst; /* To access frame tag words. */
78 if (WARN_ON(hdr_size + ext_hdr_size + dct_size > dst_size))
80 if (WARN_ON(dst_buf->vp8e.dct_offset + dct_size > dst_size))
83 vpu_debug(1, "%s: hdr_size = %u, ext_hdr_size = %u, dct_size = %u\n",
84 __func__, hdr_size, ext_hdr_size, dct_size);
86 memmove(dst + hdr_size + ext_hdr_size,
87 dst + dst_buf->vp8e.dct_offset, dct_size);
88 memcpy(dst, dst_buf->vp8e.header, hdr_size);
90 /* Patch frame tag at first 32-bit word of the frame. */
91 if (dst_buf->b.v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME) {
92 tag_size = VP8_KEY_FRAME_HDR_SIZE;
93 tag[0] &= ~VP8_FRAME_TAG_KEY_FRAME_BIT;
95 tag_size = VP8_INTER_FRAME_HDR_SIZE;
96 tag[0] |= VP8_FRAME_TAG_KEY_FRAME_BIT;
99 tag[0] &= ~VP8_FRAME_TAG_LENGTH_MASK;
100 tag[0] |= (hdr_size + ext_hdr_size - tag_size)
101 << VP8_FRAME_TAG_LENGTH_SHIFT;
103 vb2_set_plane_payload(&dst_buf->b, 0,
104 hdr_size + ext_hdr_size + dct_size);
107 static inline unsigned int ref_luma_size(unsigned int w, unsigned int h)
109 return round_up(w, MB_DIM) * round_up(h, MB_DIM);
112 int rk3288_vpu_vp8e_init(struct rk3288_vpu_ctx *ctx)
114 struct rk3288_vpu_dev *vpu = ctx->dev;
115 size_t height = ctx->src_fmt.height;
116 size_t width = ctx->src_fmt.width;
121 ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ctrl_buf,
122 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
124 vpu_err("failed to allocate ctrl buffer\n");
128 mv_size = DIV_ROUND_UP(width, 16) * DIV_ROUND_UP(height, 16) / 4;
129 ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.mv_buf, mv_size);
131 vpu_err("failed to allocate MV buffer\n");
135 ref_buf_size = ref_luma_size(width, height) * 3 / 2;
136 ret = rk3288_vpu_aux_buf_alloc(vpu, &ctx->hw.vp8e.ext_buf,
139 vpu_err("failed to allocate ext buffer\n");
146 rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
148 rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
153 void rk3288_vpu_vp8e_exit(struct rk3288_vpu_ctx *ctx)
155 struct rk3288_vpu_dev *vpu = ctx->dev;
157 rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ext_buf);
158 rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.mv_buf);
159 rk3288_vpu_aux_buf_free(vpu, &ctx->hw.vp8e.ctrl_buf);
162 static inline u32 enc_in_img_ctrl(struct rk3288_vpu_ctx *ctx)
164 struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
165 struct v4l2_rect *crop = &ctx->src_crop;
166 unsigned bytes_per_line, overfill_r, overfill_b;
169 * The hardware needs only the value for luma plane, because
170 * values of other planes are calculated internally based on
173 bytes_per_line = pix_fmt->plane_fmt[0].bytesperline;
174 overfill_r = (pix_fmt->width - crop->width) / 4;
175 overfill_b = pix_fmt->height - crop->height;
177 return VEPU_REG_IN_IMG_CTRL_ROW_LEN(bytes_per_line)
178 | VEPU_REG_IN_IMG_CTRL_OVRFLR_D4(overfill_r)
179 | VEPU_REG_IN_IMG_CTRL_OVRFLB_D4(overfill_b)
180 | VEPU_REG_IN_IMG_CTRL_FMT(ctx->vpu_src_fmt->enc_fmt);
183 static void rk3288_vpu_vp8e_set_buffers(struct rk3288_vpu_dev *vpu,
184 struct rk3288_vpu_ctx *ctx)
186 const struct rk3288_vp8e_reg_params *params = ctx->run.vp8e.reg_params;
187 dma_addr_t ref_buf_dma, rec_buf_dma;
188 dma_addr_t stream_dma;
194 rounded_size = ref_luma_size(ctx->src_fmt.width,
195 ctx->src_fmt.height);
197 ref_buf_dma = rec_buf_dma = ctx->hw.vp8e.ext_buf.dma;
198 if (ctx->hw.vp8e.ref_rec_ptr)
199 ref_buf_dma += rounded_size * 3 / 2;
201 rec_buf_dma += rounded_size * 3 / 2;
202 ctx->hw.vp8e.ref_rec_ptr ^= 1;
204 dst_dma = vb2_dma_contig_plane_dma_addr(&ctx->run.dst->b, 0);
205 dst_size = vb2_plane_size(&ctx->run.dst->b, 0);
209 * align 64bits->|<-start offset->|
210 * |<---------header size-------->|<---dst buf---
212 start_offset = (params->rlc_ctrl & VEPU_REG_RLC_CTRL_STR_OFFS_MASK)
213 >> VEPU_REG_RLC_CTRL_STR_OFFS_SHIFT;
214 stream_dma = dst_dma + params->hdr_len;
217 * Userspace will pass 8 bytes aligned size(round_down) to us,
218 * so we need to plus start offset to get real header size.
220 * |<-aligned size->|<-start offset->|
221 * |<----------header size---------->|
223 ctx->run.dst->vp8e.hdr_size = params->hdr_len + (start_offset >> 3);
225 if (params->enc_ctrl & VEPU_REG_ENC_CTRL_KEYFRAME_BIT)
226 ctx->run.dst->b.v4l2_buf.flags |= V4L2_BUF_FLAG_KEYFRAME;
228 ctx->run.dst->b.v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
231 * We assume here that 1/10 of the buffer is enough for headers.
232 * DCT partition will be placed in remaining 9/10 of the buffer.
234 ctx->run.dst->vp8e.dct_offset = round_up(dst_size / 10, 8);
236 /* Destination buffer. */
237 vepu_write_relaxed(vpu, stream_dma, VEPU_REG_ADDR_OUTPUT_STREAM);
238 vepu_write_relaxed(vpu, dst_dma + ctx->run.dst->vp8e.dct_offset,
239 VEPU_REG_ADDR_VP8_DCT_PART(0));
240 vepu_write_relaxed(vpu, dst_size - ctx->run.dst->vp8e.dct_offset,
241 VEPU_REG_STR_BUF_LIMIT);
243 /* Auxilliary buffers. */
244 vepu_write_relaxed(vpu, ctx->hw.vp8e.ctrl_buf.dma,
245 VEPU_REG_ADDR_OUTPUT_CTRL);
246 vepu_write_relaxed(vpu, ctx->hw.vp8e.mv_buf.dma,
247 VEPU_REG_ADDR_MV_OUT);
248 vepu_write_relaxed(vpu, ctx->run.priv_dst.dma,
249 VEPU_REG_ADDR_VP8_PROB_CNT);
250 vepu_write_relaxed(vpu, ctx->run.priv_src.dma + VP8_CABAC_CTX_OFFSET,
251 VEPU_REG_ADDR_CABAC_TBL);
252 vepu_write_relaxed(vpu, ctx->run.priv_src.dma
253 + VP8_CABAC_CTX_OFFSET + VP8_CABAC_CTX_SIZE,
254 VEPU_REG_ADDR_VP8_SEG_MAP);
256 /* Reference buffers. */
257 vepu_write_relaxed(vpu, ref_buf_dma,
258 VEPU_REG_ADDR_REF_LUMA);
259 vepu_write_relaxed(vpu, ref_buf_dma + rounded_size,
260 VEPU_REG_ADDR_REF_CHROMA);
262 /* Reconstruction buffers. */
263 vepu_write_relaxed(vpu, rec_buf_dma,
264 VEPU_REG_ADDR_REC_LUMA);
265 vepu_write_relaxed(vpu, rec_buf_dma + rounded_size,
266 VEPU_REG_ADDR_REC_CHROMA);
269 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
270 PLANE_Y), VEPU_REG_ADDR_IN_LUMA);
271 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
272 PLANE_CB), VEPU_REG_ADDR_IN_CB);
273 vepu_write_relaxed(vpu, vb2_dma_contig_plane_dma_addr(&ctx->run.src->b,
274 PLANE_CR), VEPU_REG_ADDR_IN_CR);
276 /* Source parameters. */
277 vepu_write_relaxed(vpu, enc_in_img_ctrl(ctx), VEPU_REG_IN_IMG_CTRL);
280 static void rk3288_vpu_vp8e_set_params(struct rk3288_vpu_dev *vpu,
281 struct rk3288_vpu_ctx *ctx)
283 const struct rk3288_vp8e_reg_params *params = ctx->run.vp8e.reg_params;
286 vepu_write_relaxed(vpu, params->enc_ctrl0, VEPU_REG_ENC_CTRL0);
287 vepu_write_relaxed(vpu, params->enc_ctrl1, VEPU_REG_ENC_CTRL1);
288 vepu_write_relaxed(vpu, params->enc_ctrl2, VEPU_REG_ENC_CTRL2);
289 vepu_write_relaxed(vpu, params->enc_ctrl3, VEPU_REG_ENC_CTRL3);
290 vepu_write_relaxed(vpu, params->enc_ctrl5, VEPU_REG_ENC_CTRL5);
291 vepu_write_relaxed(vpu, params->enc_ctrl4, VEPU_REG_ENC_CTRL4);
292 vepu_write_relaxed(vpu, params->str_hdr_rem_msb,
293 VEPU_REG_STR_HDR_REM_MSB);
294 vepu_write_relaxed(vpu, params->str_hdr_rem_lsb,
295 VEPU_REG_STR_HDR_REM_LSB);
296 vepu_write_relaxed(vpu, params->mad_ctrl, VEPU_REG_MAD_CTRL);
298 for (i = 0; i < ARRAY_SIZE(params->qp_val); ++i)
299 vepu_write_relaxed(vpu, params->qp_val[i],
300 VEPU_REG_VP8_QP_VAL(i));
302 vepu_write_relaxed(vpu, params->bool_enc, VEPU_REG_VP8_BOOL_ENC);
303 vepu_write_relaxed(vpu, params->vp8_ctrl0, VEPU_REG_VP8_CTRL0);
304 vepu_write_relaxed(vpu, params->rlc_ctrl, VEPU_REG_RLC_CTRL);
305 vepu_write_relaxed(vpu, params->mb_ctrl, VEPU_REG_MB_CTRL);
307 for (i = 0; i < ARRAY_SIZE(params->rgb_yuv_coeff); ++i)
308 vepu_write_relaxed(vpu, params->rgb_yuv_coeff[i],
309 VEPU_REG_RGB_YUV_COEFF(i));
311 vepu_write_relaxed(vpu, params->rgb_mask_msb,
312 VEPU_REG_RGB_MASK_MSB);
313 vepu_write_relaxed(vpu, params->intra_area_ctrl,
314 VEPU_REG_INTRA_AREA_CTRL);
315 vepu_write_relaxed(vpu, params->cir_intra_ctrl,
316 VEPU_REG_CIR_INTRA_CTRL);
317 vepu_write_relaxed(vpu, params->first_roi_area,
318 VEPU_REG_FIRST_ROI_AREA);
319 vepu_write_relaxed(vpu, params->second_roi_area,
320 VEPU_REG_SECOND_ROI_AREA);
321 vepu_write_relaxed(vpu, params->mvc_ctrl,
324 for (i = 0; i < ARRAY_SIZE(params->intra_penalty); ++i)
325 vepu_write_relaxed(vpu, params->intra_penalty[i],
326 VEPU_REG_VP8_INTRA_PENALTY(i));
328 for (i = 0; i < ARRAY_SIZE(params->seg_qp); ++i)
329 vepu_write_relaxed(vpu, params->seg_qp[i],
330 VEPU_REG_VP8_SEG_QP(i));
332 for (i = 0; i < ARRAY_SIZE(params->dmv_4p_1p_penalty); ++i)
333 vepu_write_relaxed(vpu, params->dmv_4p_1p_penalty[i],
334 VEPU_REG_DMV_4P_1P_PENALTY(i));
336 for (i = 0; i < ARRAY_SIZE(params->dmv_qpel_penalty); ++i)
337 vepu_write_relaxed(vpu, params->dmv_qpel_penalty[i],
338 VEPU_REG_DMV_QPEL_PENALTY(i));
340 vepu_write_relaxed(vpu, params->vp8_ctrl1, VEPU_REG_VP8_CTRL1);
341 vepu_write_relaxed(vpu, params->bit_cost_golden,
342 VEPU_REG_VP8_BIT_COST_GOLDEN);
344 for (i = 0; i < ARRAY_SIZE(params->loop_flt_delta); ++i)
345 vepu_write_relaxed(vpu, params->loop_flt_delta[i],
346 VEPU_REG_VP8_LOOP_FLT_DELTA(i));
349 void rk3288_vpu_vp8e_run(struct rk3288_vpu_ctx *ctx)
351 struct rk3288_vpu_dev *vpu = ctx->dev;
354 /* The hardware expects the control buffer to be zeroed. */
355 memset(ctx->hw.vp8e.ctrl_buf.cpu, 0,
356 sizeof(struct rk3288_vpu_vp8e_ctrl_buf));
359 * Program the hardware.
361 rk3288_vpu_power_on(vpu);
363 vepu_write_relaxed(vpu, VEPU_REG_ENC_CTRL_ENC_MODE_VP8,
366 rk3288_vpu_vp8e_set_params(vpu, ctx);
367 rk3288_vpu_vp8e_set_buffers(vpu, ctx);
369 /* Make sure that all registers are written at this point. */
372 /* Set the watchdog. */
373 schedule_delayed_work(&vpu->watchdog_work, msecs_to_jiffies(2000));
375 /* Start the hardware. */
376 reg = VEPU_REG_AXI_CTRL_OUTPUT_SWAP16
377 | VEPU_REG_AXI_CTRL_INPUT_SWAP16
378 | VEPU_REG_AXI_CTRL_BURST_LEN(16)
379 | VEPU_REG_AXI_CTRL_GATE_BIT
380 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP32
381 | VEPU_REG_AXI_CTRL_INPUT_SWAP32
382 | VEPU_REG_AXI_CTRL_OUTPUT_SWAP8
383 | VEPU_REG_AXI_CTRL_INPUT_SWAP8;
384 vepu_write(vpu, reg, VEPU_REG_AXI_CTRL);
386 vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
388 reg = VEPU_REG_ENC_CTRL_NAL_MODE_BIT
389 | VEPU_REG_ENC_CTRL_WIDTH(MB_WIDTH(ctx->src_fmt.width))
390 | VEPU_REG_ENC_CTRL_HEIGHT(MB_HEIGHT(ctx->src_fmt.height))
391 | VEPU_REG_ENC_CTRL_ENC_MODE_VP8
392 | VEPU_REG_ENC_CTRL_EN_BIT;
394 if (ctx->run.dst->b.v4l2_buf.flags & V4L2_BUF_FLAG_KEYFRAME)
395 reg |= VEPU_REG_ENC_CTRL_KEYFRAME_BIT;
397 vepu_write(vpu, reg, VEPU_REG_ENC_CTRL);
400 void rk3288_vpu_vp8e_done(struct rk3288_vpu_ctx *ctx,
401 enum vb2_buffer_state result)
403 struct rk3288_vpu_vp8e_ctrl_buf *ctrl_buf = ctx->hw.vp8e.ctrl_buf.cpu;
405 /* Read length information of this run from utility buffer. */
406 ctx->run.dst->vp8e.ext_hdr_size = ctrl_buf->ext_hdr_size;
407 ctx->run.dst->vp8e.dct_size = ctrl_buf->dct_size;
409 rk3288_vpu_run_done(ctx, result);