CHROMIUM: [media] rk3288-vpu: Preserve picture positions in H264 DPB lists
authorTomasz Figa <tfiga@chromium.org>
Tue, 31 Mar 2015 13:40:03 +0000 (22:40 +0900)
committerHuang, Tao <huangtao@rock-chips.com>
Thu, 30 Jun 2016 11:54:13 +0000 (19:54 +0800)
Even though H264 standard allows arbitrary order of DPB entries, the
hardware requires picture with given POC to use the same DPB entry for
its whole lifetime. This means that the driver needs to reorder DPB
array to suit this requirement.

This patch modifies the driver to reorder H264 DPB and should fix
corruption issues when DPB array received from userspace does not meet
hardware requirements.

BUG=chrome-os-partner:38416
TEST=http://www.youtube.com/embed/YE7VzlLtp-4

Signed-off-by: Tomasz Figa <tfiga@chromium.org>
Reviewed-on: https://chromium-review.googlesource.com/263370
Reviewed-by: Pawel Osciak <posciak@chromium.org>
Tested-by: Caesar Wang <wxt@rock-chips.com>
Change-Id: Ibbd0e2bc4e527aadd21ef08ec68866678bf8a659
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Yakir Yang <ykk@rock-chips.com>
drivers/media/platform/rk3288-vpu/rk3288_vpu_common.h
drivers/media/platform/rk3288-vpu/rk3288_vpu_dec.c
drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c

index 0ef96c3f186c5e03cf4fd086f669fff5e964e408..34828369875f5678cd2a22dbd08ae6e032db3ecf 100644 (file)
@@ -213,6 +213,9 @@ struct rk3288_vpu_vp8d_run {
  * @scaling_matrix:    Pointer to a buffer containing scaling matrix.
  * @slice_param:       Pointer to a buffer containing slice parameters array.
  * @decode_param:      Pointer to a buffer containing decode parameters.
+ * @dpb:               Array of DPB entries reordered to keep POC order.
+ * @dpb_map:           Map of indices used in ref_pic_list_* into indices to
+ *                     reordered DPB array.
  */
 struct rk3288_vpu_h264d_run {
        const struct v4l2_ctrl_h264_sps *sps;
@@ -220,6 +223,8 @@ struct rk3288_vpu_h264d_run {
        const struct v4l2_ctrl_h264_scaling_matrix *scaling_matrix;
        const struct v4l2_ctrl_h264_slice_param *slice_param;
        const struct v4l2_ctrl_h264_decode_param *decode_param;
+       struct v4l2_h264_dpb_entry dpb[16];
+       u8 dpb_map[16];
 };
 
 /**
index 62fde27723539ad2a26063f948bbe4ed5c691e33..e3c54146c53eb2110191994ad9e8814f6f060257 100644 (file)
@@ -625,6 +625,99 @@ static int vidioc_streamoff(struct file *file, void *priv,
        return ret;
 }
 
+static void rk3288_vpu_dec_set_dpb(struct rk3288_vpu_ctx *ctx,
+                                           struct v4l2_ctrl *ctrl)
+{
+       struct v4l2_ctrl_h264_decode_param *dec_param = ctrl->p_new.p;
+       const struct v4l2_h264_dpb_entry *new_dpb_entry;
+       u8 *dpb_map = ctx->run.h264d.dpb_map;
+       struct v4l2_h264_dpb_entry *cur_dpb_entry;
+       DECLARE_BITMAP(used, ARRAY_SIZE(ctx->run.h264d.dpb)) = { 0, };
+       DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
+       int i, j;
+
+       BUILD_BUG_ON(ARRAY_SIZE(ctx->run.h264d.dpb) !=
+                                               ARRAY_SIZE(dec_param->dpb));
+
+       /* Disable all entries by default. */
+       for (j = 0; j < ARRAY_SIZE(ctx->run.h264d.dpb); ++j) {
+               cur_dpb_entry = &ctx->run.h264d.dpb[j];
+
+               cur_dpb_entry->flags &= ~V4L2_H264_DPB_ENTRY_FLAG_ACTIVE;
+       }
+
+       /* Try to match new DPB entries with existing ones by their POCs. */
+       for (i = 0; i < ARRAY_SIZE(dec_param->dpb); ++i) {
+               new_dpb_entry = &dec_param->dpb[i];
+
+               if (!(new_dpb_entry->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
+                       continue;
+
+               /*
+                * To cut off some comparisons, iterate only on target DPB
+                * entries which are not used yet.
+                */
+               for_each_clear_bit(j, used, ARRAY_SIZE(ctx->run.h264d.dpb)) {
+                       cur_dpb_entry = &ctx->run.h264d.dpb[j];
+
+                       if (new_dpb_entry->top_field_order_cnt ==
+                                       cur_dpb_entry->top_field_order_cnt &&
+                           new_dpb_entry->bottom_field_order_cnt ==
+                                       cur_dpb_entry->bottom_field_order_cnt) {
+                               memcpy(cur_dpb_entry, new_dpb_entry,
+                                       sizeof(*cur_dpb_entry));
+                               set_bit(j, used);
+                               dpb_map[i] = j;
+                               break;
+                       }
+               }
+
+               if (j == ARRAY_SIZE(ctx->run.h264d.dpb))
+                       set_bit(i, new);
+       }
+
+       /* For entries that could not be matched, use remaining free slots. */
+       for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
+               new_dpb_entry = &dec_param->dpb[i];
+
+               j = find_first_zero_bit(used, ARRAY_SIZE(ctx->run.h264d.dpb));
+               /*
+                * Both arrays are of the same sizes, so there is no way
+                * we can end up with no space in target array, unless
+                * something is buggy.
+                */
+               if (WARN_ON(j >= ARRAY_SIZE(ctx->run.h264d.dpb)))
+                       return;
+
+               cur_dpb_entry = &ctx->run.h264d.dpb[j];
+               memcpy(cur_dpb_entry, new_dpb_entry, sizeof(*cur_dpb_entry));
+               set_bit(j, used);
+               dpb_map[i] = j;
+       }
+
+       /*
+        * Verify that reference picture lists are in range, since they
+        * will be indexing dpb_map[] when programming the hardware.
+        *
+        * Fallback to 0 should be safe, as we will get at most corrupt
+        * decoding result, without any serious side effects. Moreover,
+        * even if entry 0 is unused, the hardware programming code will
+        * handle this properly.
+        */
+       for (i = 0; i < ARRAY_SIZE(dec_param->ref_pic_list_b0); ++i)
+               if (dec_param->ref_pic_list_b0[i]
+                   >= ARRAY_SIZE(ctx->run.h264d.dpb_map))
+                       dec_param->ref_pic_list_b0[i] = 0;
+       for (i = 0; i < ARRAY_SIZE(dec_param->ref_pic_list_b1); ++i)
+               if (dec_param->ref_pic_list_b1[i]
+                   >= ARRAY_SIZE(ctx->run.h264d.dpb_map))
+                       dec_param->ref_pic_list_b1[i] = 0;
+       for (i = 0; i < ARRAY_SIZE(dec_param->ref_pic_list_p0); ++i)
+               if (dec_param->ref_pic_list_p0[i]
+                   >= ARRAY_SIZE(ctx->run.h264d.dpb_map))
+                       dec_param->ref_pic_list_p0[i] = 0;
+}
+
 static int rk3288_vpu_dec_s_ctrl(struct v4l2_ctrl *ctrl)
 {
        struct rk3288_vpu_ctx *ctx = ctrl_to_ctx(ctrl);
@@ -640,11 +733,16 @@ static int rk3288_vpu_dec_s_ctrl(struct v4l2_ctrl *ctrl)
        case V4L2_CID_MPEG_VIDEO_H264_PPS:
        case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX:
        case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAM:
-       case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAM:
        case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HDR:
                /* These controls are used directly. */
                break;
 
+       case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAM:
+               if (ctrl->store)
+                       break;
+               rk3288_vpu_dec_set_dpb(ctx, ctrl);
+               break;
+
        default:
                v4l2_err(&dev->v4l2_dev, "Invalid control, id=%d, val=%d\n",
                         ctrl->id, ctrl->val);
index 132a76013d32a2da6567674b6bfc30a6e61c4ed1..b03491efbf32eab8c94a1d4d5c085fb736dfe9aa 100644 (file)
@@ -226,7 +226,7 @@ static void rk3288_vpu_h264d_prepare_table(struct rk3288_vpu_ctx *ctx)
                                                ctx->run.h264d.scaling_matrix;
        const struct v4l2_ctrl_h264_decode_param *dec_param =
                                                ctx->run.h264d.decode_param;
-       const struct v4l2_h264_dpb_entry *dpb = dec_param->dpb;
+       const struct v4l2_h264_dpb_entry *dpb = ctx->run.h264d.dpb;
        int i;
 
        /*
@@ -367,7 +367,8 @@ static void rk3288_vpu_h264d_set_ref(struct rk3288_vpu_ctx *ctx)
 {
        const struct v4l2_ctrl_h264_decode_param *dec_param =
                                                ctx->run.h264d.decode_param;
-       const struct v4l2_h264_dpb_entry *dpb = dec_param->dpb;
+       const struct v4l2_h264_dpb_entry *dpb = ctx->run.h264d.dpb;
+       const u8 *dpb_map = ctx->run.h264d.dpb_map;
        struct rk3288_vpu_dev *vpu = ctx->dev;
        u32 dpb_longterm = 0;
        u32 dpb_valid = 0;
@@ -419,17 +420,17 @@ static void rk3288_vpu_h264d_set_ref(struct rk3288_vpu_ctx *ctx)
        reg_num = 0;
        for (i = 0; i < 15; i += 3) {
                reg = VDPU_REG_BD_REF_PIC_BINIT_RLIST_F0(
-                               dec_param->ref_pic_list_b0[i + 0])
+                               dpb_map[dec_param->ref_pic_list_b0[i + 0]])
                        | VDPU_REG_BD_REF_PIC_BINIT_RLIST_F1(
-                               dec_param->ref_pic_list_b0[i + 1])
+                               dpb_map[dec_param->ref_pic_list_b0[i + 1]])
                        | VDPU_REG_BD_REF_PIC_BINIT_RLIST_F2(
-                               dec_param->ref_pic_list_b0[i + 2])
+                               dpb_map[dec_param->ref_pic_list_b0[i + 2]])
                        | VDPU_REG_BD_REF_PIC_BINIT_RLIST_B0(
-                               dec_param->ref_pic_list_b1[i + 0])
+                               dpb_map[dec_param->ref_pic_list_b1[i + 0]])
                        | VDPU_REG_BD_REF_PIC_BINIT_RLIST_B1(
-                               dec_param->ref_pic_list_b1[i + 1])
+                               dpb_map[dec_param->ref_pic_list_b1[i + 1]])
                        | VDPU_REG_BD_REF_PIC_BINIT_RLIST_B2(
-                               dec_param->ref_pic_list_b1[i + 2]);
+                               dpb_map[dec_param->ref_pic_list_b1[i + 2]]);
                vdpu_write_relaxed(vpu, reg, VDPU_REG_BD_REF_PIC(reg_num++));
        }
 
@@ -439,17 +440,17 @@ static void rk3288_vpu_h264d_set_ref(struct rk3288_vpu_ctx *ctx)
         * of P forward picture list.
         */
        reg = VDPU_REG_BD_P_REF_PIC_BINIT_RLIST_F15(
-                       dec_param->ref_pic_list_b0[15])
+                       dpb_map[dec_param->ref_pic_list_b0[15]])
                | VDPU_REG_BD_P_REF_PIC_BINIT_RLIST_B15(
-                       dec_param->ref_pic_list_b1[15])
+                       dpb_map[dec_param->ref_pic_list_b1[15]])
                | VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F0(
-                       dec_param->ref_pic_list_p0[0])
+                       dpb_map[dec_param->ref_pic_list_p0[0]])
                | VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F1(
-                       dec_param->ref_pic_list_p0[1])
+                       dpb_map[dec_param->ref_pic_list_p0[1]])
                | VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F2(
-                       dec_param->ref_pic_list_p0[2])
+                       dpb_map[dec_param->ref_pic_list_p0[2]])
                | VDPU_REG_BD_P_REF_PIC_PINIT_RLIST_F3(
-                       dec_param->ref_pic_list_p0[3]);
+                       dpb_map[dec_param->ref_pic_list_p0[3]]);
        vdpu_write_relaxed(vpu, reg, VDPU_REG_BD_P_REF_PIC);
 
        /*
@@ -459,17 +460,17 @@ static void rk3288_vpu_h264d_set_ref(struct rk3288_vpu_ctx *ctx)
        reg_num = 0;
        for (i = 4; i < RK3288_VPU_H264_NUM_DPB; i += 6) {
                reg = VDPU_REG_FWD_PIC_PINIT_RLIST_F0(
-                               dec_param->ref_pic_list_p0[i + 0])
+                               dpb_map[dec_param->ref_pic_list_p0[i + 0]])
                        | VDPU_REG_FWD_PIC_PINIT_RLIST_F1(
-                               dec_param->ref_pic_list_p0[i + 1])
+                               dpb_map[dec_param->ref_pic_list_p0[i + 1]])
                        | VDPU_REG_FWD_PIC_PINIT_RLIST_F2(
-                               dec_param->ref_pic_list_p0[i + 2])
+                               dpb_map[dec_param->ref_pic_list_p0[i + 2]])
                        | VDPU_REG_FWD_PIC_PINIT_RLIST_F3(
-                               dec_param->ref_pic_list_p0[i + 3])
+                               dpb_map[dec_param->ref_pic_list_p0[i + 3]])
                        | VDPU_REG_FWD_PIC_PINIT_RLIST_F4(
-                               dec_param->ref_pic_list_p0[i + 4])
+                               dpb_map[dec_param->ref_pic_list_p0[i + 4]])
                        | VDPU_REG_FWD_PIC_PINIT_RLIST_F5(
-                               dec_param->ref_pic_list_p0[i + 5]);
+                               dpb_map[dec_param->ref_pic_list_p0[i + 5]]);
                vdpu_write_relaxed(vpu, reg, VDPU_REG_FWD_PIC(reg_num++));
        }