Message ID | 20210715065203.709914-1-vkoul@kernel.org |
---|---|
Headers | show |
Series | drm/msm: Add Display Stream Compression Support | expand |
On 15/07/2021 09:51, Vinod Koul wrote: > This add the bits in RM to enable the DSC blocks > > Signed-off-by: Vinod Koul <vkoul@kernel.org> > --- > drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h | 1 + > drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 32 +++++++++++++++++++++++++ > drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h | 1 + > 3 files changed, 34 insertions(+) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h > index d6717d6672f7..d56c05146dfe 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h > @@ -165,6 +165,7 @@ struct dpu_global_state { > uint32_t ctl_to_enc_id[CTL_MAX - CTL_0]; > uint32_t intf_to_enc_id[INTF_MAX - INTF_0]; > uint32_t dspp_to_enc_id[DSPP_MAX - DSPP_0]; > + uint32_t dsc_to_enc_id[DSC_MAX - DSC_0]; > }; > > struct dpu_global_state > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c > index fd2d104f0a91..4da6d72b7996 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c > @@ -11,6 +11,7 @@ > #include "dpu_hw_intf.h" > #include "dpu_hw_dspp.h" > #include "dpu_hw_merge3d.h" > +#include "dpu_hw_dsc.h" > #include "dpu_encoder.h" > #include "dpu_trace.h" > > @@ -75,6 +76,14 @@ int dpu_rm_destroy(struct dpu_rm *rm) > dpu_hw_intf_destroy(hw); > } > } > + for (i = 0; i < ARRAY_SIZE(rm->dsc_blks); i++) { > + struct dpu_hw_dsc *hw; > + > + if (rm->intf_blks[i]) { rm->dsc_blks[i] > + hw = to_dpu_hw_dsc(rm->dsc_blks[i]); > + dpu_hw_dsc_destroy(hw); > + } > + } > > return 0; > } > @@ -221,6 +230,19 @@ int dpu_rm_init(struct dpu_rm *rm, > rm->dspp_blks[dspp->id - DSPP_0] = &hw->base; > } > > + for (i = 0; i < cat->dsc_count; i++) { > + struct dpu_hw_dsc *hw; > + const struct dpu_dsc_cfg *dsc = &cat->dsc[i]; > + > + hw = dpu_hw_dsc_init(dsc->id, mmio, cat); > + if (IS_ERR_OR_NULL(hw)) { > + rc = PTR_ERR(hw); > + DPU_ERROR("failed dsc object creation: err %d\n", rc); > + goto fail; > + } > + rm->dsc_blks[dsc->id - DSC_0] = &hw->base; > + } > + > return 0; > > fail: > @@ -476,6 +498,9 @@ static int _dpu_rm_reserve_intf( > } > > global_state->intf_to_enc_id[idx] = enc_id; > + > + global_state->dsc_to_enc_id[0] = enc_id; > + global_state->dsc_to_enc_id[1] = enc_id; This is not correct. At least this should be guarded with an if, checking that DSC is requested. Also we'd need to check that DSC 0 and 1 are not allocated. > return 0; > } > > @@ -567,6 +592,8 @@ void dpu_rm_release(struct dpu_global_state *global_state, > ARRAY_SIZE(global_state->ctl_to_enc_id), enc->base.id); > _dpu_rm_clear_mapping(global_state->intf_to_enc_id, > ARRAY_SIZE(global_state->intf_to_enc_id), enc->base.id); > + _dpu_rm_clear_mapping(global_state->dsc_to_enc_id, > + ARRAY_SIZE(global_state->dsc_to_enc_id), enc->base.id); > } > > int dpu_rm_reserve( > @@ -640,6 +667,11 @@ int dpu_rm_get_assigned_resources(struct dpu_rm *rm, > hw_to_enc_id = global_state->dspp_to_enc_id; > max_blks = ARRAY_SIZE(rm->dspp_blks); > break; > + case DPU_HW_BLK_DSC: > + hw_blks = rm->dsc_blks; > + hw_to_enc_id = global_state->dsc_to_enc_id; > + max_blks = ARRAY_SIZE(rm->dsc_blks); > + break; > default: > DPU_ERROR("blk type %d not managed by rm\n", type); > return 0; > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h > index 1f12c8d5b8aa..278d2a510b80 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h > @@ -30,6 +30,7 @@ struct dpu_rm { > struct dpu_hw_blk *intf_blks[INTF_MAX - INTF_0]; > struct dpu_hw_blk *dspp_blks[DSPP_MAX - DSPP_0]; > struct dpu_hw_blk *merge_3d_blks[MERGE_3D_MAX - MERGE_3D_0]; > + struct dpu_hw_blk *dsc_blks[DSC_MAX - DSC_0]; > > uint32_t lm_max_width; > }; > -- With best wishes Dmitry
On 15/07/2021 09:52, Vinod Koul wrote: > When DSC is enabled in DT, we need to configure the encoder for DSC > configuration, calculate DSC parameters for the given timing. > > This patch adds that support by adding dpu_encoder_prep_dsc() which is > invoked when DSC is enabled in DT > > Signed-off-by: Vinod Koul <vkoul@kernel.org> > --- > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 142 +++++++++++++++++++- > 1 file changed, 141 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > index 8d942052db8a..41140b781e66 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > @@ -21,12 +21,17 @@ > #include "dpu_hw_intf.h" > #include "dpu_hw_ctl.h" > #include "dpu_hw_dspp.h" > +#include "dpu_hw_dsc.h" > #include "dpu_formats.h" > #include "dpu_encoder_phys.h" > #include "dpu_crtc.h" > #include "dpu_trace.h" > #include "dpu_core_irq.h" > > +#define DSC_MODE_SPLIT_PANEL BIT(0) > +#define DSC_MODE_MULTIPLEX BIT(1) > +#define DSC_MODE_VIDEO BIT(2) This should go into dpu_hw_dsc.h. Ah. They are already defined there and just redefined there. Remove the defines here. It might be cleaner to add bool flags to struct msm_display_dsc_config and then calculate common mode in the dpu_hw_dsc_config(). > + > #define DPU_DEBUG_ENC(e, fmt, ...) DPU_DEBUG("enc%d " fmt,\ > (e) ? (e)->base.base.id : -1, ##__VA_ARGS__) > > @@ -135,6 +140,7 @@ enum dpu_enc_rc_states { > * @cur_slave: As above but for the slave encoder. > * @hw_pp: Handle to the pingpong blocks used for the display. No. > * pingpong blocks can be different than num_phys_encs. > + * @hw_dsc Handle to the DSC blocks used for the display. > * @intfs_swapped: Whether or not the phys_enc interfaces have been swapped > * for partial update right-only cases, such as pingpong > * split where virtual pingpong does not generate IRQs > @@ -180,6 +186,7 @@ struct dpu_encoder_virt { > struct dpu_encoder_phys *cur_master; > struct dpu_encoder_phys *cur_slave; > struct dpu_hw_pingpong *hw_pp[MAX_CHANNELS_PER_ENC]; > + struct dpu_hw_dsc *hw_dsc[MAX_CHANNELS_PER_ENC]; > > bool intfs_swapped; > > @@ -1008,7 +1015,8 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, > struct dpu_hw_blk *hw_ctl[MAX_CHANNELS_PER_ENC]; > struct dpu_hw_blk *hw_lm[MAX_CHANNELS_PER_ENC]; > struct dpu_hw_blk *hw_dspp[MAX_CHANNELS_PER_ENC] = { NULL }; > - int num_lm, num_ctl, num_pp; > + struct dpu_hw_blk *hw_dsc[MAX_CHANNELS_PER_ENC]; > + int num_lm, num_ctl, num_pp, num_dsc; > int i, j; > > if (!drm_enc) { > @@ -1061,11 +1069,16 @@ static void dpu_encoder_virt_mode_set(struct drm_encoder *drm_enc, > dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state, > drm_enc->base.id, DPU_HW_BLK_DSPP, hw_dspp, > ARRAY_SIZE(hw_dspp)); > + num_dsc = dpu_rm_get_assigned_resources(&dpu_kms->rm, global_state, > + drm_enc->base.id, DPU_HW_BLK_DSC, hw_dsc, ARRAY_SIZE(hw_dsc)); > > for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) > dpu_enc->hw_pp[i] = i < num_pp ? to_dpu_hw_pingpong(hw_pp[i]) > : NULL; > > + for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) > + dpu_enc->hw_dsc[i] = i < num_dsc ? to_dpu_hw_dsc(hw_dsc[i]) : NULL; > + > cstate = to_dpu_crtc_state(drm_crtc->state); > > for (i = 0; i < num_lm; i++) { > @@ -1810,10 +1823,133 @@ static void dpu_encoder_vsync_event_work_handler(struct kthread_work *work) > nsecs_to_jiffies(ktime_to_ns(wakeup_time))); > } > > +static void > +dpu_encoder_dsc_pclk_param_calc(struct msm_display_dsc_config *dsc, u32 width) > +{ > + int slice_count, slice_per_intf; > + int bytes_in_slice, total_bytes_per_intf; > + > + if (!dsc || !dsc->drm->slice_width || !dsc->drm->slice_count) { > + DPU_ERROR("Invalid DSC/slices\n"); > + return; > + } > + > + slice_count = dsc->drm->slice_count; > + slice_per_intf = DIV_ROUND_UP(width, dsc->drm->slice_width); > + > + /* > + * If slice_count is greater than slice_per_intf then default to 1. > + * This can happen during partial update. > + */ > + if (slice_count > slice_per_intf) > + slice_count = 1; > + > + bytes_in_slice = DIV_ROUND_UP(dsc->drm->slice_width * > + dsc->drm->bits_per_pixel, 8); > + total_bytes_per_intf = bytes_in_slice * slice_per_intf; > + > + dsc->eol_byte_num = total_bytes_per_intf % 3; > + dsc->pclk_per_line = DIV_ROUND_UP(total_bytes_per_intf, 3); > + dsc->bytes_in_slice = bytes_in_slice; > + dsc->bytes_per_pkt = bytes_in_slice * slice_count; > + dsc->pkt_per_line = slice_per_intf / slice_count; > +} > + > +static void > +dpu_encoder_dsc_initial_line_calc(struct msm_display_dsc_config *dsc, > + u32 enc_ip_width) > +{ > + int ssm_delay, total_pixels, soft_slice_per_enc; > + > + soft_slice_per_enc = enc_ip_width / dsc->drm->slice_width; > + > + /* > + * minimum number of initial line pixels is a sum of: > + * 1. sub-stream multiplexer delay (83 groups for 8bpc, > + * 91 for 10 bpc) * 3 > + * 2. for two soft slice cases, add extra sub-stream multiplexer * 3 > + * 3. the initial xmit delay > + * 4. total pipeline delay through the "lock step" of encoder (47) > + * 5. 6 additional pixels as the output of the rate buffer is > + * 48 bits wide > + */ > + ssm_delay = ((dsc->drm->bits_per_component < 10) ? 84 : 92); > + total_pixels = ssm_delay * 3 + dsc->drm->initial_xmit_delay + 47; > + if (soft_slice_per_enc > 1) > + total_pixels += (ssm_delay * 3); > + dsc->initial_lines = DIV_ROUND_UP(total_pixels, dsc->drm->slice_width); > +} > + > +static void dpu_encoder_dsc_pipe_cfg(struct dpu_hw_dsc *hw_dsc, > + struct dpu_hw_pingpong *hw_pp, > + struct msm_display_dsc_config *dsc, > + u32 common_mode) > +{ > + if (hw_dsc->ops.dsc_config) > + hw_dsc->ops.dsc_config(hw_dsc, dsc, common_mode); > + > + if (hw_dsc->ops.dsc_config_thresh) > + hw_dsc->ops.dsc_config_thresh(hw_dsc, dsc); > + > + if (hw_pp->ops.setup_dsc) > + hw_pp->ops.setup_dsc(hw_pp); > + > + if (hw_pp->ops.enable_dsc) > + hw_pp->ops.enable_dsc(hw_pp); I think, we do not need to split these operations, I'd suggest having just hw_dsc->ops.dsc_config() and hw_pp->ops.enable_dsc(), merging dsc_config_thres() and setup_dsc() into respective methods. > +} > + > +static void dpu_encoder_prep_dsc(struct dpu_encoder_virt *dpu_enc, > + struct msm_display_dsc_config *dsc) > +{ > + /* coding only for 2LM, 2enc, 1 dsc config */ > + struct dpu_encoder_phys *enc_master = dpu_enc->cur_master; > + struct dpu_hw_dsc *hw_dsc[MAX_CHANNELS_PER_ENC]; > + struct dpu_hw_pingpong *hw_pp[MAX_CHANNELS_PER_ENC]; > + int this_frame_slices; > + int intf_ip_w, enc_ip_w; > + int dsc_common_mode; > + int pic_width, pic_height; > + int i; > + > + for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) { > + hw_pp[i] = dpu_enc->hw_pp[i]; > + hw_dsc[i] = dpu_enc->hw_dsc[i]; > + > + if (!hw_pp[i] || !hw_dsc[i]) { > + DPU_ERROR_ENC(dpu_enc, "invalid params for DSC\n"); > + return; > + } > + } > + > + dsc_common_mode = 0; > + pic_width = dsc->drm->pic_width; > + pic_height = dsc->drm->pic_height; > + > + dsc_common_mode = DSC_MODE_MULTIPLEX | DSC_MODE_SPLIT_PANEL; > + if (enc_master->intf_mode == INTF_MODE_VIDEO) > + dsc_common_mode |= DSC_MODE_VIDEO; > + > + this_frame_slices = pic_width / dsc->drm->slice_width; > + intf_ip_w = this_frame_slices * dsc->drm->slice_width; > + > + dpu_encoder_dsc_pclk_param_calc(dsc, intf_ip_w); > + > + /* > + * dsc merge case: when using 2 encoders for the same stream, > + * no. of slices need to be same on both the encoders. > + */ > + enc_ip_w = intf_ip_w / 2; > + dpu_encoder_dsc_initial_line_calc(dsc, enc_ip_w); > + > + for (i = 0; i < MAX_CHANNELS_PER_ENC; i++) > + dpu_encoder_dsc_pipe_cfg(hw_dsc[i], hw_pp[i], dsc, dsc_common_mode); > +} > + > void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc) > { > struct dpu_encoder_virt *dpu_enc; > struct dpu_encoder_phys *phys; > + struct msm_drm_private *priv; > bool needs_hw_reset = false; > unsigned int i; > > @@ -1841,6 +1977,10 @@ void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc) > dpu_encoder_helper_hw_reset(dpu_enc->phys_encs[i]); > } > } > + > + priv = drm_enc->dev->dev_private; > + if (priv->dsc) > + dpu_encoder_prep_dsc(dpu_enc, priv->dsc); Not quite. This makes dsc config global, while we can have several encoders enabled at once (think of DSI + DP). So the dsc should be a per-encoder setting rather than global. > } > > void dpu_encoder_kickoff(struct drm_encoder *drm_enc) > -- With best wishes Dmitry
On 15/07/2021 09:51, Vinod Koul wrote: > Later gens of hardware have DSC bits moved to hw_ctl, so configure these > bits so that DSC would work there as well > > Signed-off-by: Vinod Koul <vkoul@kernel.org> > --- > drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 7 ++++++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > index 2d4645e01ebf..aeea6add61ee 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > @@ -25,6 +25,8 @@ > #define CTL_MERGE_3D_ACTIVE 0x0E4 > #define CTL_INTF_ACTIVE 0x0F4 > #define CTL_MERGE_3D_FLUSH 0x100 > +#define CTL_DSC_ACTIVE 0x0E8 > +#define CTL_DSC_FLUSH 0x104 > #define CTL_INTF_FLUSH 0x110 > #define CTL_INTF_MASTER 0x134 > #define CTL_FETCH_PIPE_ACTIVE 0x0FC > @@ -34,6 +36,7 @@ > > #define DPU_REG_RESET_TIMEOUT_US 2000 > #define MERGE_3D_IDX 23 > +#define DSC_IDX 22 > #define INTF_IDX 31 > #define CTL_INVALID_BIT 0xffff > > @@ -120,6 +123,7 @@ static u32 dpu_hw_ctl_get_pending_flush(struct dpu_hw_ctl *ctx) > > static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) > { > + DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH, BIT(0) | BIT(1) | BIT(2) | BIT(3)); Please pass DSC indices using intf cfg and use them to configure register writes. > > if (ctx->pending_flush_mask & BIT(MERGE_3D_IDX)) > DPU_REG_WRITE(&ctx->hw, CTL_MERGE_3D_FLUSH, > @@ -128,7 +132,7 @@ static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) > DPU_REG_WRITE(&ctx->hw, CTL_INTF_FLUSH, > ctx->pending_intf_flush_mask); > > - DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask); > + DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask | BIT(DSC_IDX)); Only if DSCs are used > } > > static inline void dpu_hw_ctl_trigger_flush(struct dpu_hw_ctl *ctx) > @@ -507,6 +511,7 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx, > if (cfg->merge_3d) > DPU_REG_WRITE(c, CTL_MERGE_3D_ACTIVE, > BIT(cfg->merge_3d - MERGE_3D_0)); > + DPU_REG_WRITE(c, CTL_DSC_ACTIVE, BIT(0) | BIT(1) | BIT(2) | BIT(3)); And here > } > > static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx, > -- With best wishes Dmitry
On 2021-07-14 23:51, Vinod Koul wrote: > In SDM845, DSC can be enabled by writing to pingpong block registers, > so > add support for DSC in hw_pp > > Signed-off-by: Vinod Koul <vkoul@kernel.org> Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org> > --- > .../gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c | 32 +++++++++++++++++++ > .../gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h | 14 ++++++++ > 2 files changed, 46 insertions(+) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c > b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c > index 245a7a62b5c6..07fc131ca9aa 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c > @@ -28,6 +28,9 @@ > #define PP_FBC_MODE 0x034 > #define PP_FBC_BUDGET_CTL 0x038 > #define PP_FBC_LOSSY_MODE 0x03C > +#define PP_DSC_MODE 0x0a0 > +#define PP_DCE_DATA_IN_SWAP 0x0ac > +#define PP_DCE_DATA_OUT_SWAP 0x0c8 > > #define PP_DITHER_EN 0x000 > #define PP_DITHER_BITDEPTH 0x004 > @@ -245,6 +248,32 @@ static u32 dpu_hw_pp_get_line_count(struct > dpu_hw_pingpong *pp) > return line; > } > > +static int dpu_hw_pp_dsc_enable(struct dpu_hw_pingpong *pp) > +{ > + struct dpu_hw_blk_reg_map *c = &pp->hw; > + > + DPU_REG_WRITE(c, PP_DSC_MODE, 1); > + return 0; > +} > + > +static void dpu_hw_pp_dsc_disable(struct dpu_hw_pingpong *pp) > +{ > + struct dpu_hw_blk_reg_map *c = &pp->hw; > + > + DPU_REG_WRITE(c, PP_DSC_MODE, 0); > +} > + > +static int dpu_hw_pp_setup_dsc(struct dpu_hw_pingpong *pp) > +{ > + struct dpu_hw_blk_reg_map *pp_c = &pp->hw; > + int data; > + > + data = DPU_REG_READ(pp_c, PP_DCE_DATA_OUT_SWAP); > + data |= BIT(18); /* endian flip */ > + DPU_REG_WRITE(pp_c, PP_DCE_DATA_OUT_SWAP, data); > + return 0; > +} > + > static void _setup_pingpong_ops(struct dpu_hw_pingpong *c, > unsigned long features) > { > @@ -256,6 +285,9 @@ static void _setup_pingpong_ops(struct > dpu_hw_pingpong *c, > c->ops.get_autorefresh = dpu_hw_pp_get_autorefresh_config; > c->ops.poll_timeout_wr_ptr = dpu_hw_pp_poll_timeout_wr_ptr; > c->ops.get_line_count = dpu_hw_pp_get_line_count; > + c->ops.setup_dsc = dpu_hw_pp_setup_dsc; > + c->ops.enable_dsc = dpu_hw_pp_dsc_enable; > + c->ops.disable_dsc = dpu_hw_pp_dsc_disable; > > if (test_bit(DPU_PINGPONG_DITHER, &features)) > c->ops.setup_dither = dpu_hw_pp_setup_dither; > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h > b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h > index 845b9ce80e31..5058e41ffbc0 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h > @@ -124,6 +124,20 @@ struct dpu_hw_pingpong_ops { > */ > void (*setup_dither)(struct dpu_hw_pingpong *pp, > struct dpu_hw_dither_cfg *cfg); > + /** > + * Enable DSC > + */ > + int (*enable_dsc)(struct dpu_hw_pingpong *pp); > + > + /** > + * Disable DSC > + */ > + void (*disable_dsc)(struct dpu_hw_pingpong *pp); > + > + /** > + * Setup DSC > + */ > + int (*setup_dsc)(struct dpu_hw_pingpong *pp); > }; > > struct dpu_hw_pingpong {
On 2021-07-14 23:51, Vinod Koul wrote: > Later gens of hardware have DSC bits moved to hw_ctl, so configure > these > bits so that DSC would work there as well > > Signed-off-by: Vinod Koul <vkoul@kernel.org> Please correct me if wrong but here you seem to be flushing all the DSC bits even the unused ones. This will end-up enabling DSC even when DSC is unused on the newer targets. If so, thats wrong. We need to implement bit-mask based approach to avoid this change and only enable those DSCs which are used. > --- > drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c | 7 ++++++- > 1 file changed, 6 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > index 2d4645e01ebf..aeea6add61ee 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c > @@ -25,6 +25,8 @@ > #define CTL_MERGE_3D_ACTIVE 0x0E4 > #define CTL_INTF_ACTIVE 0x0F4 > #define CTL_MERGE_3D_FLUSH 0x100 > +#define CTL_DSC_ACTIVE 0x0E8 > +#define CTL_DSC_FLUSH 0x104 > #define CTL_INTF_FLUSH 0x110 > #define CTL_INTF_MASTER 0x134 > #define CTL_FETCH_PIPE_ACTIVE 0x0FC > @@ -34,6 +36,7 @@ > > #define DPU_REG_RESET_TIMEOUT_US 2000 > #define MERGE_3D_IDX 23 > +#define DSC_IDX 22 > #define INTF_IDX 31 > #define CTL_INVALID_BIT 0xffff > > @@ -120,6 +123,7 @@ static u32 dpu_hw_ctl_get_pending_flush(struct > dpu_hw_ctl *ctx) > > static inline void dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) > { > + DPU_REG_WRITE(&ctx->hw, CTL_DSC_FLUSH, BIT(0) | BIT(1) | BIT(2) | > BIT(3)); > > if (ctx->pending_flush_mask & BIT(MERGE_3D_IDX)) > DPU_REG_WRITE(&ctx->hw, CTL_MERGE_3D_FLUSH, > @@ -128,7 +132,7 @@ static inline void > dpu_hw_ctl_trigger_flush_v1(struct dpu_hw_ctl *ctx) > DPU_REG_WRITE(&ctx->hw, CTL_INTF_FLUSH, > ctx->pending_intf_flush_mask); > > - DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask); > + DPU_REG_WRITE(&ctx->hw, CTL_FLUSH, ctx->pending_flush_mask | > BIT(DSC_IDX)); > } > > static inline void dpu_hw_ctl_trigger_flush(struct dpu_hw_ctl *ctx) > @@ -507,6 +511,7 @@ static void dpu_hw_ctl_intf_cfg_v1(struct > dpu_hw_ctl *ctx, > if (cfg->merge_3d) > DPU_REG_WRITE(c, CTL_MERGE_3D_ACTIVE, > BIT(cfg->merge_3d - MERGE_3D_0)); > + DPU_REG_WRITE(c, CTL_DSC_ACTIVE, BIT(0) | BIT(1) | BIT(2) | BIT(3)); > } > > static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
On 2021-07-14 23:52, Vinod Koul wrote: > For DSC to work we typically need a 2,2,1 configuration. This should > suffice for resolutions upto 4k. For more resolutions like 8k this > won't > work. > > The topology information is provided by DTS so we try to deduce the > topology required for DSC. > Furthermore, we can use 1 DSC encoder in lesser resolutions, but that > is > not power efficient according to Abhinav, it is better to use 2 mixers > as that will split width/2 and is proven to be power efficient. I think now that we have added the technical reason of why it is better to use 2-2-1 ( using 2 LMs is better than one as it will half layer width), we can drop my name from the commit text as it holds less value than the actual reason itself :) You can still keep my signed-off and co-developed by tag > > Also, the panel has been tested only with 2,2,1 configuration, so for > now we blindly create 2,2,1 topology when DSC is enabled > > Co-developed-by: Abhinav Kumar <abhinavk@codeaurora.org> > Signed-off-by: Abhinav Kumar <abhinavk@codeaurora.org> > Signed-off-by: Vinod Koul <vkoul@kernel.org> > --- > Changes since RFC: > - Add more details in changelog > > drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 16 ++++++++++++++++ > 1 file changed, 16 insertions(+) > > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > index 41140b781e66..8f0a8bd9c8ff 100644 > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c > @@ -573,6 +573,8 @@ static struct msm_display_topology > dpu_encoder_get_topology( > struct drm_display_mode *mode) > { > struct msm_display_topology topology = {0}; > + struct drm_encoder *drm_enc; > + struct msm_drm_private *priv; > int i, intf_count = 0; > > for (i = 0; i < MAX_PHYS_ENCODERS_PER_VIRTUAL; i++) > @@ -607,8 +609,22 @@ static struct msm_display_topology > dpu_encoder_get_topology( > topology.num_enc = 0; > topology.num_intf = intf_count; > > + drm_enc = &dpu_enc->base; > + priv = drm_enc->dev->dev_private; > + if (priv && priv->dsc) { if dsc is moved to the encoder, this will need to be changed too > + /* In case of Display Stream Compression DSC, we would use > + * 2 encoders, 2 line mixers and 1 interface > + * this is power optimal and can drive upto (including) 4k > + * screens > + */ > + topology.num_enc = 2; > + topology.num_intf = 1; > + topology.num_lm = 2; > + } > + > return topology; > } > + > static int dpu_encoder_virt_atomic_check( > struct drm_encoder *drm_enc, > struct drm_crtc_state *crtc_state,
On 2021-07-14 23:52, Vinod Koul wrote: > When DSC is enabled, we need to configure DSI registers accordingly and > configure the respective stream compression registers. > > Add support to calculate the register setting based on DSC params and > timing information and configure these registers. > > Signed-off-by: Vinod Koul <vkoul@kernel.org> same comments as dmitry on this one: https://patchwork.freedesktop.org/patch/444082/?series=90413&rev=2 nothing more to add. > --- > drivers/gpu/drm/msm/dsi/dsi.xml.h | 10 ++ > drivers/gpu/drm/msm/dsi/dsi_host.c | 142 +++++++++++++++++++++++++++-- > 2 files changed, 142 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h > b/drivers/gpu/drm/msm/dsi/dsi.xml.h > index 50eb4d1b8fdd..b8e9e608abfc 100644 > --- a/drivers/gpu/drm/msm/dsi/dsi.xml.h > +++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h > @@ -2310,4 +2310,14 @@ static inline uint32_t > REG_DSI_7nm_PHY_LN_TX_DCTRL(uint32_t i0) { return 0x00000 > > #define REG_DSI_7nm_PHY_PLL_PERF_OPTIMIZE 0x00000260 > > +#define REG_DSI_VIDEO_COMPRESSION_MODE_CTRL 0x0000029c > + > +#define REG_DSI_VIDEO_COMPRESSION_MODE_CTRL2 0x000002a0 > + > +#define REG_DSI_COMMAND_COMPRESSION_MODE_CTRL 0x000002a4 > + > +#define REG_DSI_COMMAND_COMPRESSION_MODE_CTRL2 0x000002a8 > + > +#define REG_DSI_COMMAND_COMPRESSION_MODE_CTRL3 0x000002ac > + > #endif /* DSI_XML */ > diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c > b/drivers/gpu/drm/msm/dsi/dsi_host.c > index e1e5d91809b5..4e8ab1b1df8b 100644 > --- a/drivers/gpu/drm/msm/dsi/dsi_host.c > +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c > @@ -942,6 +942,26 @@ static void dsi_ctrl_config(struct msm_dsi_host > *msm_host, bool enable, > dsi_write(msm_host, REG_DSI_CTRL, data); > } > > +static int dsi_dsc_update_pic_dim(struct msm_display_dsc_config *dsc, > + int pic_width, int pic_height) > +{ > + if (!dsc || !pic_width || !pic_height) { > + pr_err("DSI: invalid input: pic_width: %d pic_height: %d\n", > pic_width, pic_height); > + return -EINVAL; > + } > + > + if ((pic_width % dsc->drm->slice_width) || (pic_height % > dsc->drm->slice_height)) { > + pr_err("DSI: pic_dim %dx%d has to be multiple of slice %dx%d\n", > + pic_width, pic_height, dsc->drm->slice_width, > dsc->drm->slice_height); > + return -EINVAL; > + } > + > + dsc->drm->pic_width = pic_width; > + dsc->drm->pic_height = pic_height; > + > + return 0; > +} > + > static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool > is_dual_dsi) > { > struct drm_display_mode *mode = msm_host->mode; > @@ -956,6 +976,7 @@ static void dsi_timing_setup(struct msm_dsi_host > *msm_host, bool is_dual_dsi) > u32 va_end = va_start + mode->vdisplay; > u32 hdisplay = mode->hdisplay; > u32 wc; > + u32 data; > > DBG(""); > > @@ -974,7 +995,73 @@ static void dsi_timing_setup(struct msm_dsi_host > *msm_host, bool is_dual_dsi) > hdisplay /= 2; > } > > + if (msm_host->dsc) { > + struct msm_display_dsc_config *dsc = msm_host->dsc; > + > + /* update dsc params with timing params */ > + dsi_dsc_update_pic_dim(dsc, mode->hdisplay, mode->vdisplay); > + DBG("Mode Width- %d x Height %d\n", dsc->drm->pic_width, > dsc->drm->pic_height); > + > + /* we do the calculations for dsc parameters here so that > + * panel can use these parameters > + */ > + dsi_populate_dsc_params(dsc); > + > + /* Divide the display by 3 but keep back/font porch and > + * pulse width same > + */ > + h_total -= hdisplay; > + hdisplay /= 3; > + h_total += hdisplay; > + ha_end = ha_start + hdisplay; > + } > + > if (msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) { > + if (msm_host->dsc) { > + struct msm_display_dsc_config *dsc = msm_host->dsc; > + u32 reg, intf_width, slice_per_intf, width; > + u32 total_bytes_per_intf; > + > + /* first calculate dsc parameters and then program > + * compress mode registers > + */ > + intf_width = hdisplay; > + slice_per_intf = DIV_ROUND_UP(intf_width, dsc->drm->slice_width); > + > + /* If slice_count > slice_per_intf, then use 1 > + * This can happen during partial update > + */ > + dsc->drm->slice_count = 1; > + > + dsc->bytes_in_slice = DIV_ROUND_UP(dsc->drm->slice_width * 8, 8); > + total_bytes_per_intf = dsc->bytes_in_slice * slice_per_intf; > + > + dsc->eol_byte_num = total_bytes_per_intf % 3; > + dsc->pclk_per_line = DIV_ROUND_UP(total_bytes_per_intf, 3); > + dsc->bytes_per_pkt = dsc->bytes_in_slice * dsc->drm->slice_count; > + dsc->pkt_per_line = slice_per_intf / dsc->drm->slice_count; > + > + width = dsc->pclk_per_line; > + reg = dsc->bytes_per_pkt << 16; > + reg |= (0x0b << 8); /* dtype of compressed image */ > + > + /* pkt_per_line: > + * 0 == 1 pkt > + * 1 == 2 pkt > + * 2 == 4 pkt > + * 3 pkt is not supported > + * above translates to ffs() - 1 > + */ > + reg |= (ffs(dsc->pkt_per_line) - 1) << 6; > + > + dsc->eol_byte_num = total_bytes_per_intf % 3; > + reg |= dsc->eol_byte_num << 4; > + reg |= 1; > + > + dsi_write(msm_host, > + REG_DSI_VIDEO_COMPRESSION_MODE_CTRL, reg); > + } > + > dsi_write(msm_host, REG_DSI_ACTIVE_H, > DSI_ACTIVE_H_START(ha_start) | > DSI_ACTIVE_H_END(ha_end)); > @@ -993,19 +1080,50 @@ static void dsi_timing_setup(struct > msm_dsi_host *msm_host, bool is_dual_dsi) > DSI_ACTIVE_VSYNC_VPOS_START(vs_start) | > DSI_ACTIVE_VSYNC_VPOS_END(vs_end)); > } else { /* command mode */ > + if (msm_host->dsc) { > + struct msm_display_dsc_config *dsc = msm_host->dsc; > + u32 reg, reg_ctrl, reg_ctrl2; > + u32 slice_per_intf, bytes_in_slice, total_bytes_per_intf; > + > + reg_ctrl = dsi_read(msm_host, > REG_DSI_COMMAND_COMPRESSION_MODE_CTRL); > + reg_ctrl2 = dsi_read(msm_host, > REG_DSI_COMMAND_COMPRESSION_MODE_CTRL2); > + > + slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->drm->slice_width); > + bytes_in_slice = DIV_ROUND_UP(dsc->drm->slice_width * > + dsc->drm->bits_per_pixel, 8); > + dsc->drm->slice_chunk_size = bytes_in_slice; > + total_bytes_per_intf = dsc->bytes_in_slice * slice_per_intf; > + dsc->pkt_per_line = slice_per_intf / dsc->drm->slice_count; > + > + reg = 0x39 << 8; > + reg |= ffs(dsc->pkt_per_line) << 6; > + > + dsc->eol_byte_num = total_bytes_per_intf % 3; > + reg |= dsc->eol_byte_num << 4; > + reg |= 1; > + > + reg_ctrl |= reg; > + reg_ctrl2 |= bytes_in_slice; > + > + dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL, reg); > + dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL2, > reg_ctrl2); > + } > + > /* image data and 1 byte write_memory_start cmd */ > - wc = hdisplay * dsi_get_bpp(msm_host->format) / 8 + 1; > + if (!msm_host->dsc) > + wc = hdisplay * dsi_get_bpp(msm_host->format) / 8 + 1; > + else > + wc = mode->hdisplay / 2 + 1; > > - dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_CTRL, > - DSI_CMD_MDP_STREAM0_CTRL_WORD_COUNT(wc) | > - DSI_CMD_MDP_STREAM0_CTRL_VIRTUAL_CHANNEL( > - msm_host->channel) | > - DSI_CMD_MDP_STREAM0_CTRL_DATA_TYPE( > - MIPI_DSI_DCS_LONG_WRITE)); > + data = DSI_CMD_MDP_STREAM0_CTRL_WORD_COUNT(wc) | > + DSI_CMD_MDP_STREAM0_CTRL_VIRTUAL_CHANNEL(msm_host->channel) | > + DSI_CMD_MDP_STREAM0_CTRL_DATA_TYPE(MIPI_DSI_DCS_LONG_WRITE); > > - dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_TOTAL, > - DSI_CMD_MDP_STREAM0_TOTAL_H_TOTAL(hdisplay) | > - DSI_CMD_MDP_STREAM0_TOTAL_V_TOTAL(mode->vdisplay)); > + dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_CTRL, data); > + > + data = DSI_CMD_MDP_STREAM0_TOTAL_H_TOTAL(hdisplay) | > + DSI_CMD_MDP_STREAM0_TOTAL_V_TOTAL(mode->vdisplay); > + dsi_write(msm_host, REG_DSI_CMD_MDP_STREAM0_TOTAL, data); > } > } > > @@ -2074,6 +2192,7 @@ int msm_dsi_host_modeset_init(struct > mipi_dsi_host *host, > struct msm_dsi_host *msm_host = to_msm_dsi_host(host); > const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; > struct platform_device *pdev = msm_host->pdev; > + struct msm_drm_private *priv; > int ret; > > msm_host->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); > @@ -2093,6 +2212,9 @@ int msm_dsi_host_modeset_init(struct > mipi_dsi_host *host, > } > > msm_host->dev = dev; > + priv = dev->dev_private; > + priv->dsc = msm_host->dsc; > + > ret = cfg_hnd->ops->tx_buf_alloc(msm_host, SZ_4K); > if (ret) { > pr_err("%s: alloc tx gem obj failed, %d\n", __func__, ret);