Message ID | 20210311143958.562625-17-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | TCI fixes and cleanups | expand |
On 3/11/21 3:39 PM, Richard Henderson wrote: > Use the correct set of asserts during code generation. > We do not require the first input to overlap the output; > the existing interpreter already supported that. > > Split out tci_args_rrrbb in the translator. > Use the deposit32/64 functions rather than inline expansion. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/tci/tcg-target-con-set.h | 1 - > tcg/tci.c | 33 ++++++++++++++++----------------- > tcg/tci/tcg-target.c.inc | 24 ++++++++++++++---------- > 3 files changed, 30 insertions(+), 28 deletions(-) > > diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h > index f51b7bcb13..316730f32c 100644 > --- a/tcg/tci/tcg-target-con-set.h > +++ b/tcg/tci/tcg-target-con-set.h > @@ -13,7 +13,6 @@ C_O0_I2(r, r) > C_O0_I3(r, r, r) > C_O0_I4(r, r, r, r) > C_O1_I1(r, r) > -C_O1_I2(r, 0, r) > C_O1_I2(r, r, r) > C_O1_I4(r, r, r, r, r) > C_O2_I1(r, r, r) > diff --git a/tcg/tci.c b/tcg/tci.c > index 10f58e4f25..3ce2b72316 100644 > --- a/tcg/tci.c > +++ b/tcg/tci.c > @@ -168,6 +168,7 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) > * tci_args_<arguments> > * where arguments is a sequence of > * > + * b = immediate (bit position) > * i = immediate (uint32_t) > * I = immediate (tcg_target_ulong) > * r = register > @@ -236,6 +237,16 @@ static void tci_args_rrrc(const uint8_t **tb_ptr, > *c3 = tci_read_b(tb_ptr); > } > > +static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, > + TCGReg *r2, uint8_t *i3, uint8_t *i4) > +{ > + *r0 = tci_read_r(tb_ptr); > + *r1 = tci_read_r(tb_ptr); > + *r2 = tci_read_r(tb_ptr); > + *i3 = tci_read_b(tb_ptr); > + *i4 = tci_read_b(tb_ptr); > +} > + > #if TCG_TARGET_REG_BITS == 32 > static void tci_args_rrrr(const uint8_t **tb_ptr, > TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) > @@ -432,11 +443,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, > TCGReg r0, r1, r2; > tcg_target_ulong t0; > 
tcg_target_ulong t1; > - tcg_target_ulong t2; > TCGCond condition; > target_ulong taddr; > - uint8_t tmp8; > - uint16_t tmp16; > + uint8_t pos, len; > uint32_t tmp32; > uint64_t tmp64; > #if TCG_TARGET_REG_BITS == 32 > @@ -627,13 +636,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, > #endif > #if TCG_TARGET_HAS_deposit_i32 > case INDEX_op_deposit_i32: > - t0 = *tb_ptr++; > - t1 = tci_read_rval(regs, &tb_ptr); > - t2 = tci_read_rval(regs, &tb_ptr); > - tmp16 = *tb_ptr++; > - tmp8 = *tb_ptr++; > - tmp32 = (((1 << tmp8) - 1) << tmp16); > - tci_write_reg(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); > + tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); > + regs[r0] = deposit32(regs[r1], pos, len, regs[r2]); > break; > #endif > case INDEX_op_brcond_i32: > @@ -789,13 +793,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, > #endif > #if TCG_TARGET_HAS_deposit_i64 > case INDEX_op_deposit_i64: > - t0 = *tb_ptr++; > - t1 = tci_read_rval(regs, &tb_ptr); > - t2 = tci_read_rval(regs, &tb_ptr); > - tmp16 = *tb_ptr++; > - tmp8 = *tb_ptr++; > - tmp64 = (((1ULL << tmp8) - 1) << tmp16); > - tci_write_reg(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); > + tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); > + regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); > break; > #endif > case INDEX_op_brcond_i64: > diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc > index 2c64b4f617..640407b4a8 100644 > --- a/tcg/tci/tcg-target.c.inc > +++ b/tcg/tci/tcg-target.c.inc > @@ -126,11 +126,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) > case INDEX_op_rotr_i64: > case INDEX_op_setcond_i32: > case INDEX_op_setcond_i64: > - return C_O1_I2(r, r, r); > - > case INDEX_op_deposit_i32: > case INDEX_op_deposit_i64: > - return C_O1_I2(r, 0, r); > + return C_O1_I2(r, r, r); > > case INDEX_op_brcond_i32: > case INDEX_op_brcond_i64: > @@ -480,13 +478,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg 
*args, > break; > > CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */ > - tcg_out_r(s, args[0]); > - tcg_out_r(s, args[1]); > - tcg_out_r(s, args[2]); > - tcg_debug_assert(args[3] <= UINT8_MAX); > - tcg_out8(s, args[3]); > - tcg_debug_assert(args[4] <= UINT8_MAX); > - tcg_out8(s, args[4]); > + { > + TCGArg pos = args[3], len = args[4]; > + TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64; > + > + tcg_debug_assert(pos < max); > + tcg_debug_assert(pos + len <= max); > + > + tcg_out_r(s, args[0]); > + tcg_out_r(s, args[1]); > + tcg_out_r(s, args[2]); > + tcg_out8(s, pos); > + tcg_out8(s, len); > + } > break; > > CASE_32_64(brcond) > Another KISS :) Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
diff --git a/tcg/tci/tcg-target-con-set.h b/tcg/tci/tcg-target-con-set.h index f51b7bcb13..316730f32c 100644 --- a/tcg/tci/tcg-target-con-set.h +++ b/tcg/tci/tcg-target-con-set.h @@ -13,7 +13,6 @@ C_O0_I2(r, r) C_O0_I3(r, r, r) C_O0_I4(r, r, r, r) C_O1_I1(r, r) -C_O1_I2(r, 0, r) C_O1_I2(r, r, r) C_O1_I4(r, r, r, r, r) C_O2_I1(r, r, r) diff --git a/tcg/tci.c b/tcg/tci.c index 10f58e4f25..3ce2b72316 100644 --- a/tcg/tci.c +++ b/tcg/tci.c @@ -168,6 +168,7 @@ static tcg_target_ulong tci_read_label(const uint8_t **tb_ptr) * tci_args_<arguments> * where arguments is a sequence of * + * b = immediate (bit position) * i = immediate (uint32_t) * I = immediate (tcg_target_ulong) * r = register @@ -236,6 +237,16 @@ static void tci_args_rrrc(const uint8_t **tb_ptr, *c3 = tci_read_b(tb_ptr); } +static void tci_args_rrrbb(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, + TCGReg *r2, uint8_t *i3, uint8_t *i4) +{ + *r0 = tci_read_r(tb_ptr); + *r1 = tci_read_r(tb_ptr); + *r2 = tci_read_r(tb_ptr); + *i3 = tci_read_b(tb_ptr); + *i4 = tci_read_b(tb_ptr); +} + #if TCG_TARGET_REG_BITS == 32 static void tci_args_rrrr(const uint8_t **tb_ptr, TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3) @@ -432,11 +443,9 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, TCGReg r0, r1, r2; tcg_target_ulong t0; tcg_target_ulong t1; - tcg_target_ulong t2; TCGCond condition; target_ulong taddr; - uint8_t tmp8; - uint16_t tmp16; + uint8_t pos, len; uint32_t tmp32; uint64_t tmp64; #if TCG_TARGET_REG_BITS == 32 @@ -627,13 +636,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, #endif #if TCG_TARGET_HAS_deposit_i32 case INDEX_op_deposit_i32: - t0 = *tb_ptr++; - t1 = tci_read_rval(regs, &tb_ptr); - t2 = tci_read_rval(regs, &tb_ptr); - tmp16 = *tb_ptr++; - tmp8 = *tb_ptr++; - tmp32 = (((1 << tmp8) - 1) << tmp16); - tci_write_reg(regs, t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32)); + tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); + regs[r0] = deposit32(regs[r1], pos, len, 
regs[r2]); break; #endif case INDEX_op_brcond_i32: @@ -789,13 +793,8 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env, #endif #if TCG_TARGET_HAS_deposit_i64 case INDEX_op_deposit_i64: - t0 = *tb_ptr++; - t1 = tci_read_rval(regs, &tb_ptr); - t2 = tci_read_rval(regs, &tb_ptr); - tmp16 = *tb_ptr++; - tmp8 = *tb_ptr++; - tmp64 = (((1ULL << tmp8) - 1) << tmp16); - tci_write_reg(regs, t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64)); + tci_args_rrrbb(&tb_ptr, &r0, &r1, &r2, &pos, &len); + regs[r0] = deposit64(regs[r1], pos, len, regs[r2]); break; #endif case INDEX_op_brcond_i64: diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2c64b4f617..640407b4a8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -126,11 +126,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotr_i64: case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: - return C_O1_I2(r, r, r); - case INDEX_op_deposit_i32: case INDEX_op_deposit_i64: - return C_O1_I2(r, 0, r); + return C_O1_I2(r, r, r); case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: @@ -480,13 +478,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, break; CASE_32_64(deposit) /* Optional (TCG_TARGET_HAS_deposit_*). */ - tcg_out_r(s, args[0]); - tcg_out_r(s, args[1]); - tcg_out_r(s, args[2]); - tcg_debug_assert(args[3] <= UINT8_MAX); - tcg_out8(s, args[3]); - tcg_debug_assert(args[4] <= UINT8_MAX); - tcg_out8(s, args[4]); + { + TCGArg pos = args[3], len = args[4]; + TCGArg max = opc == INDEX_op_deposit_i32 ? 32 : 64; + + tcg_debug_assert(pos < max); + tcg_debug_assert(pos + len <= max); + + tcg_out_r(s, args[0]); + tcg_out_r(s, args[1]); + tcg_out_r(s, args[2]); + tcg_out8(s, pos); + tcg_out8(s, len); + } break; CASE_32_64(brcond)
Use the correct set of asserts during code generation.
We do not require the first input to overlap the output;
the existing interpreter already supported that.

Split out tci_args_rrrbb in the translator.
Use the deposit32/64 functions rather than inline expansion.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci/tcg-target-con-set.h |  1 -
 tcg/tci.c                    | 33 ++++++++++++++++-----------------
 tcg/tci/tcg-target.c.inc     | 24 ++++++++++++++----------
 3 files changed, 30 insertions(+), 28 deletions(-)

--
2.25.1