Message ID | 20180119045438.28582-5-richard.henderson@linaro.org |
---|---|
State | Accepted |
Commit | e7c06c4e4c98c47899417f154df1f2ef4e8d09a0 |
Headers | show |
Series | target/arm: Prepatory work for SVE | expand |
Richard Henderson <richard.henderson@linaro.org> writes: > Rather than passing a regno to the helper, pass pointers to the > vector register directly. This eliminates the need to pass in > the environment pointer and reduces the number of places that > directly access env->vfp.regs[]. > > Reviewed-by: Peter Maydell <peter.maydell@linaro.org> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/helper.h | 2 +- > target/arm/op_helper.c | 17 +++++++---------- > target/arm/translate.c | 8 ++++---- > 3 files changed, 12 insertions(+), 15 deletions(-) > > diff --git a/target/arm/helper.h b/target/arm/helper.h > index dbdc38fcb7..5dec2e6262 100644 > --- a/target/arm/helper.h > +++ b/target/arm/helper.h > @@ -188,7 +188,7 @@ DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) > DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) > DEF_HELPER_2(recpe_u32, i32, i32, ptr) > DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) > -DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) > +DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) > > DEF_HELPER_3(shl_cc, i32, env, i32, i32) > DEF_HELPER_3(shr_cc, i32, env, i32, i32) > diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c > index 712c5c55b6..a937e76710 100644 > --- a/target/arm/op_helper.c > +++ b/target/arm/op_helper.c > @@ -54,20 +54,17 @@ static int exception_target_el(CPUARMState *env) > return target_el; > } > > -uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, > - uint32_t rn, uint32_t maxindex) > +uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, void *vn, > + uint32_t maxindex) > { > - uint32_t val; > - uint32_t tmp; > - int index; > - int shift; > - uint64_t *table; > - table = (uint64_t *)&env->vfp.regs[rn]; > + uint32_t val, shift; Any particular reason to promote shift to a uint32_t here? > + uint64_t *table = vn; > + > val = 0; > for (shift = 0; shift < 32; shift += 8) { > - index = (ireg >> shift) & 0xff; > + uint32_t index = (ireg >> shift) & 0xff; > if (index < maxindex) { > - tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; > + uint32_t tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; > val |= tmp << shift; > } else { > val |= def & (0xff << shift); > diff --git a/target/arm/translate.c b/target/arm/translate.c > index 6f02c56abb..852d2a75b1 100644 > --- a/target/arm/translate.c > +++ b/target/arm/translate.c > @@ -7544,9 +7544,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) > tcg_gen_movi_i32(tmp, 0); > } > tmp2 = neon_load_reg(rm, 0); > - tmp4 = tcg_const_i32(rn); > + ptr1 = vfp_reg_ptr(true, rn); > tmp5 = tcg_const_i32(n); > - gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5); > + gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5); > tcg_temp_free_i32(tmp); > if (insn & (1 << 6)) { > tmp = neon_load_reg(rd, 1); > @@ -7555,9 +7555,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) > tcg_gen_movi_i32(tmp, 0); > } > tmp3 = neon_load_reg(rm, 1); > - gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5); > + gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5); > tcg_temp_free_i32(tmp5); > - tcg_temp_free_i32(tmp4); > + tcg_temp_free_ptr(ptr1); > neon_store_reg(rd, 0, tmp2); > neon_store_reg(rd, 1, tmp3); > tcg_temp_free_i32(tmp); I was wondering why tmp4 wasn't dropped from the declarations until I was reminded how massively overloaded this function is. Anyway baring the minor nit: Reviewed-by: Alex Bennée <alex.bennee@linaro.org> -- Alex Bennée
diff --git a/target/arm/helper.h b/target/arm/helper.h index dbdc38fcb7..5dec2e6262 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -188,7 +188,7 @@ DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_2(recpe_u32, i32, i32, ptr) DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) -DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) +DEF_HELPER_FLAGS_4(neon_tbl, TCG_CALL_NO_RWG, i32, i32, i32, ptr, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) DEF_HELPER_3(shr_cc, i32, env, i32, i32) diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index 712c5c55b6..a937e76710 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -54,20 +54,17 @@ static int exception_target_el(CPUARMState *env) return target_el; } -uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, - uint32_t rn, uint32_t maxindex) +uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def, void *vn, + uint32_t maxindex) { - uint32_t val; - uint32_t tmp; - int index; - int shift; - uint64_t *table; - table = (uint64_t *)&env->vfp.regs[rn]; + uint32_t val, shift; + uint64_t *table = vn; + val = 0; for (shift = 0; shift < 32; shift += 8) { - index = (ireg >> shift) & 0xff; + uint32_t index = (ireg >> shift) & 0xff; if (index < maxindex) { - tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; + uint32_t tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff; val |= tmp << shift; } else { val |= def & (0xff << shift); diff --git a/target/arm/translate.c b/target/arm/translate.c index 6f02c56abb..852d2a75b1 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -7544,9 +7544,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_movi_i32(tmp, 0); } tmp2 = neon_load_reg(rm, 0); - tmp4 = tcg_const_i32(rn); + ptr1 = vfp_reg_ptr(true, rn); tmp5 = tcg_const_i32(n); - gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5); + gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5); tcg_temp_free_i32(tmp); if (insn & (1 << 6)) { tmp = neon_load_reg(rd, 1); @@ -7555,9 +7555,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tcg_gen_movi_i32(tmp, 0); } tmp3 = neon_load_reg(rm, 1); - gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5); + gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5); tcg_temp_free_i32(tmp5); - tcg_temp_free_i32(tmp4); + tcg_temp_free_ptr(ptr1); neon_store_reg(rd, 0, tmp2); neon_store_reg(rd, 1, tmp3); tcg_temp_free_i32(tmp);