Message ID | 20180425012300.14698-9-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/arm: Fixups for ARM_FEATURE_V8_FP16 | expand |
Richard Henderson <richard.henderson@linaro.org> writes: > We missed all of the scalar fp16 binary operations. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/translate-a64.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 73 insertions(+) > > diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c > index 794ede7222..11b90b7eb0 100644 > --- a/target/arm/translate-a64.c > +++ b/target/arm/translate-a64.c > @@ -532,6 +532,14 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) > return v; > } > > +static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) > +{ > + TCGv_i32 v = tcg_temp_new_i32(); > + > + tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16)); > + return v; > +} > + > /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). > * If SVE is not enabled, then there are only 128 bits in the vector. > */ > @@ -4968,6 +4976,61 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, > tcg_temp_free_i64(tcg_res); > } > > +/* Floating-point data-processing (2 source) - half precision */ > +static void handle_fp_2src_half(DisasContext *s, int opcode, > + int rd, int rn, int rm) > +{ > + TCGv_i32 tcg_op1; > + TCGv_i32 tcg_op2; > + TCGv_i32 tcg_res; > + TCGv_ptr fpst; > + > + tcg_res = tcg_temp_new_i32(); > + fpst = get_fpstatus_ptr(true); > + tcg_op1 = read_fp_hreg(s, rn); > + tcg_op2 = read_fp_hreg(s, rm); > + > + switch (opcode) { > + case 0x0: /* FMUL */ > + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x1: /* FDIV */ > + gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x2: /* FADD */ > + gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x3: /* FSUB */ > + gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x4: /* FMAX */ > + gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x5: /* FMIN */ > + gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x6: /* FMAXNM */ > + gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x7: /* FMINNM */ > + gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); > + break; > + case 0x8: /* FNMUL */ > + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); > + tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); > + break; > + default: > + g_assert_not_reached(); > + } > + > + write_fp_sreg(s, rd, tcg_res); If we are going to the trouble of adding a read_fp_hreg() we might as well do the same for the write case. Then we can convert the various: read_vec_element_i32(s, tcg_vm, rm, 0, MO_16); that we used before. > + > + tcg_temp_free_ptr(fpst); > + tcg_temp_free_i32(tcg_op1); > + tcg_temp_free_i32(tcg_op2); > + tcg_temp_free_i32(tcg_res); > +} > + > /* Floating point data-processing (2 source) > * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 > * +---+---+---+-----------+------+---+------+--------+-----+------+------+ > @@ -5000,6 +5063,16 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) > } > handle_fp_2src_double(s, opcode, rd, rn, rm); > break; > + case 3: > + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { > + unallocated_encoding(s); > + return; > + } > + if (!fp_access_check(s)) { > + return; > + } > + handle_fp_2src_half(s, opcode, rd, rn, rm); > + break; > default: > unallocated_encoding(s); > } Otherwise: Reviewed-by: Alex Bennée <alex.bennee@linaro.org> -- Alex Bennée
On 05/01/2018 04:13 AM, Alex Bennée wrote: > If we are going to the trouble of adding a read_fp_hreg() we might as > well do the same for the write case. Then we can convert the various: > > read_vec_element_i32(s, tcg_vm, rm, 0, MO_16); > > that we used before. Ok, I've split these new functions into two new patches. r~
On 05/01/2018 04:13 AM, Alex Bennée wrote: > If we are going to the trouble of adding a read_fp_hreg() we might as > well do the same for the write case. The write case is much harder. Almost all of the time we have a helper function that has already zero-extended the result from 16 to 32-bits, and therefore write_fp_sreg is perfectly fine. In more than a few cases, this writeback is shared by both single and half precision paths and so adjusting the writeback is not appropriate. There are in fact zero instances at this point in the tree that do require extra zero-extension. So I'm going to leave this alone. r~
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 794ede7222..11b90b7eb0 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -532,6 +532,14 @@ static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) return v; } +static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) +{ + TCGv_i32 v = tcg_temp_new_i32(); + + tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16)); + return v; +} + /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). * If SVE is not enabled, then there are only 128 bits in the vector. */ @@ -4968,6 +4976,61 @@ static void handle_fp_2src_double(DisasContext *s, int opcode, tcg_temp_free_i64(tcg_res); } +/* Floating-point data-processing (2 source) - half precision */ +static void handle_fp_2src_half(DisasContext *s, int opcode, + int rd, int rn, int rm) +{ + TCGv_i32 tcg_op1; + TCGv_i32 tcg_op2; + TCGv_i32 tcg_res; + TCGv_ptr fpst; + + tcg_res = tcg_temp_new_i32(); + fpst = get_fpstatus_ptr(true); + tcg_op1 = read_fp_hreg(s, rn); + tcg_op2 = read_fp_hreg(s, rm); + + switch (opcode) { + case 0x0: /* FMUL */ + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x1: /* FDIV */ + gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x2: /* FADD */ + gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x3: /* FSUB */ + gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x4: /* FMAX */ + gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x5: /* FMIN */ + gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x6: /* FMAXNM */ + gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x7: /* FMINNM */ + gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); + break; + case 0x8: /* FNMUL */ + gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); + tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); + break; + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_res); + + tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i32(tcg_res); +} + /* Floating point data-processing (2 source) * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ @@ -5000,6 +5063,16 @@ static void disas_fp_2src(DisasContext *s, uint32_t insn) } handle_fp_2src_double(s, opcode, rd, rn, rm); break; + case 3: + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + if (!fp_access_check(s)) { + return; + } + handle_fp_2src_half(s, opcode, rd, rn, rm); + break; default: unallocated_encoding(s); }
We missed all of the scalar fp16 binary operations. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/translate-a64.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) -- 2.14.3