Message ID | 20180703151732.29843-8-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/ppc fp cleanups | expand |
> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote: > > Memory operations have no side effects on fp state. > The use of a "real" conversions between float64 and float32 > would raise exceptions for SNaN and out-of-range inputs. Would you have any documentation that tells us about converting between 64 bit and 32 bit floating points? > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/ppc/helper.h | 4 +- > target/ppc/fpu_helper.c | 63 ++++++++++++++++++++++++------ > target/ppc/translate/fp-impl.inc.c | 26 +++++------- > 3 files changed, 62 insertions(+), 31 deletions(-) > > diff --git a/target/ppc/helper.h b/target/ppc/helper.h > index cc3d031407..33e6e1df60 100644 > --- a/target/ppc/helper.h > +++ b/target/ppc/helper.h > @@ -61,8 +61,8 @@ DEF_HELPER_2(compute_fprf_float64, void, env, i64) > DEF_HELPER_3(store_fpscr, void, env, i64, i32) > DEF_HELPER_2(fpscr_clrbit, void, env, i32) > DEF_HELPER_2(fpscr_setbit, void, env, i32) > -DEF_HELPER_2(float64_to_float32, i32, env, i64) > -DEF_HELPER_2(float32_to_float64, i64, env, i32) > +DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32) > +DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64) > > DEF_HELPER_4(fcmpo, void, env, i64, i64, i32) > DEF_HELPER_4(fcmpu, void, env, i64, i64, i32) > diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c > index 1e195487d3..d4e9e3bccb 100644 > --- a/target/ppc/fpu_helper.c > +++ b/target/ppc/fpu_helper.c > @@ -47,24 +47,61 @@ static inline bool fp_exceptions_enabled(CPUPPCState *env) > > /*****************************************************************************/ > /* Floating point operations helpers */ > -uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg) > -{ > - CPU_FloatU f; > - CPU_DoubleU d; > > - f.l = arg; > - d.d = float32_to_float64(f.f, &env->fp_status); > - return d.ll; > +/* > + * This is the non-arithmatic conversion that happens e.g. on loads. > + * In the Power ISA pseudocode, this is called DOUBLE. > + */ > +uint64_t helper_todouble(uint32_t arg) > +{ > + uint32_t abs_arg = arg & 0x7fffffff; > + uint64_t ret; > + > + if (likely(abs_arg >= 0x00800000)) { > + /* Normalized operand, or Inf, or NaN. */ > + ret = (uint64_t)extract32(arg, 30, 2) << 62; > + ret |= ((extract32(arg, 30, 1) ^ 1) * (uint64_t)7) << 59; > + ret |= (uint64_t)extract32(arg, 0, 29) << 29; > + } else { > + /* Zero or Denormalized operand. */ > + ret = (uint64_t)extract32(arg, 31, 1) << 63; > + if (unlikely(abs_arg != 0)) { > + /* Denormalized operand. */ > + int shift = clz32(abs_arg) - 9; > + int exp = -126 - shift + 1023; > + ret |= (uint64_t)exp << 52; > + ret |= abs_arg << (shift + 29); > + } > + } > + return ret; > } > > -uint32_t helper_float64_to_float32(CPUPPCState *env, uint64_t arg) > +/* > + * This is the non-arithmatic conversion that happens e.g. on stores. > + * In the Power ISA pseudocode, this is called SINGLE. > + */ > +uint32_t helper_tosingle(uint64_t arg) > { > - CPU_FloatU f; > - CPU_DoubleU d; > + int exp = extract64(arg, 52, 11); > + uint32_t ret; > > - d.ll = arg; > - f.f = float64_to_float32(d.d, &env->fp_status); > - return f.l; > + if (likely(exp > 896)) { > + /* No denormalization required (includes Inf, NaN). */ > + ret = extract64(arg, 62, 2) << 30; > + ret |= extract64(arg, 29, 29); > + } else { > + /* Zero or Denormal result. If the exponent is in bounds for > + * a single-precision denormal result, extract the proper bits. > + * If the input is not zero, and the exponent is out of bounds, > + * then the result is undefined; this underflows to zero. > + */ > + ret = extract64(arg, 63, 1) << 63; > + if (unlikely(exp >= 874)) { > + /* Denormal result. */ > + ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp); > + } > + } > + return ret; > } > > static inline int ppc_float32_get_unbiased_exp(float32 f) > diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c > index 2fbd4d4f38..a6f522b85c 100644 > --- a/target/ppc/translate/fp-impl.inc.c > +++ b/target/ppc/translate/fp-impl.inc.c > @@ -660,15 +660,12 @@ GEN_LDUF(name, ldop, op | 0x21, type); \ > GEN_LDUXF(name, ldop, op | 0x01, type); \ > GEN_LDXF(name, ldop, 0x17, op | 0x00, type) > > -static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) > +static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) > { > - TCGv t0 = tcg_temp_new(); > - TCGv_i32 t1 = tcg_temp_new_i32(); > - gen_qemu_ld32u(ctx, t0, arg2); > - tcg_gen_trunc_tl_i32(t1, t0); > - tcg_temp_free(t0); > - gen_helper_float32_to_float64(arg1, cpu_env, t1); > - tcg_temp_free_i32(t1); > + TCGv_i32 tmp = tcg_temp_new_i32(); > + tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); > + gen_helper_todouble(dest, tmp); > + tcg_temp_free_i32(tmp); > } > > /* lfd lfdu lfdux lfdx */ > @@ -836,15 +833,12 @@ GEN_STUF(name, stop, op | 0x21, type); \ > GEN_STUXF(name, stop, op | 0x01, type); \ > GEN_STXF(name, stop, 0x17, op | 0x00, type) > > -static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) > +static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) > { > - TCGv_i32 t0 = tcg_temp_new_i32(); > - TCGv t1 = tcg_temp_new(); > - gen_helper_float64_to_float32(t0, cpu_env, arg1); > - tcg_gen_extu_i32_tl(t1, t0); > - tcg_temp_free_i32(t0); > - gen_qemu_st32(ctx, t1, arg2); > - tcg_temp_free(t1); > + TCGv_i32 tmp = tcg_temp_new_i32(); > + gen_helper_tosingle(tmp, src); > + tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); > + tcg_temp_free_i32(tmp); > } > > /* stfd stfdu stfdux stfdx */ > -- > 2.17.1 >
On 07/05/2018 09:31 AM, Programmingkid wrote: >> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote: >> >> Memory operations have no side effects on fp state. >> The use of a "real" conversions between float64 and float32 >> would raise exceptions for SNaN and out-of-range inputs. > > Would you have any documentation that tells us about converting > between 64 bit and 32 bit floating points? Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of Book 1 of the Power ISA manual (version 3.0B) [0]. I've double-checked vs RISU[1] testing of LFS and STFS, with master traces generated on Power 8 ppc64le, so I don't see anything immediately wrong with the patch. But I haven't had time to look further than that. r~ [0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 [1] https://git.linaro.org/people/peter.maydell/risu.git
> On Jul 5, 2018, at 12:48 PM, Richard Henderson <richard.henderson@linaro.org> wrote: > > On 07/05/2018 09:31 AM, Programmingkid wrote: >>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote: >>> >>> Memory operations have no side effects on fp state. >>> The use of a "real" conversions between float64 and float32 >>> would raise exceptions for SNaN and out-of-range inputs. >> >> Would you have any documentation that tells us about converting >> between 64 bit and 32 bit floating points? > > Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of > Book 1 of the Power ISA manual (version 3.0B) [0]. > > I've double-checked vs RISU[1] testing of LFS and STFS, with master traces > generated on Power 8 ppc64le, so I don't see anything immediately wrong with > the patch. But I haven't had time to look further than that. > > > r~ > > > [0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 > [1] https://git.linaro.org/people/peter.maydell/risu.git Thank you for the documentation. My guess is there are differences between the PowerPC and Power 8 implementations. PowerPC is big endian. Would you be able to do your testing again with your Power 8 CPU in big endian mode?
On Thu, Jul 05, 2018 at 12:51:00PM -0400, Programmingkid wrote: > > > On Jul 5, 2018, at 12:48 PM, Richard Henderson <richard.henderson@linaro.org> wrote: > > > > On 07/05/2018 09:31 AM, Programmingkid wrote: > >>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote: > >>> > >>> Memory operations have no side effects on fp state. > >>> The use of a "real" conversions between float64 and float32 > >>> would raise exceptions for SNaN and out-of-range inputs. > >> > >> Would you have any documentation that tells us about converting > >> between 64 bit and 32 bit floating points? > > > > Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of > > Book 1 of the Power ISA manual (version 3.0B) [0]. > > > > I've double-checked vs RISU[1] testing of LFS and STFS, with master traces > > generated on Power 8 ppc64le, so I don't see anything immediately wrong with > > the patch. But I haven't had time to look further than that. > > > > > > r~ > > > > > > [0] https://openpowerfoundation.org/?resource_lib=power-isa-version-3-0 > > [1] https://git.linaro.org/people/peter.maydell/risu.git > > Thank you for the documentation. My guess is there are differences > between the PowerPC and Power 8 implementations. That seems very, very unlikely to me. > PowerPC is big > endian. Would you be able to do your testing again with your Power 8 > CPU in big endian mode? > > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
On 05/07/18 17:48, Richard Henderson wrote: > On 07/05/2018 09:31 AM, Programmingkid wrote: >>> On Jul 3, 2018, at 11:17 AM, Richard Henderson <richard.henderson@linaro.org> wrote: >>> >>> Memory operations have no side effects on fp state. >>> The use of a "real" conversions between float64 and float32 >>> would raise exceptions for SNaN and out-of-range inputs. >> >> Would you have any documentation that tells us about converting >> between 64 bit and 32 bit floating points? > > Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of > Book 1 of the Power ISA manual (version 3.0B) [0]. > > I've double-checked vs RISU[1] testing of LFS and STFS, with master traces > generated on Power 8 ppc64le, so I don't see anything immediately wrong with > the patch. But I haven't had time to look further than that. I've had a quick look at this with the attached patch to compare the helper results before your patch and after, writing any differences to the console. With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded the output below: $ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99 helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: 3bf0000000000000 helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: 3bf0000000000000 (note: MacOS 9 will hang here unless the line marked "Uncommenting this allows MacOS to run" in my patch is enabled) helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: 3be0000000000000 helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: 3be0000000000000 helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: 3bf0000000000000 helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: 3bf0000000000000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_todouble diff for arg: be61b08a d.ll: bfcc361140000000 ret: bbcc361140000000 helper_todouble diff for arg: 3fdf81a5 d.ll: 3ffbf034a0000000 ret: 3bfbf034a0000000 helper_todouble diff for arg: bf402647 d.ll: bfe804c8e0000000 ret: bbe804c8e0000000 helper_todouble diff for arg: 3e61b08a d.ll: 3fcc361140000000 ret: 3bcc361140000000 helper_tosingle diff for arg: bfcc361140000000 f.l: be61b08a ret: 9e61b08a helper_todouble diff for arg: 3f0ccccd d.ll: 3fe19999a0000000 ret: 3be19999a0000000 helper_tosingle diff for arg: 3ffbf034a0000000 f.l: 3fdf81a5 ret: 1fdf81a5 helper_tosingle diff for arg: bfe804c8e0000000 f.l: bf402647 ret: 9f402647 helper_tosingle diff for arg: 3fcc361140000000 f.l: 3e61b08a ret: 1e61b08a helper_tosingle diff for arg: 3fe19999a0000000 f.l: 3f0ccccd ret: 1f0ccccd helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: 3b70000000000000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: 3b70000000000000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 It looks like the differences are related to a flag or flags in the MSB byte of ret. ATB, Mark. diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index cb82e6e842..aedf5d1e3b 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -52,10 +52,15 @@ static inline bool fp_exceptions_enabled(CPUPPCState *env) * This is the non-arithmatic conversion that happens e.g. on loads. * In the Power ISA pseudocode, this is called DOUBLE. */ -uint64_t helper_todouble(uint32_t arg) +uint64_t helper_todouble(CPUPPCState *env, uint32_t arg) { uint32_t abs_arg = arg & 0x7fffffff; uint64_t ret; + CPU_FloatU f; + CPU_DoubleU d; + + f.l = arg; + d.d = float32_to_float64(f.f, &env->fp_status); if (likely(abs_arg >= 0x00800000)) { /* Normalized operand, or Inf, or NaN. */ @@ -73,6 +78,13 @@ uint64_t helper_todouble(uint32_t arg) ret |= abs_arg << (shift + 29); } } + + if (d.ll != ret) { + printf("helper_todouble diff for arg: %x d.ll: %lx ret: %lx\n", arg, d.ll, ret); + // Uncommenting this allows MacOS to run + //ret = d.ll; + } + return ret; } @@ -80,10 +92,15 @@ uint64_t helper_todouble(uint32_t arg) * This is the non-arithmatic conversion that happens e.g. on stores. * In the Power ISA pseudocode, this is called SINGLE. */ -uint32_t helper_tosingle(uint64_t arg) +uint32_t helper_tosingle(CPUPPCState *env, uint64_t arg) { int exp = extract64(arg, 52, 11); uint32_t ret; + CPU_FloatU f; + CPU_DoubleU d; + + d.ll = arg; + f.f = float64_to_float32(d.d, &env->fp_status); if (likely(exp > 896)) { /* No denormalization required (includes Inf, NaN). */ @@ -101,6 +118,11 @@ uint32_t helper_tosingle(uint64_t arg) ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp); } } + + if (f.l != ret) { + printf("helper_tosingle diff for arg: %lx f.l: %x ret: %x\n", arg, f.l, ret); + } + return ret; } diff --git a/target/ppc/helper.h b/target/ppc/helper.h index ef64248bc4..94b5f4fd8c 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -61,8 +61,8 @@ DEF_HELPER_2(compute_fprf_float64, void, env, i64) DEF_HELPER_3(store_fpscr, void, env, i64, i32) DEF_HELPER_2(fpscr_clrbit, void, env, i32) DEF_HELPER_2(fpscr_setbit, void, env, i32) -DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32) -DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(todouble, TCG_CALL_NO_RWG_SE, i64, env, i32) +DEF_HELPER_FLAGS_2(tosingle, TCG_CALL_NO_RWG_SE, i32, env, i64) DEF_HELPER_4(fcmpo, void, env, i64, i64, i32) DEF_HELPER_4(fcmpu, void, env, i64, i64, i32) diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c index a6f522b85c..16ecca3048 100644 --- a/target/ppc/translate/fp-impl.inc.c +++ b/target/ppc/translate/fp-impl.inc.c @@ -664,7 +664,7 @@ static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) { TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); - gen_helper_todouble(dest, tmp); + gen_helper_todouble(dest, cpu_env, tmp); tcg_temp_free_i32(tmp); } @@ -836,7 +836,7 @@ GEN_STXF(name, stop, 0x17, op | 0x00, type) static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) { TCGv_i32 tmp = tcg_temp_new_i32(); - gen_helper_tosingle(tmp, src); + gen_helper_tosingle(tmp, cpu_env, src); tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); tcg_temp_free_i32(tmp); }
On 06/07/18 10:03, Mark Cave-Ayland wrote: > On 05/07/18 17:48, Richard Henderson wrote: > >> On 07/05/2018 09:31 AM, Programmingkid wrote: >>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson >>>> <richard.henderson@linaro.org> wrote: >>>> >>>> Memory operations have no side effects on fp state. >>>> The use of a "real" conversions between float64 and float32 >>>> would raise exceptions for SNaN and out-of-range inputs. >>> >>> Would you have any documentation that tells us about converting >>> between 64 bit and 32 bit floating points? >> >> Spelled out right at the beginning of sections 4.6 (load) and 4.7 >> (store) of >> Book 1 of the Power ISA manual (version 3.0B) [0]. >> >> I've double-checked vs RISU[1] testing of LFS and STFS, with master >> traces >> generated on Power 8 ppc64le, so I don't see anything immediately >> wrong with >> the patch. But I haven't had time to look further than that. > > I've had a quick look at this with the attached patch to compare the > helper results before your patch and after, writing any differences to > the console. > > With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded > the output below: > > > $ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99 > > helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: > 3bf0000000000000 > helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: > 3bf0000000000000 > > (note: MacOS 9 will hang here unless the line marked "Uncommenting this > allows MacOS to run" in my patch is enabled) > > helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: > 3be0000000000000 > helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: > 3be0000000000000 > helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: > 3bf0000000000000 > helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: > 3bf0000000000000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_todouble diff for arg: be61b08a d.ll: bfcc361140000000 ret: > bbcc361140000000 > helper_todouble diff for arg: 3fdf81a5 d.ll: 3ffbf034a0000000 ret: > 3bfbf034a0000000 > helper_todouble diff for arg: bf402647 d.ll: bfe804c8e0000000 ret: > bbe804c8e0000000 > helper_todouble diff for arg: 3e61b08a d.ll: 3fcc361140000000 ret: > 3bcc361140000000 > helper_tosingle diff for arg: bfcc361140000000 f.l: be61b08a ret: > 9e61b08a > helper_todouble diff for arg: 3f0ccccd d.ll: 3fe19999a0000000 ret: > 3be19999a0000000 > helper_tosingle diff for arg: 3ffbf034a0000000 f.l: 3fdf81a5 ret: > 1fdf81a5 > helper_tosingle diff for arg: bfe804c8e0000000 f.l: bf402647 ret: > 9f402647 > helper_tosingle diff for arg: 3fcc361140000000 f.l: 3e61b08a ret: > 1e61b08a > helper_tosingle diff for arg: 3fe19999a0000000 f.l: 3f0ccccd ret: > 1f0ccccd > helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: > 3b70000000000000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: > 3b70000000000000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: > 1f800000 > > > It looks like the differences are related to a flag or flags in the MSB > byte of ret. Hi Richard, Have you had a chance to look at this yet? I've been working on top of David's ppc-for-3.1 branch over the weekend and ran into this again during my testing :/ ATB, Mark.
On 08/05/2018 04:41 AM, Mark Cave-Ayland wrote: > On 06/07/18 10:03, Mark Cave-Ayland wrote: > >> On 05/07/18 17:48, Richard Henderson wrote: >> >>> On 07/05/2018 09:31 AM, Programmingkid wrote: >>>>> On Jul 3, 2018, at 11:17 AM, Richard Henderson >>>>> <richard.henderson@linaro.org> wrote: >>>>> >>>>> Memory operations have no side effects on fp state. >>>>> The use of a "real" conversions between float64 and float32 >>>>> would raise exceptions for SNaN and out-of-range inputs. >>>> >>>> Would you have any documentation that tells us about converting >>>> between 64 bit and 32 bit floating points? >>> >>> Spelled out right at the beginning of sections 4.6 (load) and 4.7 (store) of >>> Book 1 of the Power ISA manual (version 3.0B) [0]. >>> >>> I've double-checked vs RISU[1] testing of LFS and STFS, with master traces >>> generated on Power 8 ppc64le, so I don't see anything immediately wrong with >>> the patch. But I haven't had time to look further than that. >> >> I've had a quick look at this with the attached patch to compare the helper >> results before your patch and after, writing any differences to the console. >> >> With this patch applied to ppc-for-3.1 I've booted MacOS 9 and recorded the >> output below: >> >> >> $ ./qemu-system-ppc -cdrom MacOS921-macsbug.iso -boot d -M mac99 >> >> helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: >> 3bf0000000000000 >> helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: >> 3bf0000000000000 >> >> (note: MacOS 9 will hang here unless the line marked "Uncommenting this >> allows MacOS to run" in my patch is enabled) >> >> helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: >> 3be0000000000000 >> helper_todouble diff for arg: 3f000000 d.ll: 3fe0000000000000 ret: >> 3be0000000000000 >> helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: >> 3bf0000000000000 >> helper_todouble diff for arg: 3f800000 d.ll: 3ff0000000000000 ret: >> 3bf0000000000000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_todouble diff for arg: be61b08a d.ll: bfcc361140000000 ret: >> bbcc361140000000 >> helper_todouble diff for arg: 3fdf81a5 d.ll: 3ffbf034a0000000 ret: >> 3bfbf034a0000000 >> helper_todouble diff for arg: bf402647 d.ll: bfe804c8e0000000 ret: >> bbe804c8e0000000 >> helper_todouble diff for arg: 3e61b08a d.ll: 3fcc361140000000 ret: >> 3bcc361140000000 >> helper_tosingle diff for arg: bfcc361140000000 f.l: be61b08a ret: 9e61b08a >> helper_todouble diff for arg: 3f0ccccd d.ll: 3fe19999a0000000 ret: >> 3be19999a0000000 >> helper_tosingle diff for arg: 3ffbf034a0000000 f.l: 3fdf81a5 ret: 1fdf81a5 >> helper_tosingle diff for arg: bfe804c8e0000000 f.l: bf402647 ret: 9f402647 >> helper_tosingle diff for arg: 3fcc361140000000 f.l: 3e61b08a ret: 1e61b08a >> helper_tosingle diff for arg: 3fe19999a0000000 f.l: 3f0ccccd ret: 1f0ccccd >> helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: >> 3b70000000000000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_todouble diff for arg: 3b800000 d.ll: 3f70000000000000 ret: >> 3b70000000000000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> helper_tosingle diff for arg: 3ff0000000000000 f.l: 3f800000 ret: 1f800000 >> >> >> It looks like the differences are related to a flag or flags in the MSB byte >> of ret. > > Hi Richard, > > Have you had a chance to look at this yet? I've been working on top of David's > ppc-for-3.1 branch over the weekend and ran into this again during my testing :/ Thanks for the reminder and the test cases. I've posted a fix for this now. r~
diff --git a/target/ppc/helper.h b/target/ppc/helper.h index cc3d031407..33e6e1df60 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -61,8 +61,8 @@ DEF_HELPER_2(compute_fprf_float64, void, env, i64) DEF_HELPER_3(store_fpscr, void, env, i64, i32) DEF_HELPER_2(fpscr_clrbit, void, env, i32) DEF_HELPER_2(fpscr_setbit, void, env, i32) -DEF_HELPER_2(float64_to_float32, i32, env, i64) -DEF_HELPER_2(float32_to_float64, i64, env, i32) +DEF_HELPER_FLAGS_1(todouble, TCG_CALL_NO_RWG_SE, i64, i32) +DEF_HELPER_FLAGS_1(tosingle, TCG_CALL_NO_RWG_SE, i32, i64) DEF_HELPER_4(fcmpo, void, env, i64, i64, i32) DEF_HELPER_4(fcmpu, void, env, i64, i64, i32) diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 1e195487d3..d4e9e3bccb 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -47,24 +47,61 @@ static inline bool fp_exceptions_enabled(CPUPPCState *env) /*****************************************************************************/ /* Floating point operations helpers */ -uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg) -{ - CPU_FloatU f; - CPU_DoubleU d; - f.l = arg; - d.d = float32_to_float64(f.f, &env->fp_status); - return d.ll; +/* + * This is the non-arithmatic conversion that happens e.g. on loads. + * In the Power ISA pseudocode, this is called DOUBLE. + */ +uint64_t helper_todouble(uint32_t arg) +{ + uint32_t abs_arg = arg & 0x7fffffff; + uint64_t ret; + + if (likely(abs_arg >= 0x00800000)) { + /* Normalized operand, or Inf, or NaN. */ + ret = (uint64_t)extract32(arg, 30, 2) << 62; + ret |= ((extract32(arg, 30, 1) ^ 1) * (uint64_t)7) << 59; + ret |= (uint64_t)extract32(arg, 0, 29) << 29; + } else { + /* Zero or Denormalized operand. */ + ret = (uint64_t)extract32(arg, 31, 1) << 63; + if (unlikely(abs_arg != 0)) { + /* Denormalized operand. */ + int shift = clz32(abs_arg) - 9; + int exp = -126 - shift + 1023; + ret |= (uint64_t)exp << 52; + ret |= abs_arg << (shift + 29); + } + } + return ret; } -uint32_t helper_float64_to_float32(CPUPPCState *env, uint64_t arg) +/* + * This is the non-arithmatic conversion that happens e.g. on stores. + * In the Power ISA pseudocode, this is called SINGLE. + */ +uint32_t helper_tosingle(uint64_t arg) { - CPU_FloatU f; - CPU_DoubleU d; + int exp = extract64(arg, 52, 11); + uint32_t ret; - d.ll = arg; - f.f = float64_to_float32(d.d, &env->fp_status); - return f.l; + if (likely(exp > 896)) { + /* No denormalization required (includes Inf, NaN). */ + ret = extract64(arg, 62, 2) << 30; + ret |= extract64(arg, 29, 29); + } else { + /* Zero or Denormal result. If the exponent is in bounds for + * a single-precision denormal result, extract the proper bits. + * If the input is not zero, and the exponent is out of bounds, + * then the result is undefined; this underflows to zero. + */ + ret = extract64(arg, 63, 1) << 63; + if (unlikely(exp >= 874)) { + /* Denormal result. */ + ret |= ((1ULL << 52) | extract64(arg, 0, 52)) >> (896 + 30 - exp); + } + } + return ret; } static inline int ppc_float32_get_unbiased_exp(float32 f) diff --git a/target/ppc/translate/fp-impl.inc.c b/target/ppc/translate/fp-impl.inc.c index 2fbd4d4f38..a6f522b85c 100644 --- a/target/ppc/translate/fp-impl.inc.c +++ b/target/ppc/translate/fp-impl.inc.c @@ -660,15 +660,12 @@ GEN_LDUF(name, ldop, op | 0x21, type); \ GEN_LDUXF(name, ldop, op | 0x01, type); \ GEN_LDXF(name, ldop, 0x17, op | 0x00, type) -static inline void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) +static void gen_qemu_ld32fs(DisasContext *ctx, TCGv_i64 dest, TCGv addr) { - TCGv t0 = tcg_temp_new(); - TCGv_i32 t1 = tcg_temp_new_i32(); - gen_qemu_ld32u(ctx, t0, arg2); - tcg_gen_trunc_tl_i32(t1, t0); - tcg_temp_free(t0); - gen_helper_float32_to_float64(arg1, cpu_env, t1); - tcg_temp_free_i32(t1); + TCGv_i32 tmp = tcg_temp_new_i32(); + tcg_gen_qemu_ld_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); + gen_helper_todouble(dest, tmp); + tcg_temp_free_i32(tmp); } /* lfd lfdu lfdux lfdx */ @@ -836,15 +833,12 @@ GEN_STUF(name, stop, op | 0x21, type); \ GEN_STUXF(name, stop, op | 0x01, type); \ GEN_STXF(name, stop, 0x17, op | 0x00, type) -static inline void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) +static void gen_qemu_st32fs(DisasContext *ctx, TCGv_i64 src, TCGv addr) { - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv t1 = tcg_temp_new(); - gen_helper_float64_to_float32(t0, cpu_env, arg1); - tcg_gen_extu_i32_tl(t1, t0); - tcg_temp_free_i32(t0); - gen_qemu_st32(ctx, t1, arg2); - tcg_temp_free(t1); + TCGv_i32 tmp = tcg_temp_new_i32(); + gen_helper_tosingle(tmp, src); + tcg_gen_qemu_st_i32(tmp, addr, ctx->mem_idx, DEF_MEMOP(MO_UL)); + tcg_temp_free_i32(tmp); } /* stfd stfdu stfdux stfdx */
Memory operations have no side effects on fp state. The use of a "real" conversions between float64 and float32 would raise exceptions for SNaN and out-of-range inputs. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/ppc/helper.h | 4 +- target/ppc/fpu_helper.c | 63 ++++++++++++++++++++++++------ target/ppc/translate/fp-impl.inc.c | 26 +++++------- 3 files changed, 62 insertions(+), 31 deletions(-) -- 2.17.1