Message ID | 20190518191430.21686-2-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/ppc: make use of new gvec expanders | expand |
On May 18, 2019 9:21 PM, "Richard Henderson" <richard.henderson@linaro.org> wrote: > > The gvec expanders take care of masking the shift amount > against the element width. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/ppc/helper.h | 12 ---------- > target/ppc/int_helper.c | 37 ----------------------------- > target/ppc/translate/vmx-impl.inc.c | 24 +++++++++---------- You changed the line -GEN_VXFORM(vslw, 2, 6); to be: +GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); and left this line unchanged (even though it deals with the same vslw instruction): GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ vrlwnm, PPC_NONE, PPC2_ISA300) I just want to doublecheck - is this really what you wanted to do? Thanks, Aleksandar > 3 files changed, 12 insertions(+), 61 deletions(-) > > diff --git a/target/ppc/helper.h b/target/ppc/helper.h > index 638a6e99c4..02b67a333e 100644 > --- a/target/ppc/helper.h > +++ b/target/ppc/helper.h > @@ -180,18 +180,6 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr) > DEF_HELPER_3(vmulouh, void, avr, avr, avr) > DEF_HELPER_3(vmulouw, void, avr, avr, avr) > DEF_HELPER_3(vmuluwm, void, avr, avr, avr) > -DEF_HELPER_3(vsrab, void, avr, avr, avr) > -DEF_HELPER_3(vsrah, void, avr, avr, avr) > -DEF_HELPER_3(vsraw, void, avr, avr, avr) > -DEF_HELPER_3(vsrad, void, avr, avr, avr) > -DEF_HELPER_3(vsrb, void, avr, avr, avr) > -DEF_HELPER_3(vsrh, void, avr, avr, avr) > -DEF_HELPER_3(vsrw, void, avr, avr, avr) > -DEF_HELPER_3(vsrd, void, avr, avr, avr) > -DEF_HELPER_3(vslb, void, avr, avr, avr) > -DEF_HELPER_3(vslh, void, avr, avr, avr) > -DEF_HELPER_3(vslw, void, avr, avr, avr) > -DEF_HELPER_3(vsld, void, avr, avr, avr) > DEF_HELPER_3(vslo, void, avr, avr, avr) > DEF_HELPER_3(vsro, void, avr, avr, avr) > DEF_HELPER_3(vsrv, void, avr, avr, avr) > diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c > index f6a088ac08..40a7035df0 100644 > --- a/target/ppc/int_helper.c > +++ b/target/ppc/int_helper.c > @@ -1776,23 
+1776,6 @@ VSHIFT(l, 1) > VSHIFT(r, 0) > #undef VSHIFT > > -#define VSL(suffix, element, mask) \ > - void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > - { \ > - int i; \ > - \ > - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ > - unsigned int shift = b->element[i] & mask; \ > - \ > - r->element[i] = a->element[i] << shift; \ > - } \ > - } > -VSL(b, u8, 0x7) > -VSL(h, u16, 0x0F) > -VSL(w, u32, 0x1F) > -VSL(d, u64, 0x3F) > -#undef VSL > - > void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > { > int i; > @@ -1965,26 +1948,6 @@ VNEG(vnegw, s32) > VNEG(vnegd, s64) > #undef VNEG > > -#define VSR(suffix, element, mask) \ > - void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ > - { \ > - int i; \ > - \ > - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ > - unsigned int shift = b->element[i] & mask; \ > - r->element[i] = a->element[i] >> shift; \ > - } \ > - } > -VSR(ab, s8, 0x7) > -VSR(ah, s16, 0xF) > -VSR(aw, s32, 0x1F) > -VSR(ad, s64, 0x3F) > -VSR(b, u8, 0x7) > -VSR(h, u16, 0xF) > -VSR(w, u32, 0x1F) > -VSR(d, u64, 0x3F) > -#undef VSR > - > void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) > { > int sh = (b->VsrB(0xf) >> 3) & 0xf; > diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c > index 6861f4c5b9..663275b729 100644 > --- a/target/ppc/translate/vmx-impl.inc.c > +++ b/target/ppc/translate/vmx-impl.inc.c > @@ -530,21 +530,21 @@ GEN_VXFORM(vmuleuw, 4, 10); > GEN_VXFORM(vmulesb, 4, 12); > GEN_VXFORM(vmulesh, 4, 13); > GEN_VXFORM(vmulesw, 4, 14); > -GEN_VXFORM(vslb, 2, 4); > -GEN_VXFORM(vslh, 2, 5); > -GEN_VXFORM(vslw, 2, 6); > +GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4); > +GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); > +GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); > GEN_VXFORM(vrlwnm, 2, 6); > GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ > vrlwnm, PPC_NONE, PPC2_ISA300) > -GEN_VXFORM(vsld, 2, 23); > -GEN_VXFORM(vsrb, 2, 8); > -GEN_VXFORM(vsrh, 2, 9); > 
-GEN_VXFORM(vsrw, 2, 10); > -GEN_VXFORM(vsrd, 2, 27); > -GEN_VXFORM(vsrab, 2, 12); > -GEN_VXFORM(vsrah, 2, 13); > -GEN_VXFORM(vsraw, 2, 14); > -GEN_VXFORM(vsrad, 2, 15); > +GEN_VXFORM_V(vsld, MO_64, tcg_gen_gvec_shlv, 2, 23); > +GEN_VXFORM_V(vsrb, MO_8, tcg_gen_gvec_shrv, 2, 8); > +GEN_VXFORM_V(vsrh, MO_16, tcg_gen_gvec_shrv, 2, 9); > +GEN_VXFORM_V(vsrw, MO_32, tcg_gen_gvec_shrv, 2, 10); > +GEN_VXFORM_V(vsrd, MO_64, tcg_gen_gvec_shrv, 2, 27); > +GEN_VXFORM_V(vsrab, MO_8, tcg_gen_gvec_sarv, 2, 12); > +GEN_VXFORM_V(vsrah, MO_16, tcg_gen_gvec_sarv, 2, 13); > +GEN_VXFORM_V(vsraw, MO_32, tcg_gen_gvec_sarv, 2, 14); > +GEN_VXFORM_V(vsrad, MO_64, tcg_gen_gvec_sarv, 2, 15); > GEN_VXFORM(vsrv, 2, 28); > GEN_VXFORM(vslv, 2, 29); > GEN_VXFORM(vslo, 6, 16); > -- > 2.17.1 > >
On 5/20/19 2:49 AM, Aleksandar Markovic wrote: > > On May 18, 2019 9:21 PM, "Richard Henderson" <richard.henderson@linaro.org> wrote: >> >> The gvec expanders take care of masking the shift amount >> against the element width. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/ppc/helper.h | 12 ---------- >> target/ppc/int_helper.c | 37 ----------------------------- >> target/ppc/translate/vmx-impl.inc.c | 24 +++++++++---------- > > You changed the line > > -GEN_VXFORM(vslw, 2, 6); > > to be: > > +GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); > > and left this line unchanged (even though it deals with the same vslw instruction): > > GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ vrlwnm, PPC_NONE, PPC2_ISA300) > > I just want to double-check - is this really what you wanted to do? Yes, the macros do two different things. The first defines a function using tcg_gen_gvec_shlv as the implementation. The second defines a function that chooses between two overloaded encodings, depending on whether PPC_ALTIVEC or PPC2_ISA300 is enabled. If PPC_ALTIVEC is enabled, it will forward the implementation to the function defined with the first macro. r~
diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 638a6e99c4..02b67a333e 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -180,18 +180,6 @@ DEF_HELPER_3(vmuloub, void, avr, avr, avr) DEF_HELPER_3(vmulouh, void, avr, avr, avr) DEF_HELPER_3(vmulouw, void, avr, avr, avr) DEF_HELPER_3(vmuluwm, void, avr, avr, avr) -DEF_HELPER_3(vsrab, void, avr, avr, avr) -DEF_HELPER_3(vsrah, void, avr, avr, avr) -DEF_HELPER_3(vsraw, void, avr, avr, avr) -DEF_HELPER_3(vsrad, void, avr, avr, avr) -DEF_HELPER_3(vsrb, void, avr, avr, avr) -DEF_HELPER_3(vsrh, void, avr, avr, avr) -DEF_HELPER_3(vsrw, void, avr, avr, avr) -DEF_HELPER_3(vsrd, void, avr, avr, avr) -DEF_HELPER_3(vslb, void, avr, avr, avr) -DEF_HELPER_3(vslh, void, avr, avr, avr) -DEF_HELPER_3(vslw, void, avr, avr, avr) -DEF_HELPER_3(vsld, void, avr, avr, avr) DEF_HELPER_3(vslo, void, avr, avr, avr) DEF_HELPER_3(vsro, void, avr, avr, avr) DEF_HELPER_3(vsrv, void, avr, avr, avr) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index f6a088ac08..40a7035df0 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1776,23 +1776,6 @@ VSHIFT(l, 1) VSHIFT(r, 0) #undef VSHIFT -#define VSL(suffix, element, mask) \ - void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - \ - r->element[i] = a->element[i] << shift; \ - } \ - } -VSL(b, u8, 0x7) -VSL(h, u16, 0x0F) -VSL(w, u32, 0x1F) -VSL(d, u64, 0x3F) -#undef VSL - void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int i; @@ -1965,26 +1948,6 @@ VNEG(vnegw, s32) VNEG(vnegd, s64) #undef VNEG -#define VSR(suffix, element, mask) \ - void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ - { \ - int i; \ - \ - for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ - unsigned int shift = b->element[i] & mask; \ - r->element[i] = a->element[i] >> shift; \ - } \ - } -VSR(ab, s8, 0x7) -VSR(ah, s16, 
0xF) -VSR(aw, s32, 0x1F) -VSR(ad, s64, 0x3F) -VSR(b, u8, 0x7) -VSR(h, u16, 0xF) -VSR(w, u32, 0x1F) -VSR(d, u64, 0x3F) -#undef VSR - void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) { int sh = (b->VsrB(0xf) >> 3) & 0xf; diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 6861f4c5b9..663275b729 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -530,21 +530,21 @@ GEN_VXFORM(vmuleuw, 4, 10); GEN_VXFORM(vmulesb, 4, 12); GEN_VXFORM(vmulesh, 4, 13); GEN_VXFORM(vmulesw, 4, 14); -GEN_VXFORM(vslb, 2, 4); -GEN_VXFORM(vslh, 2, 5); -GEN_VXFORM(vslw, 2, 6); +GEN_VXFORM_V(vslb, MO_8, tcg_gen_gvec_shlv, 2, 4); +GEN_VXFORM_V(vslh, MO_16, tcg_gen_gvec_shlv, 2, 5); +GEN_VXFORM_V(vslw, MO_32, tcg_gen_gvec_shlv, 2, 6); GEN_VXFORM(vrlwnm, 2, 6); GEN_VXFORM_DUAL(vslw, PPC_ALTIVEC, PPC_NONE, \ vrlwnm, PPC_NONE, PPC2_ISA300) -GEN_VXFORM(vsld, 2, 23); -GEN_VXFORM(vsrb, 2, 8); -GEN_VXFORM(vsrh, 2, 9); -GEN_VXFORM(vsrw, 2, 10); -GEN_VXFORM(vsrd, 2, 27); -GEN_VXFORM(vsrab, 2, 12); -GEN_VXFORM(vsrah, 2, 13); -GEN_VXFORM(vsraw, 2, 14); -GEN_VXFORM(vsrad, 2, 15); +GEN_VXFORM_V(vsld, MO_64, tcg_gen_gvec_shlv, 2, 23); +GEN_VXFORM_V(vsrb, MO_8, tcg_gen_gvec_shrv, 2, 8); +GEN_VXFORM_V(vsrh, MO_16, tcg_gen_gvec_shrv, 2, 9); +GEN_VXFORM_V(vsrw, MO_32, tcg_gen_gvec_shrv, 2, 10); +GEN_VXFORM_V(vsrd, MO_64, tcg_gen_gvec_shrv, 2, 27); +GEN_VXFORM_V(vsrab, MO_8, tcg_gen_gvec_sarv, 2, 12); +GEN_VXFORM_V(vsrah, MO_16, tcg_gen_gvec_sarv, 2, 13); +GEN_VXFORM_V(vsraw, MO_32, tcg_gen_gvec_sarv, 2, 14); +GEN_VXFORM_V(vsrad, MO_64, tcg_gen_gvec_sarv, 2, 15); GEN_VXFORM(vsrv, 2, 28); GEN_VXFORM(vslv, 2, 29); GEN_VXFORM(vslo, 6, 16);
The gvec expanders take care of masking the shift amount against the element width. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/ppc/helper.h | 12 ---------- target/ppc/int_helper.c | 37 ----------------------------- target/ppc/translate/vmx-impl.inc.c | 24 +++++++++---------- 3 files changed, 12 insertions(+), 61 deletions(-) -- 2.17.1