Message ID | 87zi8iumsp.fsf@linaro.org |
---|---|
State | New |
Headers | show |
Series | [16/nn] Factor out the mode handling in lower-subreg.c | expand |
On Mon, Oct 23, 2017 at 1:27 PM, Richard Sandiford <richard.sandiford@linaro.org> wrote: > This patch adds a helper routine (interesting_mode_p) to lower-subreg.c, > to make the decision about whether a mode can be split and, if so, > calculate the number of bytes and words in the mode. At present this > function always returns true; a later patch will add cases in which it > can return false. Ok. Richard. > > 2017-10-23 Richard Sandiford <richard.sandiford@linaro.org> > Alan Hayward <alan.hayward@arm.com> > David Sherwood <david.sherwood@arm.com> > > gcc/ > * lower-subreg.c (interesting_mode_p): New function. > (compute_costs, find_decomposable_subregs, decompose_register) > (simplify_subreg_concatn, can_decompose_p, resolve_simple_move) > (resolve_clobber, dump_choices): Use it. > > Index: gcc/lower-subreg.c > =================================================================== > --- gcc/lower-subreg.c 2017-10-23 11:47:11.274393237 +0100 > +++ gcc/lower-subreg.c 2017-10-23 11:47:23.555013148 +0100 > @@ -103,6 +103,18 @@ #define twice_word_mode \ > #define choices \ > this_target_lower_subreg->x_choices > > +/* Return true if MODE is a mode we know how to lower. When returning true, > + store its byte size in *BYTES and its word size in *WORDS. */ > + > +static inline bool > +interesting_mode_p (machine_mode mode, unsigned int *bytes, > + unsigned int *words) > +{ > + *bytes = GET_MODE_SIZE (mode); > + *words = CEIL (*bytes, UNITS_PER_WORD); > + return true; > +} > + > /* RTXes used while computing costs. */ > struct cost_rtxes { > /* Source and target registers. */ > @@ -199,10 +211,10 @@ compute_costs (bool speed_p, struct cost > for (i = 0; i < MAX_MACHINE_MODE; i++) > { > machine_mode mode = (machine_mode) i; > - int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; > - if (factor > 1) > + unsigned int size, factor; > + if (interesting_mode_p (mode, &size, &factor) && factor > 1) > { > - int mode_move_cost; > + unsigned int mode_move_cost; > > PUT_MODE (rtxes->target, mode); > PUT_MODE (rtxes->source, mode); > @@ -469,10 +481,10 @@ find_decomposable_subregs (rtx *loc, enu > continue; > } > > - outer_size = GET_MODE_SIZE (GET_MODE (x)); > - inner_size = GET_MODE_SIZE (GET_MODE (inner)); > - outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > - inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > + if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words) > + || !interesting_mode_p (GET_MODE (inner), &inner_size, > + &inner_words)) > + continue; > > /* We only try to decompose single word subregs of multi-word > registers. When we find one, we return -1 to avoid iterating > @@ -507,7 +519,7 @@ find_decomposable_subregs (rtx *loc, enu > } > else if (REG_P (x)) > { > - unsigned int regno; > + unsigned int regno, size, words; > > /* We will see an outer SUBREG before we see the inner REG, so > when we see a plain REG here it means a direct reference to > @@ -527,7 +539,8 @@ find_decomposable_subregs (rtx *loc, enu > > regno = REGNO (x); > if (!HARD_REGISTER_NUM_P (regno) > - && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) > + && interesting_mode_p (GET_MODE (x), &size, &words) > + && words > 1) > { > switch (*pcmi) > { > @@ -567,15 +580,15 @@ find_decomposable_subregs (rtx *loc, enu > decompose_register (unsigned int regno) > { > rtx reg; > - unsigned int words, i; > + unsigned int size, words, i; > rtvec v; > > reg = regno_reg_rtx[regno]; > > regno_reg_rtx[regno] = NULL_RTX; > > - words = GET_MODE_SIZE (GET_MODE (reg)); > - words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > + if (!interesting_mode_p (GET_MODE (reg), &size, &words)) > + gcc_unreachable (); > > v = rtvec_alloc (words); > for (i = 0; i < words; ++i) > @@ -599,25 +612,29 @@ decompose_register (unsigned int regno) > simplify_subreg_concatn (machine_mode outermode, rtx op, > unsigned int byte) > { > - unsigned int inner_size; > + unsigned int outer_size, outer_words, inner_size, inner_words; > machine_mode innermode, partmode; > rtx part; > unsigned int final_offset; > > + innermode = GET_MODE (op); > + if (!interesting_mode_p (outermode, &outer_size, &outer_words) > + || !interesting_mode_p (innermode, &inner_size, &inner_words)) > + gcc_unreachable (); > + > gcc_assert (GET_CODE (op) == CONCATN); > - gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); > + gcc_assert (byte % outer_size == 0); > > - innermode = GET_MODE (op); > - gcc_assert (byte < GET_MODE_SIZE (innermode)); > - if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode)) > + gcc_assert (byte < inner_size); > + if (outer_size > inner_size) > return NULL_RTX; > > - inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); > + inner_size /= XVECLEN (op, 0); > part = XVECEXP (op, 0, byte / inner_size); > partmode = GET_MODE (part); > > final_offset = byte % inner_size; > - if (final_offset + GET_MODE_SIZE (outermode) > inner_size) > + if (final_offset + outer_size > inner_size) > return NULL_RTX; > > /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of > @@ -801,9 +818,10 @@ can_decompose_p (rtx x) > > if (HARD_REGISTER_NUM_P (regno)) > { > - unsigned int byte, num_bytes; > + unsigned int byte, num_bytes, num_words; > > - num_bytes = GET_MODE_SIZE (GET_MODE (x)); > + if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words)) > + return false; > for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) > if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) > return false; > @@ -826,14 +844,15 @@ resolve_simple_move (rtx set, rtx_insn * > rtx src, dest, real_dest; > rtx_insn *insns; > machine_mode orig_mode, dest_mode; > - unsigned int words; > + unsigned int orig_size, words; > bool pushing; > > src = SET_SRC (set); > dest = SET_DEST (set); > orig_mode = GET_MODE (dest); > > - words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > + if (!interesting_mode_p (orig_mode, &orig_size, &words)) > + gcc_unreachable (); > gcc_assert (words > 1); > > start_sequence (); > @@ -964,7 +983,7 @@ resolve_simple_move (rtx set, rtx_insn * > { > unsigned int i, j, jinc; > > - gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0); > + gcc_assert (orig_size % UNITS_PER_WORD == 0); > gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY); > gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY); > > @@ -1059,7 +1078,7 @@ resolve_clobber (rtx pat, rtx_insn *insn > { > rtx reg; > machine_mode orig_mode; > - unsigned int words, i; > + unsigned int orig_size, words, i; > int ret; > > reg = XEXP (pat, 0); > @@ -1067,8 +1086,8 @@ resolve_clobber (rtx pat, rtx_insn *insn > return false; > > orig_mode = GET_MODE (reg); > - words = GET_MODE_SIZE (orig_mode); > - words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; > + if (!interesting_mode_p (orig_mode, &orig_size, &words)) > + gcc_unreachable (); > > ret = validate_change (NULL_RTX, &XEXP (pat, 0), > simplify_gen_subreg_concatn (word_mode, reg, > @@ -1332,12 +1351,13 @@ dump_shift_choices (enum rtx_code code, > static void > dump_choices (bool speed_p, const char *description) > { > - unsigned int i; > + unsigned int size, factor, i; > > fprintf (dump_file, "Choices when optimizing for %s:\n", description); > > for (i = 0; i < MAX_MACHINE_MODE; i++) > - if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD) > + if (interesting_mode_p ((machine_mode) i, &size, &factor) > + && factor > 1) > fprintf (dump_file, " %s mode %s for copy lowering.\n", > choices[speed_p].move_modes_to_split[i] > ? "Splitting"
Index: gcc/lower-subreg.c =================================================================== --- gcc/lower-subreg.c 2017-10-23 11:47:11.274393237 +0100 +++ gcc/lower-subreg.c 2017-10-23 11:47:23.555013148 +0100 @@ -103,6 +103,18 @@ #define twice_word_mode \ #define choices \ this_target_lower_subreg->x_choices +/* Return true if MODE is a mode we know how to lower. When returning true, + store its byte size in *BYTES and its word size in *WORDS. */ + +static inline bool +interesting_mode_p (machine_mode mode, unsigned int *bytes, + unsigned int *words) +{ + *bytes = GET_MODE_SIZE (mode); + *words = CEIL (*bytes, UNITS_PER_WORD); + return true; +} + /* RTXes used while computing costs. */ struct cost_rtxes { /* Source and target registers. */ @@ -199,10 +211,10 @@ compute_costs (bool speed_p, struct cost for (i = 0; i < MAX_MACHINE_MODE; i++) { machine_mode mode = (machine_mode) i; - int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; - if (factor > 1) + unsigned int size, factor; + if (interesting_mode_p (mode, &size, &factor) && factor > 1) { - int mode_move_cost; + unsigned int mode_move_cost; PUT_MODE (rtxes->target, mode); PUT_MODE (rtxes->source, mode); @@ -469,10 +481,10 @@ find_decomposable_subregs (rtx *loc, enu continue; } - outer_size = GET_MODE_SIZE (GET_MODE (x)); - inner_size = GET_MODE_SIZE (GET_MODE (inner)); - outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words) + || !interesting_mode_p (GET_MODE (inner), &inner_size, + &inner_words)) + continue; /* We only try to decompose single word subregs of multi-word registers. When we find one, we return -1 to avoid iterating @@ -507,7 +519,7 @@ find_decomposable_subregs (rtx *loc, enu } else if (REG_P (x)) { - unsigned int regno; + unsigned int regno, size, words; /* We will see an outer SUBREG before we see the inner REG, so when we see a plain REG here it means a direct reference to @@ -527,7 +539,8 @@ find_decomposable_subregs (rtx *loc, enu regno = REGNO (x); if (!HARD_REGISTER_NUM_P (regno) - && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) + && interesting_mode_p (GET_MODE (x), &size, &words) + && words > 1) { switch (*pcmi) { @@ -567,15 +580,15 @@ find_decomposable_subregs (rtx *loc, enu decompose_register (unsigned int regno) { rtx reg; - unsigned int words, i; + unsigned int size, words, i; rtvec v; reg = regno_reg_rtx[regno]; regno_reg_rtx[regno] = NULL_RTX; - words = GET_MODE_SIZE (GET_MODE (reg)); - words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (GET_MODE (reg), &size, &words)) + gcc_unreachable (); v = rtvec_alloc (words); for (i = 0; i < words; ++i) @@ -599,25 +612,29 @@ decompose_register (unsigned int regno) simplify_subreg_concatn (machine_mode outermode, rtx op, unsigned int byte) { - unsigned int inner_size; + unsigned int outer_size, outer_words, inner_size, inner_words; machine_mode innermode, partmode; rtx part; unsigned int final_offset; + innermode = GET_MODE (op); + if (!interesting_mode_p (outermode, &outer_size, &outer_words) + || !interesting_mode_p (innermode, &inner_size, &inner_words)) + gcc_unreachable (); + gcc_assert (GET_CODE (op) == CONCATN); - gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); + gcc_assert (byte % outer_size == 0); - innermode = GET_MODE (op); - gcc_assert (byte < GET_MODE_SIZE (innermode)); - if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode)) + gcc_assert (byte < inner_size); + if (outer_size > inner_size) return NULL_RTX; - inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); + inner_size /= XVECLEN (op, 0); part = XVECEXP (op, 0, byte / inner_size); partmode = GET_MODE (part); final_offset = byte % inner_size; - if (final_offset + GET_MODE_SIZE (outermode) > inner_size) + if (final_offset + outer_size > inner_size) return NULL_RTX; /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of @@ -801,9 +818,10 @@ can_decompose_p (rtx x) if (HARD_REGISTER_NUM_P (regno)) { - unsigned int byte, num_bytes; + unsigned int byte, num_bytes, num_words; - num_bytes = GET_MODE_SIZE (GET_MODE (x)); + if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words)) + return false; for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) return false; @@ -826,14 +844,15 @@ resolve_simple_move (rtx set, rtx_insn * rtx src, dest, real_dest; rtx_insn *insns; machine_mode orig_mode, dest_mode; - unsigned int words; + unsigned int orig_size, words; bool pushing; src = SET_SRC (set); dest = SET_DEST (set); orig_mode = GET_MODE (dest); - words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (orig_mode, &orig_size, &words)) + gcc_unreachable (); gcc_assert (words > 1); start_sequence (); @@ -964,7 +983,7 @@ resolve_simple_move (rtx set, rtx_insn * { unsigned int i, j, jinc; - gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0); + gcc_assert (orig_size % UNITS_PER_WORD == 0); gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY); gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY); @@ -1059,7 +1078,7 @@ resolve_clobber (rtx pat, rtx_insn *insn { rtx reg; machine_mode orig_mode; - unsigned int words, i; + unsigned int orig_size, words, i; int ret; reg = XEXP (pat, 0); @@ -1067,8 +1086,8 @@ resolve_clobber (rtx pat, rtx_insn *insn return false; orig_mode = GET_MODE (reg); - words = GET_MODE_SIZE (orig_mode); - words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + if (!interesting_mode_p (orig_mode, &orig_size, &words)) + gcc_unreachable (); ret = validate_change (NULL_RTX, &XEXP (pat, 0), simplify_gen_subreg_concatn (word_mode, reg, @@ -1332,12 +1351,13 @@ dump_shift_choices (enum rtx_code code, static void dump_choices (bool speed_p, const char *description) { - unsigned int i; + unsigned int size, factor, i; fprintf (dump_file, "Choices when optimizing for %s:\n", description); for (i = 0; i < MAX_MACHINE_MODE; i++) - if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD) + if (interesting_mode_p ((machine_mode) i, &size, &factor) + && factor > 1) fprintf (dump_file, " %s mode %s for copy lowering.\n", choices[speed_p].move_modes_to_split[i] ? "Splitting"