diff mbox

[(4/7)] Unsigned multiplies using wider signed multiplies

Message ID 4E09E1B1.2090005@codesourcery.com
State New
Headers show

Commit Message

Andrew Stubbs June 28, 2011, 2:14 p.m. UTC
On 28/06/11 13:33, Andrew Stubbs wrote:
> On 23/06/11 15:41, Andrew Stubbs wrote:
>> If one or both of the inputs to a widening multiply are of unsigned type
>> then the compiler will attempt to use usmul_widen_optab or
>> umul_widen_optab, respectively.
>>
>> That works fine, but only if the target supports those operations
>> directly. Otherwise, it just bombs out and reverts to the normal
>> inefficient non-widening multiply.
>>
>> This patch attempts to catch these cases and use an alternative signed
>> widening multiply instruction, if one of those is available.
>>
>> I believe this should be legal as long as the top bit of both inputs is
>> guaranteed to be zero. The code achieves this guarantee by
>> zero-extending the inputs to a wider mode (which must still be narrower
>> than the output mode).
>>
>> OK?
>
> This update fixes the testsuite issue Janis pointed out.

And this one fixes up the wmul-5.c testcase also. The patch has changed 
the correct result.

Andrew
diff mbox

Patch

2011-06-28  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency.
	* optabs.c (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this, and add new
	argument 'found_mode'.
	* optabs.h (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this.
	(find_widening_optab_handler): New macro.
	* tree-ssa-math-opts.c: Include langhooks.h
	(build_and_insert_cast): New function.
	(convert_mult_to_widen): Add new argument 'gsi'.
	Convert unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.
	(execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen.

	gcc/testsuite/
	* gcc.target/arm/wmul-5.c: Update expected result.
	* gcc.target/arm/wmul-6.c: New file.

--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2672,7 +2672,8 @@  tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
    $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
-   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h
+   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \
+   langhooks.h
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \
    $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -232,9 +232,10 @@  add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
    non-widening optabs also.  */
 
 enum insn_code
-find_widening_optab_handler (optab op, enum machine_mode to_mode,
-			     enum machine_mode from_mode,
-			     int permit_non_widening)
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+				      enum machine_mode from_mode,
+				      int permit_non_widening,
+				      enum machine_mode *found_mode)
 {
   for (; (permit_non_widening || from_mode != to_mode)
 	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
@@ -245,7 +246,11 @@  find_widening_optab_handler (optab op, enum machine_mode to_mode,
 						       from_mode);
 
       if (handler != CODE_FOR_nothing)
-	return handler;
+	{
+	  if (found_mode)
+	    *found_mode = from_mode;
+	  return handler;
+	}
     }
 
   return CODE_FOR_nothing;
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -808,8 +808,13 @@  extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-extern enum insn_code find_widening_optab_handler (optab, enum machine_mode,
-						   enum machine_mode, int);
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+							    enum machine_mode,
+							    enum machine_mode,
+							    int,
+							    enum machine_mode *);
 
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
    distinguish between machines with a vector shift that takes a scalar for the
--- a/gcc/testsuite/gcc.target/arm/wmul-5.c
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -7,4 +7,4 @@  foo (long long a, char *b, char *c)
   return a + *b * *c;
 }
 
-/* { dg-final { scan-assembler "umlal" } } */
+/* { dg-final { scan-assembler "smlalbb" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -98,6 +98,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "basic-block.h"
 #include "target.h"
 #include "gimple-pretty-print.h"
+#include "langhooks.h"
 
 /* FIXME: RTL headers have to be included here for optabs.  */
 #include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
@@ -1086,6 +1087,21 @@  build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TYPE, and put the result in
+   TARGET.  Insert the statement prior to GSI's current position, and
+   return the from SSA name.  */
+
+static tree
+build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
+		       tree target, tree val, tree type)
+{
+  tree result = make_ssa_name (target, NULL);
+  gimple stmt = gimple_build_assign (result, fold_convert (type, val));
+  gimple_set_location (stmt, loc);
+  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+  return result;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -2047,7 +2063,7 @@  is_widening_mult_p (gimple stmt,
    value is true iff we converted the statement.  */
 
 static bool
-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
   tree lhs, rhs1, rhs2, type, type1, type2;
   enum insn_code handler;
@@ -2075,7 +2091,31 @@  convert_mult_to_widen (gimple stmt)
   handler = find_widening_optab_handler (op, to_mode, from_mode, 0);
 
   if (handler == CODE_FOR_nothing)
-    return false;
+    {
+      if (op != smul_widen_optab)
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+	    return false;
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+							  from_mode, 0,
+							  &from_mode);
+
+	  if (handler == CODE_FOR_nothing)
+	    return false;
+
+	  type1 = type2 = lang_hooks.types.type_for_mode (from_mode, 0);
+
+	  rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+					create_tmp_var (type1, NULL), rhs1, type1);
+	  rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+					create_tmp_var (type2, NULL), rhs2, type2);
+	}
+      else
+	return false;
+    }
 
   gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
   gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
@@ -2165,7 +2205,22 @@  convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
     return false;
 
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-    return false;
+    {
+      enum machine_mode mode = TYPE_MODE (type1);
+      mode = GET_MODE_WIDER_MODE (mode);
+      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (TYPE_MODE (type)))
+	{
+	  type1 = type2 = lang_hooks.types.type_for_mode (mode, 0);
+	  mult_rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+					     create_tmp_var (type1, NULL),
+					     mult_rhs1, type1);
+	  mult_rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+					     create_tmp_var (type2, NULL),
+					     mult_rhs2, type2);
+	}
+      else
+	return false;
+    }
 
   /* Verify that the machine can perform a widening multiply
      accumulate in this mode/signedness combination, otherwise
@@ -2393,7 +2448,7 @@  execute_optimize_widening_mul (void)
 	      switch (code)
 		{
 		case MULT_EXPR:
-		  if (!convert_mult_to_widen (stmt)
+		  if (!convert_mult_to_widen (stmt, &gsi)
 		      && convert_mult_to_fma (stmt,
 					      gimple_assign_rhs1 (stmt),
 					      gimple_assign_rhs2 (stmt)))