diff mbox

Fold strstr (s, t) eq/ne s to strcmp (s, t) eq/ne 0 if strlen (t) is known

Message ID CAAgBjMnQdgfGHf9km09ikEYYt3Frvpp6QEP=pZM0a537YWXCww@mail.gmail.com
State Superseded
Headers show

Commit Message

Prathamesh Kulkarni Dec. 7, 2016, 11:32 a.m. UTC
On 5 December 2016 at 23:47, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Dec 05, 2016 at 11:32:15PM +0530, Prathamesh Kulkarni wrote:

>> So I had to check if SSA_NAME_DEF_STMT (rhs2) was call to strstr

>> rather than rhs1.

>

> Then you need to test both whether it is strstr (s, t) == s or

> s == strstr (s, t).

>

>> +                           gassign *ga = gimple_build_assign (lhs, code,

>> +                                                     strcmp_lhs, zero);

>

> The formatting is wrong here.

>

>> +                           gsi_replace (&gsi, ga, false);

>> +                         }

>> +                     }

>> +                 }

>> +             }

>> +         }

>> +     }

>>        else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))

>>       {

>>         tree type = TREE_TYPE (lhs);

>> @@ -2505,7 +2554,7 @@ const pass_data pass_data_strlen =

>>    0, /* properties_provided */

>>    0, /* properties_destroyed */

>>    0, /* todo_flags_start */

>> -  0, /* todo_flags_finish */

>> +  TODO_update_ssa, /* todo_flags_finish */

>

> No, please don't.  Just make sure to build proper SSA right away.

Hi,
Thanks for the suggestions, I have tried to modify the patch accordingly.
Does this version look OK ?
Bootstrap+tested on x86_64-unknown-linux-gnu with --enable-languages=all,ada
Cross tested on arm*-*-*, aarch64*-*-*.

Thanks,
Prathamesh
>

>         Jakub
2016-12-07  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	* tree-ssa-strlen.c (strlen_optimize_stmt): Fold strstr(s, t) == s to
	memcmp (s, t, strlen (t)) == 0.
	Include tree-into-ssa.h.

testsuite/
	* gcc.dg/strlenopt-30.c: New test-case.

Comments

Jakub Jelinek Dec. 7, 2016, 12:06 p.m. UTC | #1
On Wed, Dec 07, 2016 at 05:02:46PM +0530, Prathamesh Kulkarni wrote:
> +			  if (arg1_len == NULL_TREE)

> +			    {

> +			      gimple_stmt_iterator gsi;

> +			      tree strlen_decl;

> +			      gimple *strlen_call;

> +

> +			      strlen_decl = builtin_decl_explicit (BUILT_IN_STRLEN);

> +			      strlen_call = gimple_build_call (strlen_decl, 1,

> +							       arg1);

> +			      arg1_len = make_ssa_name (size_type_node);

> +			      gimple_call_set_lhs (strlen_call, arg1_len);

> +			      update_stmt (strlen_call);

> +			      gsi = gsi_for_stmt (call_stmt);

> +			      gsi_insert_before (&gsi, strlen_call, GSI_SAME_STMT);

> +			    }


Why?  If the strlen isn't readily available, do you really think it is
always a win to replace one call with 2 calls?  The string you want to do
strlen on can be huge, the haystack could be empty or very short, etc.
I'd just punt if strlen isn't known.
> +

> +			  gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);

> +			  tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);

> +			  gcall *memcmp_call

> +			    = gimple_build_call (memcmp_decl, 3, arg0, arg1,

> +						 arg1_len);

> +			  tree memcmp_lhs = make_ssa_name (integer_type_node);

> +			  gimple_call_set_lhs (memcmp_call, memcmp_lhs);

> +			  update_stmt (memcmp_call);

> +			  gsi_remove (&gsi, true);

> +			  gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);

> +

> +			  gsi = gsi_for_stmt (stmt);

> +			  tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));

> +			  gassign *ga = gimple_build_assign (lhs, code,

> +							     memcmp_lhs, zero);

> +			  gsi_replace (&gsi, ga, false);

> +			  update_ssa (TODO_update_ssa);


And this is certainly even more wrong than the old TODO_update_ssa at the
end of the pass, now you'll do it for every single replacement in the
function.  Why do you need it?  The old call stmt has gimple_vdef and
gimple_vuse, so just copy those over, see how e.g.
replace_call_with_call_and_fold in gimple-fold.c does that.
If you don't add strlen, you need to move the vdef/vuse from stmt to
memcmp_call, if you really want to add strlen (see above note though),
then that call should have a vuse added (same vuse as the stmt originally
had).

	Jakub
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c b/gcc/testsuite/gcc.dg/strlenopt-30.c
new file mode 100644
index 0000000..603e23c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-30.c
@@ -0,0 +1,42 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__((no_icf))
+_Bool f1(char *s)
+{
+  return __builtin_strstr (s, "hello") == s;
+}
+
+__attribute__((no_icf))
+_Bool f2(char *s)
+{
+  return s == __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f3(char *s)
+{
+  return s != __builtin_strstr (s, "hello");
+}
+
+__attribute__((no_icf))
+_Bool f4(char *s, char *t)
+{
+  return __builtin_strstr (s, t) == s;
+}
+
+/* Do not perform transform in this case, since
+   t1 doesn't have single use.  */
+
+__attribute__((no_icf))
+_Bool f5(char *s, char *t)
+{
+  void foo(char *);
+
+  char *t1 = __builtin_strstr (s, t);
+  foo (t1);
+  return (t1 == s);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_memcmp" 4 "strlen" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_strlen" 1 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 339812e..b7f4cee 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -45,6 +45,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "ipa-chkp.h"
 #include "tree-hash-traits.h"
 #include "builtins.h"
+#include "tree-into-ssa.h"
 
 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
    is an index into strinfo vector, negative value stands for
@@ -2302,7 +2303,94 @@  strlen_optimize_stmt (gimple_stmt_iterator *gsi)
 	  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
 	    handle_pointer_plus (gsi);
 	}
-      else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
+
+      /* Fold strstr (s, t) == s to memcmp (s, t, strlen (t)) == 0.
+	 if var holding return value of strstr has single use.  */
+
+      else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
+	{
+	  enum tree_code code = gimple_assign_rhs_code (stmt);
+	  if (code == EQ_EXPR || code == NE_EXPR)
+	    {
+	      tree rhs1 = gimple_assign_rhs1 (stmt);
+	      tree rhs2 = gimple_assign_rhs2 (stmt);
+	      if (TREE_CODE (rhs1) == SSA_NAME
+		  && TREE_CODE (rhs2) == SSA_NAME)
+		{
+		  gcall *call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs1));
+		  if (!call_stmt)
+		    {
+		      call_stmt = dyn_cast<gcall *> (SSA_NAME_DEF_STMT (rhs2));
+		      tree tmp = rhs1;
+		      rhs1 = rhs2;
+		      rhs2 = tmp;
+		    }
+
+		  tree call_lhs;
+		  if (call_stmt
+		      && gimple_call_builtin_p (call_stmt, BUILT_IN_STRSTR)
+		      && (call_lhs = gimple_call_lhs (call_stmt))
+		      && has_single_use (call_lhs))
+		    {
+		      tree arg0 = gimple_call_arg (call_stmt, 0);
+		      if (operand_equal_p (arg0, rhs2, 0))
+			{
+			  tree arg1 = gimple_call_arg (call_stmt, 1);
+			  tree arg1_len = NULL_TREE;
+			  int idx = get_stridx (arg1);
+
+			  if (idx)
+			    {
+			      if (idx < 0)
+				arg1_len = build_int_cst (size_type_node,
+							  ~idx);
+			      else
+				{
+				  strinfo *si = get_strinfo (idx);
+				  if (si)
+				    arg1_len = get_string_length (si);
+				}
+			    }
+
+			  if (arg1_len == NULL_TREE)
+			    {
+			      gimple_stmt_iterator gsi;
+			      tree strlen_decl;
+			      gimple *strlen_call;
+
+			      strlen_decl = builtin_decl_explicit (BUILT_IN_STRLEN);
+			      strlen_call = gimple_build_call (strlen_decl, 1,
+							       arg1);
+			      arg1_len = make_ssa_name (size_type_node);
+			      gimple_call_set_lhs (strlen_call, arg1_len);
+			      update_stmt (strlen_call);
+			      gsi = gsi_for_stmt (call_stmt);
+			      gsi_insert_before (&gsi, strlen_call, GSI_SAME_STMT);
+			    }
+
+			  gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
+			  tree memcmp_decl = builtin_decl_explicit (BUILT_IN_MEMCMP);
+			  gcall *memcmp_call
+			    = gimple_build_call (memcmp_decl, 3, arg0, arg1,
+						 arg1_len);
+			  tree memcmp_lhs = make_ssa_name (integer_type_node);
+			  gimple_call_set_lhs (memcmp_call, memcmp_lhs);
+			  update_stmt (memcmp_call);
+			  gsi_remove (&gsi, true);
+			  gsi_insert_before (&gsi, memcmp_call, GSI_SAME_STMT);
+
+			  gsi = gsi_for_stmt (stmt);
+			  tree zero = build_zero_cst (TREE_TYPE (memcmp_lhs));
+			  gassign *ga = gimple_build_assign (lhs, code,
+							     memcmp_lhs, zero);
+			  gsi_replace (&gsi, ga, false);
+			  update_ssa (TODO_update_ssa);
+			}
+		    }
+		}
+	    }
+	}
+    else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
 	{
 	  tree type = TREE_TYPE (lhs);
 	  if (TREE_CODE (type) == ARRAY_TYPE)