more m4 optimizations

View: New views
2 Messages — Rating Filter:   Alert me  

more m4 optimizations

by Eric Blake :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

> Hmm, while writing this, I noticed a potential optimization - there is no
> need for m4_escape to waste time on four separate m4_bpatsubst if string
> didn't have any problematic bytes in the first place.  I'll whip out a
> followup patch shortly.

As promised.  Well, it turned into more than just optimizing m4_escape, since I
noticed some subtle bugs in m4sh.  AS_LITERAL_IF wasn't too happy with
unbalanced parentheses, even though they can occur in well-formed shell code
(for example, in the patterns of a case statement).  In addition, AS_TR_SH and
AS_TR_CPP underquoted their argument, which could inadvertently lead to the
wrong macros being expanded after the case changes have completed.
Meanwhile, idioms such as m4_cond([test1], [1], [$3], [test2], [2], [$2], [$3])
get expensive when $3 is arbitrarily long, because $3 appears twice and m4 must
scan each copy; rewriting them as
 m4_if(m4_cond([test1], [1], [], [test2], [2], [-]), [-], [$2], [$3])
minimizes the scanning effort required by m4.


From a75bdb89701e9d42e784de4237a042973182a999 Mon Sep 17 00:00:00 2001
From: Eric Blake <ebb9@...>
Date: Wed, 28 Oct 2009 11:23:45 -0600
Subject: [PATCH 1/3] Optimize m4_escape for common case.

* lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is
already sane, by copying from AS_LITERAL_IF.  Move guts...
(_m4_escape): ...into new helper.

Signed-off-by: Eric Blake <ebb9@...>
---
 ChangeLog              |    5 +++++
 lib/m4sugar/m4sugar.m4 |   16 ++++++++++++++++
 2 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 41dfb1e..a972039 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-10-28  Eric Blake  <ebb9@...>

+ Optimize m4_escape for common case.
+ * lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is
+ already sane, by copying from AS_LITERAL_IF.  Move guts...
+ (_m4_escape): ...into new helper.
+
  Fix m4_text_wrap handling of quoted whitespace.
  * lib/m4sugar/m4sugar.m4 (m4_escape): New macro.
  (m4_text_wrap): Use it to avoid issues with embedded [ and ].
diff --git a/lib/m4sugar/m4sugar.m4 b/lib/m4sugar/m4sugar.m4
index 6fddff4..e0cacfb 100644
--- a/lib/m4sugar/m4sugar.m4
+++ b/lib/m4sugar/m4sugar.m4
@@ -2546,7 +2546,23 @@ m4_define([m4_append_uniq_w],
 # -----------------
 # Output quoted STRING, but with embedded #, $, [ and ] turned into
 # quadrigraphs.
+#
+# It is faster to check if STRING is already good using m4_translit
+# than to blindly perform four m4_bpatsubst.
+#
+# Because the translit is stripping quotes, it must also neutralize
+# anything that might be in a macro name, as well as comments, commas,
+# and parentheses.  All the problem characters are unified so that a
+# single m4_index can scan the result.
+#
+# Rather than expand m4_defn every time m4_escape is expanded, we
+# inline its expansion up front.
 m4_define([m4_escape],
+[m4_if(m4_index(m4_translit([$1],
+   [[]#,()]]m4_dquote(m4_defn([m4_cr_symbols2]))[, [$$$]), [$]),
+  [-1], [m4_echo], [_$0])([$1])])
+
+m4_define([_m4_escape],
 [m4_changequote([-=<{(],[)}>=-])]dnl
 [m4_bpatsubst(m4_bpatsubst(m4_bpatsubst(m4_bpatsubst(
   -=<{(-=<{(-=<{(-=<{(-=<{($1)}>=-)}>=-)}>=-)}>=-)}>=-,
--
1.6.4.2


From c08d93bedbc554e7ed92e45c97a2666719176cb5 Mon Sep 17 00:00:00 2001
From: Eric Blake <ebb9@...>
Date: Wed, 28 Oct 2009 12:21:36 -0600
Subject: [PATCH 2/3] Minor optimizations to m4sh.

* lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF)
(AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting
if-true and if-false arguments only once.

Signed-off-by: Eric Blake <ebb9@...>
---
 ChangeLog           |    5 +++++
 lib/m4sugar/m4sh.m4 |   25 ++++++++++++++-----------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a972039..f28d97c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-10-28  Eric Blake  <ebb9@...>

+ Minor optimizations to m4sh.
+ * lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF)
+ (AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting
+ if-true and if-false arguments only once.
+
  Optimize m4_escape for common case.
  * lib/m4sugar/m4sugar.m4 (m4_escape): Don't use regex if string is
  already sane, by copying from AS_LITERAL_IF.  Move guts...
diff --git a/lib/m4sugar/m4sh.m4 b/lib/m4sugar/m4sh.m4
index 4864088..e3957d0 100644
--- a/lib/m4sugar/m4sh.m4
+++ b/lib/m4sugar/m4sh.m4
@@ -1507,15 +1507,15 @@ m4_define([AS_HELP_STRING],
 # we worry if the first character also matches m4_cr_symbol1 (ie. does not
 # match m4_cr_digit).
 m4_define([AS_IDENTIFIER_IF],
-[m4_if(m4_index([$1], [@]), [-1],
-       [_$0($@)],
-       [_$0(m4_bpatsubst([[$1]], [@&t@]), [$2], [$3])])])
+[m4_if(_$0(m4_if(m4_index([$1], [@]), [-1],
+  [[$1]], [m4_bpatsubst([[$1]], [@&t@])])), [-], [$2], [$3])])
+
 m4_define([_AS_IDENTIFIER_IF],
-[m4_cond([[$1]], [], [$3],
+[m4_cond([[$1]], [], [],
  [m4_eval(m4_len(m4_translit([[$1]], ]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[)) > 0)], [1], [$3],
+m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[)) > 0)], [1], [],
  [m4_len(m4_translit(m4_format([[%.1s]], [$1]), ]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [$2], [$3])])
+m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [-], [])])


 # AS_LITERAL_IF(EXPRESSION, IF-LITERAL, IF-NOT-LITERAL)
@@ -1544,13 +1544,16 @@ m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [$2], [$3])])
 # Rather than expand m4_defn every time AS_LITERAL_IF is expanded, we
 # inline its expansion up front.
 m4_define([AS_LITERAL_IF],
-[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [$3],
+[m4_if(_$0([$1]), [-], [$2], [$3])])
+
+m4_define([_AS_LITERAL_IF],
+[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [],
  [m4_index(m4_translit(m4_quote($1),
        [[]`,#]]]dnl
 m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[,
        [$$$]),
-   [$])], [-1], [$2],
- [$3])])
+   [$])], [-1], [-],
+ [])])


 # AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]])
@@ -1909,9 +1912,9 @@ m4_define([AS_VAR_GET],
 # Polymorphic, and avoids sh expansion error upon interrupt or term signal.
 m4_define([AS_VAR_IF],
 [AS_LITERAL_IF([$1],
-  [AS_IF([test "x$$1" = x""$2], [$3], [$4])],
+  [AS_IF([test "x$$1" = x""$2]],
   [AS_VAR_COPY([as_val], [$1])
-   AS_IF([test "x$as_val" = x""$2], [$3], [$4])])])
+   AS_IF([test "x$as_val" = x""$2]]), [$3], [$4])])


 # AS_VAR_PUSHDEF and AS_VAR_POPDEF
--
1.6.4.2


From 70fab56b1c20869f54628426e0f3ae1db8cb6f62 Mon Sep 17 00:00:00 2001
From: Eric Blake <ebb9@...>
Date: Wed, 28 Oct 2009 15:58:43 -0600
Subject: [PATCH 3/3] Fix corner cases in AS_LITERAL_IF and AS_TR_SH.

* lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Fix bug with unbalanced
parens.
(_AS_LITERAL_IF): Assume proper quoting.  Move guts...
(_AS_LITERAL_IF_): into new helper.
(AS_TR_SH, AS_TR_CPP): Fix bugs with expansion of wrong macro.
Move guts...
(_AS_TR_SH, _AS_TR_SH_LITERAL, _AS_TR_SH_INDIR, _AS_TR_CPP)
(_AS_TR_CPP_LITERAL, _AS_TR_CPP_INDIR): ...into new helpers.
(AS_VAR_PUSHDEF): Hoist m4_require, by moving guts...
(_AS_VAR_PUSHDEF): ...into new helper.
* tests/m4sh.at (AS@&t@_LITERAL_IF): Enhance test.

Signed-off-by: Eric Blake <ebb9@...>
---
 ChangeLog           |   13 +++++++++
 lib/m4sugar/m4sh.m4 |   68 ++++++++++++++++++++++++++++++--------------------
 tests/m4sh.at       |    4 +-
 3 files changed, 56 insertions(+), 29 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f28d97c..68c5f5e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
 2009-10-28  Eric Blake  <ebb9@...>

+ Fix corner cases in AS_LITERAL_IF and AS_TR_SH.
+ * lib/m4sugar/m4sh.m4 (AS_LITERAL_IF): Fix bug with unbalanced
+ parens.
+ (_AS_LITERAL_IF): Assume proper quoting.  Move guts...
+ (_AS_LITERAL_IF_): into new helper.
+ (AS_TR_SH, AS_TR_CPP): Fix bugs with expansion of wrong macro.
+ Move guts...
+ (_AS_TR_SH, _AS_TR_SH_LITERAL, _AS_TR_SH_INDIR, _AS_TR_CPP)
+ (_AS_TR_CPP_LITERAL, _AS_TR_CPP_INDIR): ...into new helpers.
+ (AS_VAR_PUSHDEF): Hoist m4_require, by moving guts...
+ (_AS_VAR_PUSHDEF): ...into new helper.
+ * tests/m4sh.at (AS@&t@_LITERAL_IF): Enhance test.
+
  Minor optimizations to m4sh.
  * lib/m4sugar/m4sh.m4 (AS_VAR_IF, AS_IDENTIFIER_IF)
  (AS_LITERAL_IF): Parse fewer bytes during expansion, by visiting
diff --git a/lib/m4sugar/m4sh.m4 b/lib/m4sugar/m4sh.m4
index e3957d0..cd45be2 100644
--- a/lib/m4sugar/m4sh.m4
+++ b/lib/m4sugar/m4sh.m4
@@ -1537,23 +1537,23 @@ m4_dquote(m4_dquote(m4_defn([m4_cr_symbols1])))[[))], [0], [-], [])])
 # profiling shows that it is faster to use m4_translit.
 #
 # Because the translit is stripping quotes, it must also neutralize anything
-# that might be in a macro name, as well as comments and commas.  All the
-# problem characters are unified so that a single m4_index can scan the
-# result.
+# that might be in a macro name, as well as comments, commas, or unbalanced
+# parentheses.  All the problem characters are unified so that a single
+# m4_index can scan the result.
 #
 # Rather than expand m4_defn every time AS_LITERAL_IF is expanded, we
 # inline its expansion up front.
 m4_define([AS_LITERAL_IF],
-[m4_if(_$0([$1]), [-], [$2], [$3])])
+[_$0(m4_expand([$1]), [$2], [$3])])

 m4_define([_AS_LITERAL_IF],
-[m4_cond([m4_eval(m4_index(m4_quote($1), [@S|@]) == -1)], [0], [],
- [m4_index(m4_translit(m4_quote($1),
-       [[]`,#]]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[,
-       [$$$]),
-   [$])], [-1], [-],
- [])])
+[m4_if($0_([$1]), [-], [$2], [$3])])
+
+m4_define([_AS_LITERAL_IF_],
+[m4_cond([m4_eval(m4_index([$1], [@S|@]) == -1)], [0], [],
+ [m4_index(m4_translit([$1], [[]`,#()]]]dnl
+m4_dquote(m4_dquote(m4_defn([m4_cr_symbols2])))[[, [$$$]),
+   [$])], [-1], [-], [])])


 # AS_TMPDIR(PREFIX, [DIRECTORY = $TMPDIR [= /tmp]])
@@ -1739,13 +1739,18 @@ as_tr_sh="eval sed 'y%*+%pp%;s%[[^_$as_cr_alnum]]%_%g'"
 # For speed, we inline the literal definitions that can be computed up front.
 m4_defun_init([AS_TR_SH],
 [AS_REQUIRE([_$0_PREPARE])],
-[AS_LITERAL_IF([$1],
-      [m4_translit([$1], [*+[]]]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_not_symbols2])))[[,
- [pp[]]]]dnl
-m4_dquote(m4_dquote(m4_for(,1,255,,[[_]])))[[)],
-  [`AS_ECHO(["_AS_ESCAPE(m4_dquote(m4_expand([$1])),
-    [`], [\])"]) | $as_tr_sh`])])
+[_$0(m4_expand([$1]))])
+
+m4_define([_AS_TR_SH],
+[_AS_LITERAL_IF([$1], [$0_LITERAL], [$0_INDIR])([$1])])
+
+m4_define([_AS_TR_SH_LITERAL],
+[m4_translit([[$1]],
+  [*+[]]]m4_dquote(m4_defn([m4_cr_not_symbols2]))[,
+  [pp[]]]m4_dquote(m4_for(,1,255,,[[_]]))[)])
+
+m4_define([_AS_TR_SH_INDIR],
+[`AS_ECHO(["_AS_ESCAPE([[$1]], [`], [\])"]) | $as_tr_sh`])


 # _AS_TR_CPP_PREPARE
@@ -1766,12 +1771,18 @@ as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[[^_$as_cr_alnum]]%_%g
 # See implementation comments in AS_TR_SH.
 m4_defun_init([AS_TR_CPP],
 [AS_REQUIRE([_$0_PREPARE])],
-[AS_LITERAL_IF([$1],
-      [m4_translit([$1], [*[]]]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_letters])m4_defn([m4_cr_not_symbols2])))[[,
- [P[]]]]dnl
-m4_dquote(m4_dquote(m4_defn([m4_cr_LETTERS])m4_for(,1,255,,[[_]])))[[)],
-      [`AS_ECHO(["$1"]) | $as_tr_cpp`])])
+[_$0(m4_expand([$1]))])
+
+m4_define([_AS_TR_CPP],
+[_AS_LITERAL_IF([$1], [$0_LITERAL], [$0_INDIR])([$1])])
+
+m4_define([_AS_TR_CPP_LITERAL],
+[m4_translit([$1],
+  [*[]]]m4_dquote(m4_defn([m4_cr_letters])m4_defn([m4_cr_not_symbols2]))[,
+  [P[]]]m4_dquote(m4_defn([m4_cr_LETTERS])m4_for(,1,255,,[[_]]))[)])
+
+m4_define([_AS_TR_CPP_INDIR],
+[`AS_ECHO(["$1"]) | $as_tr_cpp`])


 # _AS_TR_PREPARE
@@ -1962,9 +1973,12 @@ m4_define([AS_VAR_POPDEF],
 # don't work.  Therefore, we must require the preparation ourselves.
 m4_defun_init([AS_VAR_PUSHDEF],
 [AS_REQUIRE([_AS_TR_SH_PREPARE])],
-[AS_LITERAL_IF([$2],
-       [m4_pushdef([$1], [AS_TR_SH($2)])],
-       [as_$1=AS_TR_SH($2)
+[_$0([$1], m4_expand([$2]))])
+
+m4_define([_AS_VAR_PUSHDEF],
+[_AS_LITERAL_IF([$2],
+ [m4_pushdef([$1], [_AS_TR_SH_LITERAL([$2])])],
+ [as_$1=_AS_TR_SH_INDIR([$2])
 m4_pushdef([$1], [$as_[$1]])])])


diff --git a/tests/m4sh.at b/tests/m4sh.at
index 4627a48..5ff9fe8 100644
--- a/tests/m4sh.at
+++ b/tests/m4sh.at
@@ -1041,13 +1041,13 @@ AT_DATA_M4SH([script.as], [[dnl
 AS_INIT
 echo AS_LITERAL_IF([lit], [ok], [ERR]) 1
 echo AS_LITERAL_IF([l$it], [ERR], [ok]) 2
-echo AS_LITERAL_IF([l``it], [ERR], [ok]) 3
+echo AS_LITERAL_IF([l`case a in b) ;; esac`it], [ERR], [ok]) 3
 m4_define([mac], [lit])
 echo AS_LITERAL_IF([mac], [ok], [ERR]) 4
 echo AS_LITERAL_IF([mac($, ``)], [ok], [ERR]) 5
 m4_define([mac], [l$it])
 echo AS_LITERAL_IF([mac], [ERR], [ok]) 6
-m4_define([mac], [l`it])
+m4_define([mac], [l``it])
 echo AS_LITERAL_IF([mac], [ERR], [ok]) 7
 ]])

--
1.6.4.2






Re: more m4 optimizations

by Paolo Bonzini-6 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On 10/28/2009 11:55 PM, Eric Blake wrote:
> As promised.

Looks fine.

Paolo