[PATCH] yysyntax_error: make it manage its own memory.

View: New views
8 Messages — Rating Filter:   Alert me  

[PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

I'd like to push this patch to master and branch-2.5.  I need it for that
lookahead correction implementation I spoke about recently, but I think
it's worthwhile on its own as well.  Any objections?  I haven't looked at
porting this to the other skeletons yet, but would there be any objections
there?

From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001
From: Joel E. Denny <jdenny@...>
Date: Mon, 7 Sep 2009 22:01:16 -0400
Subject: [PATCH] yysyntax_error: make it manage its own memory.

This improves readability, eliminates the need to invoke the
function twice and thus repeat the lookahead collection, and
prepares for future extensions that will make those repetitions
more expensive and that will require additional memory
management in yysyntax_error.
* data/yacc.c (yysyntax_error): Add arguments for message
buffer variables stored in the parser.  Instead of size, return
status similar to yyparse status but indicating success of
message creation.  Import memory management code from...
(yyparse, yypush_parse): ... here.
---
 ChangeLog        |   14 ++
 data/yacc.c      |  124 +++++-----
 src/parse-gram.c |  664 +++++++++++++++++++++++++++---------------------------
 src/parse-gram.h |   10 +-
 4 files changed, 415 insertions(+), 397 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6c97582..be87163 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2009-09-07  Joel E. Denny  <jdenny@...>
+
+ yysyntax_error: make it manage its own memory.
+ This improves readability, eliminates the need to invoke the
+ function twice and thus repeat the lookahead collection, and
+ prepares for future extensions that will make those repetitions
+ more expensive and that will require additional memory
+ management in yysyntax_error.
+ * data/yacc.c (yysyntax_error): Add arguments for message
+ buffer variables stored in the parser.  Instead of size, return
+ status similar to yyparse status but indicating success of
+ message creation.  Import memory management code from...
+ (yyparse, yypush_parse): ... here.
+
 2009-09-10  Joel E. Denny  <jdenny@...>
 
  Clean up yacc.c a little.
diff --git a/data/yacc.c b/data/yacc.c
index 26c5996..013165b 100644
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -889,20 +889,21 @@ yytnamerr (char *yyres, const char *yystr)
 }
 # endif
 
-/* Copy into YYRESULT an error message about the unexpected token
-   YYTOKEN while in state YYSTATE.  Return the number of bytes copied,
-   including the terminating null byte.  If YYRESULT is null, do not
-   copy anything; just return the number of bytes that would be
-   copied.  As a special case, return 0 if an ordinary "syntax error"
-   message will do.  Return YYSIZE_MAXIMUM if overflow occurs during
-   size calculation.  */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yytoken)
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+   about the unexpected token YYTOKEN while in state YYSTATE.  If *YYMSG
+   is not large enough for the message, free *YYMSG iff it's not
+   YYMSGBUF, allocate a new buffer, and update *YYMSG and *YYMSG_ALLOC
+   accordingly.  Return 0 if *YYMSG was successfully written.  Return 1
+   if an ordinary "syntax error" message will suffice instead.  Return 2
+   if memory could not be allocated for the error message.  */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, char *yymsgbuf,
+                int yystate, int yytoken)
 {
   int yyn = yypact[yystate];
 
   if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
-    return 0;
+    return 1;
   else
     {
       YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
@@ -946,7 +947,7 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
     yysize = yysize1;
   }
 
-        switch (yycount)
+      switch (yycount)
         {
 #define YYCASE_(N, S)                           \
           case N:                               \
@@ -965,28 +966,47 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
       yysize = yysize1;
 
       if (yysize_overflow)
- return YYSIZE_MAXIMUM;
+        yysize = YYSIZE_MAXIMUM;
 
-      if (yyresult)
- {
-  /* Avoid sprintf, as that infringes on the user's name space.
-     Don't have undefined behavior even if the translation
-     produced a string with the wrong number of "%s"s.  */
-  char *yyp = yyresult;
-  int yyi = 0;
-  while ((*yyp = *yyformat) != '\0')
-            if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
-              {
-                yyp += yytnamerr (yyp, yyarg[yyi++]);
-                yyformat += 2;
-              }
-            else
-              {
-                yyp++;
-                yyformat++;
-              }
- }
-      return yysize;
+      if (*yymsg_alloc < yysize && *yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
+        {
+          YYSIZE_T yyalloc = 2 * yysize;
+          if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
+            yyalloc = YYSTACK_ALLOC_MAXIMUM;
+          if (*yymsg != yymsgbuf)
+            YYSTACK_FREE (*yymsg);
+          *yymsg = (char *) YYSTACK_ALLOC (yyalloc);
+          if (*yymsg)
+            *yymsg_alloc = yyalloc;
+          else
+            {
+              *yymsg = yymsgbuf;
+              *yymsg_alloc = sizeof yymsgbuf;
+            }
+        }
+
+      if (! (0 < yysize && yysize <= *yymsg_alloc))
+        return 2;
+
+      /* Avoid sprintf, as that infringes on the user's name space.
+         Don't have undefined behavior even if the translation
+         produced a string with the wrong number of "%s"s.  */
+      {
+        char *yyp = *yymsg;
+        int yyi = 0;
+        while ((*yyp = *yyformat) != '\0')
+          if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+            {
+              yyp += yytnamerr (yyp, yyarg[yyi++]);
+              yyformat += 2;
+            }
+          else
+            {
+              yyp++;
+              yyformat++;
+            }
+        return 0;
+      }
     }
 }
 #endif /* YYERROR_VERBOSE */
@@ -1432,35 +1452,17 @@ yyerrlab:
       yyerror (]b4_yyerror_args[YY_("syntax error"));
 #else
       {
- YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
-  {
-    YYSIZE_T yyalloc = 2 * yysize;
-    if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
-      yyalloc = YYSTACK_ALLOC_MAXIMUM;
-    if (yymsg != yymsgbuf)
-      YYSTACK_FREE (yymsg);
-    yymsg = (char *) YYSTACK_ALLOC (yyalloc);
-    if (yymsg)
-      yymsg_alloc = yyalloc;
-    else
-      {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
-      }
-  }
-
- if (0 < yysize && yysize <= yymsg_alloc)
-  {
-    (void) yysyntax_error (yymsg, yystate, yytoken);
-    yyerror (]b4_yyerror_args[yymsg);
-  }
- else
-  {
-    yyerror (]b4_yyerror_args[YY_("syntax error"));
-    if (yysize != 0)
-      goto yyexhaustedlab;
-  }
+        int yysyntax_error_result =
+          yysyntax_error (&yymsg_alloc, &yymsg, yymsgbuf, yystate,
+                          yytoken);
+        if (yysyntax_error_result == 0)
+          yyerror (]b4_yyerror_args[yymsg);
+        else
+          {
+            yyerror (]b4_yyerror_args[YY_("syntax error"));
+            if (yysyntax_error_result == 2)
+              goto yyexhaustedlab;
+          }
       }
 #endif
     }



Re: [PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Thu, 10 Sep 2009, Joel E. Denny wrote:

> I'd like to push this patch to master and branch-2.5.  I need it for that
> lookahead correction implementation I spoke about recently, but I think
> it's worthwhile on its own as well.  Any objections?

I just took a look at the other skeletons, and I'm now pretty convinced
this is a step in the right direction.  However, I'll wait a day or so in
case someone knows a reason yacc.c needs to keep the current design.

> I haven't looked at
> porting this to the other skeletons yet, but would there be any objections
> there?

Nevermind.  For C++ and Java, we don't have to worry about string memory
management, of course.  glr.c's approach is already similar to the patch
below but it uses longjmp for failures (yuck!) and it doesn't try to hang
onto whatever memory it previously allocated for messages.

> >From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001
> From: Joel E. Denny <jdenny@...>
> Date: Mon, 7 Sep 2009 22:01:16 -0400
> Subject: [PATCH] yysyntax_error: make it manage its own memory.
>
> This improves readability, eliminates the need to invoke the
> function twice and thus repeat the lookahead collection, and
> prepares for future extensions that will make those repetitions
> more expensive and that will require additional memory
> management in yysyntax_error.
> * data/yacc.c (yysyntax_error): Add arguments for message
> buffer variables stored in the parser.  Instead of size, return
> status similar to yyparse status but indicating success of
> message creation.  Import memory management code from...
> (yyparse, yypush_parse): ... here.
> ---
>  ChangeLog        |   14 ++
>  data/yacc.c      |  124 +++++-----
>  src/parse-gram.c |  664 +++++++++++++++++++++++++++---------------------------
>  src/parse-gram.h |   10 +-
>  4 files changed, 415 insertions(+), 397 deletions(-)
>
> diff --git a/ChangeLog b/ChangeLog
> index 6c97582..be87163 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,17 @@
> +2009-09-07  Joel E. Denny  <jdenny@...>
> +
> + yysyntax_error: make it manage its own memory.
> + This improves readability, eliminates the need to invoke the
> + function twice and thus repeat the lookahead collection, and
> + prepares for future extensions that will make those repetitions
> + more expensive and that will require additional memory
> + management in yysyntax_error.
> + * data/yacc.c (yysyntax_error): Add arguments for message
> + buffer variables stored in the parser.  Instead of size, return
> + status similar to yyparse status but indicating success of
> + message creation.  Import memory management code from...
> + (yyparse, yypush_parse): ... here.
> +
>  2009-09-10  Joel E. Denny  <jdenny@...>
>  
>   Clean up yacc.c a little.
> diff --git a/data/yacc.c b/data/yacc.c
> index 26c5996..013165b 100644
> --- a/data/yacc.c
> +++ b/data/yacc.c
> @@ -889,20 +889,21 @@ yytnamerr (char *yyres, const char *yystr)
>  }
>  # endif
>  
> -/* Copy into YYRESULT an error message about the unexpected token
> -   YYTOKEN while in state YYSTATE.  Return the number of bytes copied,
> -   including the terminating null byte.  If YYRESULT is null, do not
> -   copy anything; just return the number of bytes that would be
> -   copied.  As a special case, return 0 if an ordinary "syntax error"
> -   message will do.  Return YYSIZE_MAXIMUM if overflow occurs during
> -   size calculation.  */
> -static YYSIZE_T
> -yysyntax_error (char *yyresult, int yystate, int yytoken)
> +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
> +   about the unexpected token YYTOKEN while in state YYSTATE.  If *YYMSG
> +   is not large enough for the message, free *YYMSG iff it's not
> +   YYMSGBUF, allocate a new buffer, and update *YYMSG and *YYMSG_ALLOC
> +   accordingly.  Return 0 if *YYMSG was successfully written.  Return 1
> +   if an ordinary "syntax error" message will suffice instead.  Return 2
> +   if memory could not be allocated for the error message.  */
> +static int
> +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, char *yymsgbuf,
> +                int yystate, int yytoken)
>  {
>    int yyn = yypact[yystate];
>  
>    if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
> -    return 0;
> +    return 1;
>    else
>      {
>        YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
> @@ -946,7 +947,7 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
>      yysize = yysize1;
>    }
>  
> -        switch (yycount)
> +      switch (yycount)
>          {
>  #define YYCASE_(N, S)                           \
>            case N:                               \
> @@ -965,28 +966,47 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
>        yysize = yysize1;
>  
>        if (yysize_overflow)
> - return YYSIZE_MAXIMUM;
> +        yysize = YYSIZE_MAXIMUM;
>  
> -      if (yyresult)
> - {
> -  /* Avoid sprintf, as that infringes on the user's name space.
> -     Don't have undefined behavior even if the translation
> -     produced a string with the wrong number of "%s"s.  */
> -  char *yyp = yyresult;
> -  int yyi = 0;
> -  while ((*yyp = *yyformat) != '\0')
> -            if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
> -              {
> -                yyp += yytnamerr (yyp, yyarg[yyi++]);
> -                yyformat += 2;
> -              }
> -            else
> -              {
> -                yyp++;
> -                yyformat++;
> -              }
> - }
> -      return yysize;
> +      if (*yymsg_alloc < yysize && *yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
> +        {
> +          YYSIZE_T yyalloc = 2 * yysize;
> +          if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
> +            yyalloc = YYSTACK_ALLOC_MAXIMUM;
> +          if (*yymsg != yymsgbuf)
> +            YYSTACK_FREE (*yymsg);
> +          *yymsg = (char *) YYSTACK_ALLOC (yyalloc);
> +          if (*yymsg)
> +            *yymsg_alloc = yyalloc;
> +          else
> +            {
> +              *yymsg = yymsgbuf;
> +              *yymsg_alloc = sizeof yymsgbuf;
> +            }
> +        }
> +
> +      if (! (0 < yysize && yysize <= *yymsg_alloc))
> +        return 2;
> +
> +      /* Avoid sprintf, as that infringes on the user's name space.
> +         Don't have undefined behavior even if the translation
> +         produced a string with the wrong number of "%s"s.  */
> +      {
> +        char *yyp = *yymsg;
> +        int yyi = 0;
> +        while ((*yyp = *yyformat) != '\0')
> +          if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
> +            {
> +              yyp += yytnamerr (yyp, yyarg[yyi++]);
> +              yyformat += 2;
> +            }
> +          else
> +            {
> +              yyp++;
> +              yyformat++;
> +            }
> +        return 0;
> +      }
>      }
>  }
>  #endif /* YYERROR_VERBOSE */
> @@ -1432,35 +1452,17 @@ yyerrlab:
>        yyerror (]b4_yyerror_args[YY_("syntax error"));
>  #else
>        {
> - YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken);
> - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
> -  {
> -    YYSIZE_T yyalloc = 2 * yysize;
> -    if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
> -      yyalloc = YYSTACK_ALLOC_MAXIMUM;
> -    if (yymsg != yymsgbuf)
> -      YYSTACK_FREE (yymsg);
> -    yymsg = (char *) YYSTACK_ALLOC (yyalloc);
> -    if (yymsg)
> -      yymsg_alloc = yyalloc;
> -    else
> -      {
> - yymsg = yymsgbuf;
> - yymsg_alloc = sizeof yymsgbuf;
> -      }
> -  }
> -
> - if (0 < yysize && yysize <= yymsg_alloc)
> -  {
> -    (void) yysyntax_error (yymsg, yystate, yytoken);
> -    yyerror (]b4_yyerror_args[yymsg);
> -  }
> - else
> -  {
> -    yyerror (]b4_yyerror_args[YY_("syntax error"));
> -    if (yysize != 0)
> -      goto yyexhaustedlab;
> -  }
> +        int yysyntax_error_result =
> +          yysyntax_error (&yymsg_alloc, &yymsg, yymsgbuf, yystate,
> +                          yytoken);
> +        if (yysyntax_error_result == 0)
> +          yyerror (]b4_yyerror_args[yymsg);
> +        else
> +          {
> +            yyerror (]b4_yyerror_args[YY_("syntax error"));
> +            if (yysyntax_error_result == 2)
> +              goto yyexhaustedlab;
> +          }
>        }
>  #endif
>      }
>
>
>
>



Re: [PATCH] yysyntax_error: make it manage its own memory.

by Akim Demaille :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


Le 10 sept. 2009 à 07:16, Joel E. Denny a écrit :

> I'd like to push this patch to master and branch-2.5.  I need it for  
> that
> lookahead correction implementation I spoke about recently, but I  
> think
> it's worthwhile on its own as well.  Any objections?  I haven't  
> looked at
> porting this to the other skeletons yet, but would there be any  
> objections
> there?
>
>> From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00  
>> 2001
> From: Joel E. Denny <jdenny@...>
> Date: Mon, 7 Sep 2009 22:01:16 -0400
> Subject: [PATCH] yysyntax_error: make it manage its own memory.
>
> This improves readability, eliminates the need to invoke the
> function twice and thus repeat the lookahead collection, and
> prepares for future extensions that will make those repetitions
> more expensive and that will require additional memory
> management in yysyntax_error.
> * data/yacc.c (yysyntax_error): Add arguments for message
> buffer variables stored in the parser.  Instead of size, return
> status similar to yyparse status but indicating success of
> message creation.  Import memory management code from...
> (yyparse, yypush_parse): ... here.

Hi Joel,

I'm not sure I understand what's going on here.  The code is meant to  
be used with either alloca or malloc, and now that you moved  
everything in syntax_error, I fail to see how alloca could work as  
expected.  Reading the patch (not the resulting code), it really seems  
like you're possibly returning some pointer to a alloca'ed block of  
memory.

I can't see how alloca could be used if not the way Paul made it.


Re: [PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Thu, 10 Sep 2009, Akim Demaille wrote:

> Le 10 sept. 2009 à 07:16, Joel E. Denny a écrit :

> > > From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001
> > From: Joel E. Denny <jdenny@...>
> > Date: Mon, 7 Sep 2009 22:01:16 -0400
> > Subject: [PATCH] yysyntax_error: make it manage its own memory.

> I'm not sure I understand what's going on here.  The code is meant to be used
> with either alloca or malloc, and now that you moved everything in
> syntax_error, I fail to see how alloca could work as expected.  Reading the
> patch (not the resulting code), it really seems like you're possibly returning
> some pointer to a alloca'ed block of memory.
>
> I can't see how alloca could be used if not the way Paul made it.

You're right!  I forgot about alloca.  Thanks for catching that.

Here are two new patches that I'd like to apply to master and branch-2.5.

The first patch adds two new test groups to regression.at.  I normally
don't like extending regression.at because I feel like it's the
"miscellaneous" or "I can't think of a better category" file.  
Unfortunately, right now, I can't think of a better category.

The first test group exercises alloca with verbose error messages.  It
reveals the bug you found in my above patch.  The second test group
reveals an obscure bug in the existing verbose error message
implementation.  I'm not sure how likely that bug is in a practical
parser, but I guess it doesn't hurt to test for it.  Because both of these
tests seem heavily dependent upon the exact yacc.c implementation we have
now, I've added some sanity checks to them to be sure they stay meaningful
in the future.

The second patch is another attempt to prepare yysyntax_error for my
lookahead correction code but without breaking alloca support.  It also
fixes that obscure bug I just mentioned.  More importantly, I think it
makes the code more readable and slightly more efficient.  The approach
this time is that yysyntax_error only requires a second invocation if
reallocation is needed.

I'm finding this to be a tricky area.  I'd appreciate any comments anyone
has on these patches.

From dc23d678388635a058492294f6cd45105f581c98 Mon Sep 17 00:00:00 2001
From: Joel E. Denny <jdenny@...>
Date: Thu, 10 Sep 2009 18:29:01 -0400
Subject: [PATCH] yacc.c: test yysyntax_error memory management more.

* tests/regression.at (-Dparse.error=verbose and
YYSTACK_USE_ALLOCA): New test group.
(-Dparse.error=verbose overflow): New test group that reveals
an obscure bug.  Expected fail for now.
---
 ChangeLog           |    8 ++
 tests/regression.at |  200 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6c97582..b508741 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2009-09-10  Joel E. Denny  <jdenny@...>
 
+ yacc.c: test yysyntax_error memory management more.
+ * tests/regression.at (-Dparse.error=verbose and
+ YYSTACK_USE_ALLOCA): New test group.
+ (-Dparse.error=verbose overflow): New test group that reveals
+ an obscure bug.  Expected fail for now.
+
+2009-09-10  Joel E. Denny  <jdenny@...>
+
  Clean up yacc.c a little.
  * data/yacc.c: Clean up M4 for readability, and make output
  whitespace more consistent.  For the main parse function
diff --git a/tests/regression.at b/tests/regression.at
index 6bfc77e..7e17a91 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1237,3 +1237,203 @@ AT_COMPILE([[input]])
 AT_PARSER_CHECK([[./input]])
 
 AT_CLEANUP
+
+
+
+## ---------------------------------------------- ##
+## -Dparse.error=verbose and YYSTACK_USE_ALLOCA.  ##
+## ---------------------------------------------- ##
+
+AT_SETUP([[-Dparse.error=verbose and YYSTACK_USE_ALLOCA]])
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+  #include <stdio.h>
+  void yyerror (char const *);
+  int yylex (void);
+  #define YYSTACK_USE_ALLOCA 1
+}
+
+%define parse.error verbose
+
+%%
+
+start: check syntax_error syntax_error ;
+
+check:
+{
+  if (sizeof yymsgbuf > 128)
+    {
+      fprintf (stderr,
+               "The initial size of yymsgbuf in yyparse has increased\n"
+               "since this test group was last updated.  As a result,\n"
+               "this test group may no longer manage to induce a\n"
+               "reallocation of the syntax error message buffer.\n"
+               "This test group must be adjusted to produce a longer\n"
+               "error message.\n");
+      YYABORT;
+    }
+}
+;
+
+// Induce a syntax error message whose total length is more than
+// sizeof yymsgbuf in yyparse.  Each token here is 64 bytes.
+syntax_error:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| error 'a' 'b' 'c'
+;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+  /* Induce two syntax error messages (which requires full error
+     recovery by shifting 3 tokens) in order to detect any loss of the
+     reallocated buffer.  */
+  static char const *input = "abc";
+  return *input++;
+}
+
+int
+main (void)
+{
+  return yyparse ();
+}
+]])
+
+AT_BISON_CHECK([[-o input.c input.y]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [],
+[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
+syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
+]])
+
+AT_CLEANUP
+
+
+
+## -------------------------------- ##
+## -Dparse.error=verbose overflow.  ##
+## -------------------------------- ##
+
+# Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an
+# invocation of yysyntax_error has caused yymsg_alloc to grow to exactly
+# YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had
+# to be clipped to YYSTACK_ALLOC_MAXIMUM).  Now imagine a subsequent
+# invocation of yysyntax_error that overflows during its size
+# calculation and thus returns YYSIZE_MAXIMUM to yyparse.  Then, yyparse
+# will invoke yyerror using the old contents of yymsg.  This bug needs
+# to be fixed.
+
+AT_SETUP([[-Dparse.error=verbose overflow]])
+
+AT_XFAIL_IF([[:]])
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+  #include <stdio.h>
+  void yyerror (char const *);
+  int yylex (void);
+
+  /* This prevents this test case from having to induce error messages
+     large enough to overflow size_t.  */
+  #define YYSIZE_T unsigned char
+
+  /* Bring in malloc so yacc.c doesn't try to provide a malloc prototype
+     using our YYSIZE_T.  */
+  #include <stdlib.h>
+
+  /* Max depth is usually much smaller than YYSTACK_ALLOC_MAXIMUM, and
+     we don't want gcc to warn everywhere this constant would be too big
+     to make sense for our YYSIZE_T.  */
+  #define YYMAXDEPTH 100
+}
+
+%define parse.error verbose
+
+%%
+
+start: syntax_error1 check syntax_error2 ;
+
+// Induce a syntax error message whose total length causes yymsg in
+// yyparse to be reallocated to size YYSTACK_ALLOC_MAXIMUM, which
+// should be 255.  Each token here is 64 bytes.
+syntax_error1:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| "123456789112345678921234567893123456789412345678951234567896123C"
+| error 'a' 'b' 'c'
+;
+
+check:
+{
+  if (yymsg_alloc != YYSTACK_ALLOC_MAXIMUM
+      || YYSTACK_ALLOC_MAXIMUM != YYSIZE_MAXIMUM
+      || YYSIZE_MAXIMUM != 255)
+    {
+      fprintf (stderr,
+               "The assumptions of this test group are no longer\n"
+               "valid, so it may no longer catch the error it was\n"
+               "designed to catch.\n");
+      YYABORT;
+    }
+}
+;
+
+// Now overflow.
+syntax_error2:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| "123456789112345678921234567893123456789412345678951234567896123C"
+| "123456789112345678921234567893123456789412345678951234567896123D"
+| "123456789112345678921234567893123456789412345678951234567896123E"
+;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+  /* Induce two syntax error messages (which requires full error
+     recovery by shifting 3 tokens).  */
+  static char const *input = "abc";
+  return *input++;
+}
+
+int
+main (void)
+{
+  return yyparse ();
+}
+]])
+
+AT_BISON_CHECK([[-o input.c input.y]])
+
+# gcc warns about tautologies and fallacies involving comparisons for
+# unsigned char.  However, it doesn't produce these same warnings for
+# size_t and many other types when the warnings would seem to make just
+# as much sense.  We ignore the warnings.
+CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//`
+AT_COMPILE([[input]])
+
+AT_PARSER_CHECK([[./input]], [[2]], [],
+[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B or 123456789112345678921234567893123456789412345678951234567896123C
+syntax error
+memory exhausted
+]])
+
+AT_CLEANUP
--
1.5.4.3


From b9bdbec37d95944fac7226374f6c5e325c808558 Mon Sep 17 00:00:00 2001
From: Joel E. Denny <jdenny@...>
Date: Fri, 11 Sep 2009 02:54:53 -0400
Subject: [PATCH] yysyntax_error: avoid duplicate lookahead collection.

Except when memory reallocation is required, this change
eliminates the need to invoke yysyntax_error twice and thus to
repeat the collection of lookaheads.  It also prepares for
future extensions that will make those repetitions more
expensive and that will require additional memory management in
yysyntax_error.  Finally, it fixes an obscure bug already
exercised in the test suite.
* data/yacc.c (yysyntax_error): Add arguments for message
buffer variables stored in the parser.  Instead of size, return
status similar to yyparse status but indicating success of
message creation.  Other than the actual reallocation of the
message buffer, import and clean up memory management code
from...
(yyparse, yypush_parse): ... here.
* tests/regression.at (-Dparse.error=verbose overflow): No
longer an expected failure.
---
 ChangeLog           |   20 ++
 data/yacc.c         |  142 ++++++-----
 src/parse-gram.c    |  686 ++++++++++++++++++++++++++-------------------------
 src/parse-gram.h    |   12 +-
 tests/regression.at |   11 +-
 5 files changed, 452 insertions(+), 419 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b508741..4d877c0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2009-09-11  Joel E. Denny  <jdenny@...>
+
+ yysyntax_error: avoid duplicate lookahead collection.
+ Except when memory reallocation is required, this change
+ eliminates the need to invoke yysyntax_error twice and thus to
+ repeat the collection of lookaheads.  It also prepares for
+ future extensions that will make those repetitions more
+ expensive and that will require additional memory management in
+ yysyntax_error.  Finally, it fixes an obscure bug already
+ exercised in the test suite.
+ * data/yacc.c (yysyntax_error): Add arguments for message
+ buffer variables stored in the parser.  Instead of size, return
+ status similar to yyparse status but indicating success of
+ message creation.  Other than the actual reallocation of the
+ message buffer, import and clean up memory management code
+ from...
+ (yyparse, yypush_parse): ... here.
+ * tests/regression.at (-Dparse.error=verbose overflow): No
+ longer an expected failure.
+
 2009-09-10  Joel E. Denny  <jdenny@...>
 
  yacc.c: test yysyntax_error memory management more.
diff --git a/data/yacc.c b/data/yacc.c
index 26c5996..c98cd55 100644
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -889,26 +889,27 @@ yytnamerr (char *yyres, const char *yystr)
 }
 # endif
 
-/* Copy into YYRESULT an error message about the unexpected token
-   YYTOKEN while in state YYSTATE.  Return the number of bytes copied,
-   including the terminating null byte.  If YYRESULT is null, do not
-   copy anything; just return the number of bytes that would be
-   copied.  As a special case, return 0 if an ordinary "syntax error"
-   message will do.  Return YYSIZE_MAXIMUM if overflow occurs during
-   size calculation.  */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yytoken)
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+   about the unexpected token YYTOKEN while in state YYSTATE.
+
+   Return 0 if *YYMSG was successfully written.  Return 1 if an ordinary
+   "syntax error" message will suffice instead.  Return 2 if *YYMSG is
+   not large enough to hold the message.  In the last case, also set
+   *YYMSG_ALLOC to either (a) the required number of bytes or (b) zero
+   if the required number of bytes is too large to store.  */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+                int yystate, int yytoken)
 {
   int yyn = yypact[yystate];
 
   if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
-    return 0;
+    return 1;
   else
     {
       YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
       YYSIZE_T yysize = yysize0;
       YYSIZE_T yysize1;
-      int yysize_overflow = 0;
       enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
       /* Internationalized format string. */
       const char *yyformat = 0;
@@ -942,11 +943,17 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
       }
     yyarg[yycount++] = yytname[yyx];
     yysize1 = yysize + yytnamerr (0, yytname[yyx]);
-    yysize_overflow |= (yysize1 < yysize);
+    if (! (yysize <= yysize1
+   && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+      {
+ /* Overflow.  */
+ *yymsg_alloc = 0;
+ return 2;
+      }
     yysize = yysize1;
   }
 
-        switch (yycount)
+      switch (yycount)
         {
 #define YYCASE_(N, S)                           \
           case N:                               \
@@ -961,32 +968,42 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
         }
 
       yysize1 = yysize + yystrlen (yyformat);
-      yysize_overflow |= (yysize1 < yysize);
+      if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+        {
+          /* Overflow.  */
+          *yymsg_alloc = 0;
+          return 2;
+        }
       yysize = yysize1;
 
-      if (yysize_overflow)
- return YYSIZE_MAXIMUM;
+      if (*yymsg_alloc < yysize)
+        {
+          *yymsg_alloc = 2 * yysize;
+          if (! (yysize <= *yymsg_alloc
+                 && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+            *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+          return 2;
+        }
 
-      if (yyresult)
- {
-  /* Avoid sprintf, as that infringes on the user's name space.
-     Don't have undefined behavior even if the translation
-     produced a string with the wrong number of "%s"s.  */
-  char *yyp = yyresult;
-  int yyi = 0;
-  while ((*yyp = *yyformat) != '\0')
-            if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
-              {
-                yyp += yytnamerr (yyp, yyarg[yyi++]);
-                yyformat += 2;
-              }
-            else
-              {
-                yyp++;
-                yyformat++;
-              }
- }
-      return yysize;
+      /* Avoid sprintf, as that infringes on the user's name space.
+         Don't have undefined behavior even if the translation
+         produced a string with the wrong number of "%s"s.  */
+      {
+        char *yyp = *yymsg;
+        int yyi = 0;
+        while ((*yyp = *yyformat) != '\0')
+          if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+            {
+              yyp += yytnamerr (yyp, yyarg[yyi++]);
+              yyformat += 2;
+            }
+          else
+            {
+              yyp++;
+              yyformat++;
+            }
+      }
+      return 0;
     }
 }
 #endif /* YYERROR_VERBOSE */
@@ -1431,37 +1448,28 @@ yyerrlab:
 #if ! YYERROR_VERBOSE
       yyerror (]b4_yyerror_args[YY_("syntax error"));
 #else
-      {
- YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
-  {
-    YYSIZE_T yyalloc = 2 * yysize;
-    if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
-      yyalloc = YYSTACK_ALLOC_MAXIMUM;
-    if (yymsg != yymsgbuf)
-      YYSTACK_FREE (yymsg);
-    yymsg = (char *) YYSTACK_ALLOC (yyalloc);
-    if (yymsg)
-      yymsg_alloc = yyalloc;
-    else
-      {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
-      }
-  }
-
- if (0 < yysize && yysize <= yymsg_alloc)
-  {
-    (void) yysyntax_error (yymsg, yystate, yytoken);
-    yyerror (]b4_yyerror_args[yymsg);
-  }
- else
-  {
-    yyerror (]b4_yyerror_args[YY_("syntax error"));
-    if (yysize != 0)
-      goto yyexhaustedlab;
-  }
-      }
+      while (1)
+        {
+          int yysyntax_error_status =
+            yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken);
+          if (yysyntax_error_status == 2 && yymsg_alloc > 0)
+            {
+              if (yymsg != yymsgbuf)
+                YYSTACK_FREE (yymsg);
+              yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+              if (yymsg)
+                continue;
+              yymsg = yymsgbuf;
+              yymsg_alloc = sizeof yymsgbuf;
+            }
+          if (yysyntax_error_status == 0)
+            yyerror (]b4_yyerror_args[yymsg);
+          else
+            yyerror (]b4_yyerror_args[YY_("syntax error"));
+          if (yysyntax_error_status == 2)
+            goto yyexhaustedlab;
+          break;
+        }
 #endif
     }
 
diff --git a/tests/regression.at b/tests/regression.at
index 7e17a91..efe72af 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1327,16 +1327,13 @@ AT_CLEANUP
 # Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an
 # invocation of yysyntax_error has caused yymsg_alloc to grow to exactly
 # YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had
-# to be clipped to YYSTACK_ALLOC_MAXIMUM).  Now imagine a subsequent
-# invocation of yysyntax_error that overflows during its size
-# calculation and thus returns YYSIZE_MAXIMUM to yyparse.  Then, yyparse
-# will invoke yyerror using the old contents of yymsg.  This bug needs
-# to be fixed.
+# to be clipped to YYSTACK_ALLOC_MAXIMUM).  In an old version of yacc.c,
+# a subsequent invocation of yysyntax_error that overflows during its
+# size calculation would return YYSIZE_MAXIMUM to yyparse.  Then,
+# yyparse would invoke yyerror using the old contents of yymsg.
 
 AT_SETUP([[-Dparse.error=verbose overflow]])
 
-AT_XFAIL_IF([[:]])
-
 AT_DATA_GRAMMAR([input.y],
 [[%code {
   #include <stdio.h>
--
1.5.4.3

Re: [PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Fri, 11 Sep 2009, Joel E. Denny wrote:

> Here are two new patches that I'd like to apply to master and branch-2.5.
>
> The first patch adds two new test groups to regression.at.

> >From dc23d678388635a058492294f6cd45105f581c98 Mon Sep 17 00:00:00 2001
> From: Joel E. Denny <jdenny@...>
> Date: Thu, 10 Sep 2009 18:29:01 -0400
> Subject: [PATCH] yacc.c: test yysyntax_error memory management more.
>
> * tests/regression.at (-Dparse.error=verbose and
> YYSTACK_USE_ALLOCA): New test group.
> (-Dparse.error=verbose overflow): New test group that reveals
> an obscure bug.  Expected fail for now.

The following needs to be folded into that patch to fix
maintainer-push-check.  It also improves the diagnostic.

diff --git a/tests/regression.at b/tests/regression.at
index 7e17a91..de91823 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1382,7 +1382,12 @@ check:
       fprintf (stderr,
                "The assumptions of this test group are no longer\n"
                "valid, so it may no longer catch the error it was\n"
-               "designed to catch.\n");
+               "designed to catch.  Specifically, the following\n"
+               "values should all be 255:\n\n");
+      fprintf (stderr, "  yymsg_alloc = %d\n", yymsg_alloc);
+      fprintf (stderr, "  YYSTACK_ALLOC_MAXIMUM = %d\n",
+               YYSTACK_ALLOC_MAXIMUM);
+      fprintf (stderr, "  YYSIZE_MAXIMUM = %d\n", YYSIZE_MAXIMUM);
       YYABORT;
     }
 }
@@ -1417,6 +1422,10 @@ yylex (void)
 int
 main (void)
 {
+  /* Push parsers throw away the message buffer between tokens, so skip
+     this test under maintainer-push-check.  */
+  if (YYPUSH)
+    return 77;
   return yyparse ();
 }
 ]])



Re: [PATCH] yysyntax_error: make it manage its own memory.

by Akim Demaille :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message


Le 11 sept. 2009 à 09:40, Joel E. Denny a écrit :

> +# gcc warns about tautologies and fallacies involving comparisons for
> +# unsigned char.  However, it doesn't produce these same warnings for
> +# size_t and many other types when the warnings would seem to make  
> just
> +# as much sense.  We ignore the warnings.
> +CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//`
> +AT_COMPILE([[input]])

You might want to introduce something like this in tests/atconfig.in,  
or use "$O0CFLAGS $WARN_CFLAGS" instead of getting bits of $CFLAGS.

Other than that, the patches are fine, please install.  (One note  
though, maybe we already debated about this, but I forgot: I prefer to  
use < and <= and not > and >=.  Another lesson I took from Paul  
Eggert.  Not everybody agrees.)


Re: [PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi Akim.

On Wed, 16 Sep 2009, Akim Demaille wrote:

> Le 11 sept. 2009 à 09:40, Joel E. Denny a écrit :
>
> > +# gcc warns about tautologies and fallacies involving comparisons for
> > +# unsigned char.  However, it doesn't produce these same warnings for
> > +# size_t and many other types when the warnings would seem to make just
> > +# as much sense.  We ignore the warnings.
> > +CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//`
> > +AT_COMPILE([[input]])
>
> You might want to introduce something like this in tests/atconfig.in, or use
> "$O0CFLAGS $WARN_CFLAGS" instead of getting bits of $CFLAGS.
Thanks.

> Other than that, the patches are fine, please install.  (One note though,
> maybe we already debated about this, but I forgot: I prefer to use < and <=
> and not > and >=.  Another lesson I took from Paul Eggert.  Not everybody
> agrees.)

I got that from Paul as well, but sometimes I forget to do it.  Thanks for
the reminder.

I'll fold in the differences below and push those to master.

I'd also like to cherry pick to branch-2.5 because I think it would be
nice to release lookahead correction with IELR.  However, yysyntax_error
on master has already evolved away from branch-2.5, so I first need to
cherry pick some of your commits from master:

  eeb29 Simplify the i18n of the error messages.
  84eed Pass the token type to yysyntax_error.

I'm not sure I'll find time to port lookahead correction to other
skeletons before the release of 2.5, so I'm not currently planning to
search for and cherry pick your corresponding commits for other skeletons.

Does all this sound ok to you?

diff --git a/ChangeLog b/ChangeLog
index 61ba1e1..cc89fde 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 2009-09-12  Joel E. Denny  <jdenny@...>
 
  yacc.c: test yysyntax_error memory management more.
+ * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro.
  * tests/regression.at (-Dparse.error=verbose and
  YYSTACK_USE_ALLOCA): New test group.
  (-Dparse.error=verbose overflow): New test group that reveals
diff --git a/tests/atlocal.in b/tests/atlocal.in
index 91ba674..3e33965 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -10,6 +10,10 @@
 # We want no optimization.
 CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@ @WERROR_CFLAGS@'
 
+# Sometimes a test group needs to ignore gcc warnings, so it locally
+# sets CFLAGS to this.
+NO_WERROR_CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@'
+
 # We need `config.h'.
 CPPFLAGS="-I$abs_top_builddir/lib @CPPFLAGS@"
 
diff --git a/tests/regression.at b/tests/regression.at
index de91823..a08a9a8 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1262,7 +1262,7 @@ start: check syntax_error syntax_error ;
 
 check:
 {
-  if (sizeof yymsgbuf > 128)
+  if (128 < sizeof yymsgbuf)
     {
       fprintf (stderr,
                "The initial size of yymsgbuf in yyparse has increased\n"
@@ -1436,7 +1436,7 @@ AT_BISON_CHECK([[-o input.c input.y]])
 # unsigned char.  However, it doesn't produce these same warnings for
 # size_t and many other types when the warnings would seem to make just
 # as much sense.  We ignore the warnings.
-CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//`
+CFLAGS="$NO_WERROR_CFLAGS"
 AT_COMPILE([[input]])
 
 AT_PARSER_CHECK([[./input]], [[2]], [],
diff --git a/data/yacc.c b/data/yacc.c
index c98cd55..ea2ae14 100644
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -1452,7 +1452,7 @@ yyerrlab:
         {
           int yysyntax_error_status =
             yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken);
-          if (yysyntax_error_status == 2 && yymsg_alloc > 0)
+          if (yysyntax_error_status == 2 && 0 < yymsg_alloc)
             {
               if (yymsg != yymsgbuf)
                 YYSTACK_FREE (yymsg);

Re: [PATCH] yysyntax_error: make it manage its own memory.

by Joel E. Denny-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Mon, 21 Sep 2009, Joel E. Denny wrote:

> I'll fold in the differences below and push those to master.

After that and perhaps some other minor tweaks I've forgotten, the patches
below are the result.  I pushed them to master.

> I'd also like to cherry pick to branch-2.5 because I think it would be
> nice to release lookahead correction with IELR.  However, yysyntax_error
> on master has already evolved away from branch-2.5, so I first need to
> cherry pick some of your commits from master:
>
>   eeb29 Simplify the i18n of the error messages.
>   84eed Pass the token type to yysyntax_error.

I pushed all that too.

> I'm not sure I'll find time to port lookahead correction to other
> skeletons before the release of 2.5, so I'm not currently planning to
> search for and cherry pick your corresponding commits for other skeletons.

As it turns out, I have several more bug fixes that I need to push before
pushing my lookahead correction implementation.  Even though I'm not
worried about whether lookahead correction makes it into the other
skeletons before 2.5, I think those fixes should.  I'd like to post each
complete fix one at a time for comments, but that means I first need to
port them to the other skeletons.  I'll work on all that when I can find
more time.

>From 52cea04ad36abf3ab684b88cba45d6c26dda80c9 Mon Sep 17 00:00:00 2001
From: Joel E. Denny <jdenny@...>
Date: Wed, 23 Sep 2009 17:37:58 -0400
Subject: [PATCH] yysyntax_error: test memory management more.

* tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro.
* tests/regression.at (parse.error=verbose and
YYSTACK_USE_ALLOCA): New test group.
(parse.error=verbose overflow): New test group that reveals an
obscure bug.  Expected fail for now.
---
 ChangeLog           |    9 ++
 tests/atlocal.in    |    4 +
 tests/regression.at |  209 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a813d69..92cfc13 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2009-09-23  Joel E. Denny  <jdenny@...>
+
+ yysyntax_error: test memory management more.
+ * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro.
+ * tests/regression.at (parse.error=verbose and
+ YYSTACK_USE_ALLOCA): New test group.
+ (parse.error=verbose overflow): New test group that reveals an
+ obscure bug.  Expected fail for now.
+
 2009-10-04  Joel E. Denny  <jdenny@...>
 
  benchmarks: use %debug consistently among grammars.
diff --git a/tests/atlocal.in b/tests/atlocal.in
index 2e46329..9264a40 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -10,6 +10,10 @@
 # We want no optimization.
 CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@ @WERROR_CFLAGS@'
 
+# Sometimes a test group needs to ignore gcc warnings, so it locally
+# sets CFLAGS to this.
+NO_WERROR_CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@'
+
 # We need `config.h'.
 CPPFLAGS="-I$abs_top_builddir/lib @CPPFLAGS@"
 
diff --git a/tests/regression.at b/tests/regression.at
index cfc071e..0420f4e 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1260,3 +1260,212 @@ AT_BISON_CHECK([[-o input.c -Dlr.type=ielr input.y]])
 AT_CHECK([[diff -u lalr.c ielr.c]])
 
 AT_CLEANUP
+
+
+
+## -------------------------------------------- ##
+## parse.error=verbose and YYSTACK_USE_ALLOCA.  ##
+## -------------------------------------------- ##
+
+AT_SETUP([[parse.error=verbose and YYSTACK_USE_ALLOCA]])
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+  #include <stdio.h>
+  void yyerror (char const *);
+  int yylex (void);
+  #define YYSTACK_USE_ALLOCA 1
+}
+
+%define parse.error verbose
+
+%%
+
+start: check syntax_error syntax_error ;
+
+check:
+{
+  if (128 < sizeof yymsgbuf)
+    {
+      fprintf (stderr,
+               "The initial size of yymsgbuf in yyparse has increased\n"
+               "since this test group was last updated.  As a result,\n"
+               "this test group may no longer manage to induce a\n"
+               "reallocation of the syntax error message buffer.\n"
+               "This test group must be adjusted to produce a longer\n"
+               "error message.\n");
+      YYABORT;
+    }
+}
+;
+
+// Induce a syntax error message whose total length is more than
+// sizeof yymsgbuf in yyparse.  Each token here is 64 bytes.
+syntax_error:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| error 'a' 'b' 'c'
+;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+  /* Induce two syntax error messages (which requires full error
+     recovery by shifting 3 tokens) in order to detect any loss of the
+     reallocated buffer.  */
+  static char const *input = "abc";
+  return *input++;
+}
+
+int
+main (void)
+{
+  return yyparse ();
+}
+]])
+
+AT_BISON_CHECK([[-o input.c input.y]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [],
+[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
+syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B
+]])
+
+AT_CLEANUP
+
+
+
+## ------------------------------ ##
+## parse.error=verbose overflow.  ##
+## ------------------------------ ##
+
+# Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an
+# invocation of yysyntax_error has caused yymsg_alloc to grow to exactly
+# YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had
+# to be clipped to YYSTACK_ALLOC_MAXIMUM).  Now imagine a subsequent
+# invocation of yysyntax_error that overflows during its size
+# calculation and thus returns YYSIZE_MAXIMUM to yyparse.  Then, yyparse
+# will invoke yyerror using the old contents of yymsg.  This bug needs
+# to be fixed.
+
+AT_SETUP([[parse.error=verbose overflow]])
+
+AT_XFAIL_IF([[:]])
+
+AT_DATA_GRAMMAR([input.y],
+[[%code {
+  #include <stdio.h>
+  void yyerror (char const *);
+  int yylex (void);
+
+  /* This prevents this test case from having to induce error messages
+     large enough to overflow size_t.  */
+  #define YYSIZE_T unsigned char
+
+  /* Bring in malloc so yacc.c doesn't try to provide a malloc prototype
+     using our YYSIZE_T.  */
+  #include <stdlib.h>
+
+  /* Max depth is usually much smaller than YYSTACK_ALLOC_MAXIMUM, and
+     we don't want gcc to warn everywhere this constant would be too big
+     to make sense for our YYSIZE_T.  */
+  #define YYMAXDEPTH 100
+}
+
+%define parse.error verbose
+
+%%
+
+start: syntax_error1 check syntax_error2 ;
+
+// Induce a syntax error message whose total length causes yymsg in
+// yyparse to be reallocated to size YYSTACK_ALLOC_MAXIMUM, which
+// should be 255.  Each token here is 64 bytes.
+syntax_error1:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| "123456789112345678921234567893123456789412345678951234567896123C"
+| error 'a' 'b' 'c'
+;
+
+check:
+{
+  if (yymsg_alloc != YYSTACK_ALLOC_MAXIMUM
+      || YYSTACK_ALLOC_MAXIMUM != YYSIZE_MAXIMUM
+      || YYSIZE_MAXIMUM != 255)
+    {
+      fprintf (stderr,
+               "The assumptions of this test group are no longer\n"
+               "valid, so it may no longer catch the error it was\n"
+               "designed to catch.  Specifically, the following\n"
+               "values should all be 255:\n\n");
+      fprintf (stderr, "  yymsg_alloc = %d\n", yymsg_alloc);
+      fprintf (stderr, "  YYSTACK_ALLOC_MAXIMUM = %d\n",
+               YYSTACK_ALLOC_MAXIMUM);
+      fprintf (stderr, "  YYSIZE_MAXIMUM = %d\n", YYSIZE_MAXIMUM);
+      YYABORT;
+    }
+}
+;
+
+// Now overflow.
+syntax_error2:
+  "123456789112345678921234567893123456789412345678951234567896123A"
+| "123456789112345678921234567893123456789412345678951234567896123B"
+| "123456789112345678921234567893123456789412345678951234567896123C"
+| "123456789112345678921234567893123456789412345678951234567896123D"
+| "123456789112345678921234567893123456789412345678951234567896123E"
+;
+
+%%
+
+void
+yyerror (char const *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+}
+
+int
+yylex (void)
+{
+  /* Induce two syntax error messages (which requires full error
+     recovery by shifting 3 tokens).  */
+  static char const *input = "abc";
+  return *input++;
+}
+
+int
+main (void)
+{
+  /* Push parsers throw away the message buffer between tokens, so skip
+     this test under maintainer-push-check.  */
+  if (YYPUSH)
+    return 77;
+  return yyparse ();
+}
+]])
+
+AT_BISON_CHECK([[-o input.c input.y]])
+
+# gcc warns about tautologies and fallacies involving comparisons for
+# unsigned char.  However, it doesn't produce these same warnings for
+# size_t and many other types when the warnings would seem to make just
+# as much sense.  We ignore the warnings.
+[CFLAGS="$NO_WERROR_CFLAGS"]
+AT_COMPILE([[input]])
+
+AT_PARSER_CHECK([[./input]], [[2]], [],
+[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B or 123456789112345678921234567893123456789412345678951234567896123C
+syntax error
+memory exhausted
+]])
+
+AT_CLEANUP
--
1.5.4.3


>From 45319f1365eb8d125424f31401d9d33cc02ff4ad Mon Sep 17 00:00:00 2001
From: Joel E. Denny <jdenny@...>
Date: Wed, 23 Sep 2009 17:39:39 -0400
Subject: [PATCH] yysyntax_error: avoid duplicate lookahead collection.

Except when memory reallocation is required, this change
eliminates the need to invoke yysyntax_error twice and thus to
repeat the collection of lookaheads.  It also prepares for
future extensions that will make those repetitions more
expensive and that will require additional memory management in
yysyntax_error.  Finally, it fixes an obscure bug already
exercised in the test suite.
* data/yacc.c (yysyntax_error): Add arguments for message
buffer variables stored in the parser.  Instead of size, return
status similar to yyparse status but indicating success of
message creation.  Other than the actual reallocation of the
message buffer, import and clean up memory management code
from...
(yyparse, yypush_parse): ... here.
* tests/regression.at (parse.error=verbose overflow): No longer
an expected failure.
---
 ChangeLog           |   20 ++
 data/yacc.c         |  142 ++++++-----
 src/parse-gram.c    |  686 ++++++++++++++++++++++++++-------------------------
 src/parse-gram.h    |   12 +-
 tests/regression.at |   11 +-
 5 files changed, 452 insertions(+), 419 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 92cfc13..6395ddd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,25 @@
 2009-09-23  Joel E. Denny  <jdenny@...>
 
+ yysyntax_error: avoid duplicate lookahead collection.
+ Except when memory reallocation is required, this change
+ eliminates the need to invoke yysyntax_error twice and thus to
+ repeat the collection of lookaheads.  It also prepares for
+ future extensions that will make those repetitions more
+ expensive and that will require additional memory management in
+ yysyntax_error.  Finally, it fixes an obscure bug already
+ exercised in the test suite.
+ * data/yacc.c (yysyntax_error): Add arguments for message
+ buffer variables stored in the parser.  Instead of size, return
+ status similar to yyparse status but indicating success of
+ message creation.  Other than the actual reallocation of the
+ message buffer, import and clean up memory management code
+ from...
+ (yyparse, yypush_parse): ... here.
+ * tests/regression.at (parse.error=verbose overflow): No longer
+ an expected failure.
+
+2009-09-23  Joel E. Denny  <jdenny@...>
+
  yysyntax_error: test memory management more.
  * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro.
  * tests/regression.at (parse.error=verbose and
diff --git a/data/yacc.c b/data/yacc.c
index 26c5996..ea2ae14 100644
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -889,26 +889,27 @@ yytnamerr (char *yyres, const char *yystr)
 }
 # endif
 
-/* Copy into YYRESULT an error message about the unexpected token
-   YYTOKEN while in state YYSTATE.  Return the number of bytes copied,
-   including the terminating null byte.  If YYRESULT is null, do not
-   copy anything; just return the number of bytes that would be
-   copied.  As a special case, return 0 if an ordinary "syntax error"
-   message will do.  Return YYSIZE_MAXIMUM if overflow occurs during
-   size calculation.  */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yytoken)
+/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
+   about the unexpected token YYTOKEN while in state YYSTATE.
+
+   Return 0 if *YYMSG was successfully written.  Return 1 if an ordinary
+   "syntax error" message will suffice instead.  Return 2 if *YYMSG is
+   not large enough to hold the message.  In the last case, also set
+   *YYMSG_ALLOC to either (a) the required number of bytes or (b) zero
+   if the required number of bytes is too large to store.  */
+static int
+yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
+                int yystate, int yytoken)
 {
   int yyn = yypact[yystate];
 
   if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
-    return 0;
+    return 1;
   else
     {
       YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
       YYSIZE_T yysize = yysize0;
       YYSIZE_T yysize1;
-      int yysize_overflow = 0;
       enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
       /* Internationalized format string. */
       const char *yyformat = 0;
@@ -942,11 +943,17 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
       }
     yyarg[yycount++] = yytname[yyx];
     yysize1 = yysize + yytnamerr (0, yytname[yyx]);
-    yysize_overflow |= (yysize1 < yysize);
+    if (! (yysize <= yysize1
+   && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+      {
+ /* Overflow.  */
+ *yymsg_alloc = 0;
+ return 2;
+      }
     yysize = yysize1;
   }
 
-        switch (yycount)
+      switch (yycount)
         {
 #define YYCASE_(N, S)                           \
           case N:                               \
@@ -961,32 +968,42 @@ yysyntax_error (char *yyresult, int yystate, int yytoken)
         }
 
       yysize1 = yysize + yystrlen (yyformat);
-      yysize_overflow |= (yysize1 < yysize);
+      if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+        {
+          /* Overflow.  */
+          *yymsg_alloc = 0;
+          return 2;
+        }
       yysize = yysize1;
 
-      if (yysize_overflow)
- return YYSIZE_MAXIMUM;
+      if (*yymsg_alloc < yysize)
+        {
+          *yymsg_alloc = 2 * yysize;
+          if (! (yysize <= *yymsg_alloc
+                 && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
+            *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
+          return 2;
+        }
 
-      if (yyresult)
- {
-  /* Avoid sprintf, as that infringes on the user's name space.
-     Don't have undefined behavior even if the translation
-     produced a string with the wrong number of "%s"s.  */
-  char *yyp = yyresult;
-  int yyi = 0;
-  while ((*yyp = *yyformat) != '\0')
-            if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
-              {
-                yyp += yytnamerr (yyp, yyarg[yyi++]);
-                yyformat += 2;
-              }
-            else
-              {
-                yyp++;
-                yyformat++;
-              }
- }
-      return yysize;
+      /* Avoid sprintf, as that infringes on the user's name space.
+         Don't have undefined behavior even if the translation
+         produced a string with the wrong number of "%s"s.  */
+      {
+        char *yyp = *yymsg;
+        int yyi = 0;
+        while ((*yyp = *yyformat) != '\0')
+          if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
+            {
+              yyp += yytnamerr (yyp, yyarg[yyi++]);
+              yyformat += 2;
+            }
+          else
+            {
+              yyp++;
+              yyformat++;
+            }
+      }
+      return 0;
     }
 }
 #endif /* YYERROR_VERBOSE */
@@ -1431,37 +1448,28 @@ yyerrlab:
 #if ! YYERROR_VERBOSE
       yyerror (]b4_yyerror_args[YY_("syntax error"));
 #else
-      {
- YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
-  {
-    YYSIZE_T yyalloc = 2 * yysize;
-    if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
-      yyalloc = YYSTACK_ALLOC_MAXIMUM;
-    if (yymsg != yymsgbuf)
-      YYSTACK_FREE (yymsg);
-    yymsg = (char *) YYSTACK_ALLOC (yyalloc);
-    if (yymsg)
-      yymsg_alloc = yyalloc;
-    else
-      {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
-      }
-  }
-
- if (0 < yysize && yysize <= yymsg_alloc)
-  {
-    (void) yysyntax_error (yymsg, yystate, yytoken);
-    yyerror (]b4_yyerror_args[yymsg);
-  }
- else
-  {
-    yyerror (]b4_yyerror_args[YY_("syntax error"));
-    if (yysize != 0)
-      goto yyexhaustedlab;
-  }
-      }
+      while (1)
+        {
+          int yysyntax_error_status =
+            yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken);
+          if (yysyntax_error_status == 2 && 0 < yymsg_alloc)
+            {
+              if (yymsg != yymsgbuf)
+                YYSTACK_FREE (yymsg);
+              yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
+              if (yymsg)
+                continue;
+              yymsg = yymsgbuf;
+              yymsg_alloc = sizeof yymsgbuf;
+            }
+          if (yysyntax_error_status == 0)
+            yyerror (]b4_yyerror_args[yymsg);
+          else
+            yyerror (]b4_yyerror_args[YY_("syntax error"));
+          if (yysyntax_error_status == 2)
+            goto yyexhaustedlab;
+          break;
+        }
 #endif
     }
 
diff --git a/tests/regression.at b/tests/regression.at
index 0420f4e..3857916 100644
--- a/tests/regression.at
+++ b/tests/regression.at
@@ -1350,16 +1350,13 @@ AT_CLEANUP
 # Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an
 # invocation of yysyntax_error has caused yymsg_alloc to grow to exactly
 # YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had
-# to be clipped to YYSTACK_ALLOC_MAXIMUM).  Now imagine a subsequent
-# invocation of yysyntax_error that overflows during its size
-# calculation and thus returns YYSIZE_MAXIMUM to yyparse.  Then, yyparse
-# will invoke yyerror using the old contents of yymsg.  This bug needs
-# to be fixed.
+# to be clipped to YYSTACK_ALLOC_MAXIMUM).  In an old version of yacc.c,
+# a subsequent invocation of yysyntax_error that overflows during its
+# size calculation would return YYSIZE_MAXIMUM to yyparse.  Then,
+# yyparse would invoke yyerror using the old contents of yymsg.
 
 AT_SETUP([[parse.error=verbose overflow]])
 
-AT_XFAIL_IF([[:]])
-
 AT_DATA_GRAMMAR([input.y],
 [[%code {
   #include <stdio.h>
--
1.5.4.3