|
View:
New views
8 Messages
—
Rating Filter:
Alert me
|
|
|
[PATCH] yysyntax_error: make it manage its own memory.I'd like to push this patch to master and branch-2.5. I need it for that
lookahead correction implementation I spoke about recently, but I think it's worthwhile on its own as well. Any objections? I haven't looked at porting this to the other skeletons yet, but would there be any objections there? From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001 From: Joel E. Denny <jdenny@...> Date: Mon, 7 Sep 2009 22:01:16 -0400 Subject: [PATCH] yysyntax_error: make it manage its own memory. This improves readability, eliminates the need to invoke the function twice and thus repeat the lookahead collection, and prepares for future extensions that will make those repetitions more expensive and that will require additional memory management in yysyntax_error. * data/yacc.c (yysyntax_error): Add arguments for message buffer variables stored in the parser. Instead of size, return status similar to yyparse status but indicating success of message creation. Import memory management code from... (yyparse, yypush_parse): ... here. --- ChangeLog | 14 ++ data/yacc.c | 124 +++++----- src/parse-gram.c | 664 +++++++++++++++++++++++++++--------------------------- src/parse-gram.h | 10 +- 4 files changed, 415 insertions(+), 397 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c97582..be87163 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2009-09-07 Joel E. Denny <jdenny@...> + + yysyntax_error: make it manage its own memory. + This improves readability, eliminates the need to invoke the + function twice and thus repeat the lookahead collection, and + prepares for future extensions that will make those repetitions + more expensive and that will require additional memory + management in yysyntax_error. + * data/yacc.c (yysyntax_error): Add arguments for message + buffer variables stored in the parser. Instead of size, return + status similar to yyparse status but indicating success of + message creation. Import memory management code from... + (yyparse, yypush_parse): ... here. + 2009-09-10 Joel E. Denny <jdenny@...> Clean up yacc.c a little. diff --git a/data/yacc.c b/data/yacc.c index 26c5996..013165b 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -889,20 +889,21 @@ yytnamerr (char *yyres, const char *yystr) } # endif -/* Copy into YYRESULT an error message about the unexpected token - YYTOKEN while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yytoken) +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN while in state YYSTATE. If *YYMSG + is not large enough for the message, free *YYMSG iff it's not + YYMSGBUF, allocate a new buffer, and update *YYMSG and *YYMSG_ALLOC + accordingly. Return 0 if *YYMSG was successfully written. Return 1 + if an ordinary "syntax error" message will suffice instead. Return 2 + if memory could not be allocated for the error message. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, char *yymsgbuf, + int yystate, int yytoken) { int yyn = yypact[yystate]; if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; + return 1; else { YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); @@ -946,7 +947,7 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) yysize = yysize1; } - switch (yycount) + switch (yycount) { #define YYCASE_(N, S) \ case N: \ @@ -965,28 +966,47 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) yysize = yysize1; if (yysize_overflow) - return YYSIZE_MAXIMUM; + yysize = YYSIZE_MAXIMUM; - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - yyp++; - yyformat++; - } - } - return yysize; + if (*yymsg_alloc < yysize && *yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) + { + YYSIZE_T yyalloc = 2 * yysize; + if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) + yyalloc = YYSTACK_ALLOC_MAXIMUM; + if (*yymsg != yymsgbuf) + YYSTACK_FREE (*yymsg); + *yymsg = (char *) YYSTACK_ALLOC (yyalloc); + if (*yymsg) + *yymsg_alloc = yyalloc; + else + { + *yymsg = yymsgbuf; + *yymsg_alloc = sizeof yymsgbuf; + } + } + + if (! (0 < yysize && yysize <= *yymsg_alloc)) + return 2; + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + return 0; + } } } #endif /* YYERROR_VERBOSE */ @@ -1432,35 +1452,17 @@ yyerrlab: yyerror (]b4_yyerror_args[YY_("syntax error")); #else { - YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yytoken); - yyerror (]b4_yyerror_args[yymsg); - } - else - { - yyerror (]b4_yyerror_args[YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } + int yysyntax_error_result = + yysyntax_error (&yymsg_alloc, &yymsg, yymsgbuf, yystate, + yytoken); + if (yysyntax_error_result == 0) + yyerror (]b4_yyerror_args[yymsg); + else + { + yyerror (]b4_yyerror_args[YY_("syntax error")); + if (yysyntax_error_result == 2) + goto yyexhaustedlab; + } } #endif } |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.On Thu, 10 Sep 2009, Joel E. Denny wrote:
> I'd like to push this patch to master and branch-2.5. I need it for that > lookahead correction implementation I spoke about recently, but I think > it's worthwhile on its own as well. Any objections? I just took a look at the other skeletons, and I'm now pretty convinced this is a step in the right direction. However, I'll wait a day or so in case someone knows a reason yacc.c needs to keep the current design. > I haven't looked at > porting this to the other skeletons yet, but would there be any objections > there? Nevermind. For C++ and Java, we don't have to worry about string memory management, of course. glr.c's approach is already similar to the patch below but it uses longjmp for failures (yuck!) and it doesn't try to hang onto whatever memory it previously allocated for messages. > >From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001 > From: Joel E. Denny <jdenny@...> > Date: Mon, 7 Sep 2009 22:01:16 -0400 > Subject: [PATCH] yysyntax_error: make it manage its own memory. > > This improves readability, eliminates the need to invoke the > function twice and thus repeat the lookahead collection, and > prepares for future extensions that will make those repetitions > more expensive and that will require additional memory > management in yysyntax_error. > * data/yacc.c (yysyntax_error): Add arguments for message > buffer variables stored in the parser. Instead of size, return > status similar to yyparse status but indicating success of > message creation. Import memory management code from... > (yyparse, yypush_parse): ... here. > --- > ChangeLog | 14 ++ > data/yacc.c | 124 +++++----- > src/parse-gram.c | 664 +++++++++++++++++++++++++++--------------------------- > src/parse-gram.h | 10 +- > 4 files changed, 415 insertions(+), 397 deletions(-) > > diff --git a/ChangeLog b/ChangeLog > index 6c97582..be87163 100644 > --- a/ChangeLog > +++ b/ChangeLog > @@ -1,3 +1,17 @@ > +2009-09-07 Joel E. Denny <jdenny@...> > + > + yysyntax_error: make it manage its own memory. > + This improves readability, eliminates the need to invoke the > + function twice and thus repeat the lookahead collection, and > + prepares for future extensions that will make those repetitions > + more expensive and that will require additional memory > + management in yysyntax_error. > + * data/yacc.c (yysyntax_error): Add arguments for message > + buffer variables stored in the parser. Instead of size, return > + status similar to yyparse status but indicating success of > + message creation. Import memory management code from... > + (yyparse, yypush_parse): ... here. > + > 2009-09-10 Joel E. Denny <jdenny@...> > > Clean up yacc.c a little. > diff --git a/data/yacc.c b/data/yacc.c > index 26c5996..013165b 100644 > --- a/data/yacc.c > +++ b/data/yacc.c > @@ -889,20 +889,21 @@ yytnamerr (char *yyres, const char *yystr) > } > # endif > > -/* Copy into YYRESULT an error message about the unexpected token > - YYTOKEN while in state YYSTATE. Return the number of bytes copied, > - including the terminating null byte. If YYRESULT is null, do not > - copy anything; just return the number of bytes that would be > - copied. As a special case, return 0 if an ordinary "syntax error" > - message will do. Return YYSIZE_MAXIMUM if overflow occurs during > - size calculation. */ > -static YYSIZE_T > -yysyntax_error (char *yyresult, int yystate, int yytoken) > +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message > + about the unexpected token YYTOKEN while in state YYSTATE. If *YYMSG > + is not large enough for the message, free *YYMSG iff it's not > + YYMSGBUF, allocate a new buffer, and update *YYMSG and *YYMSG_ALLOC > + accordingly. Return 0 if *YYMSG was successfully written. Return 1 > + if an ordinary "syntax error" message will suffice instead. Return 2 > + if memory could not be allocated for the error message. */ > +static int > +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, char *yymsgbuf, > + int yystate, int yytoken) > { > int yyn = yypact[yystate]; > > if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) > - return 0; > + return 1; > else > { > YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); > @@ -946,7 +947,7 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) > yysize = yysize1; > } > > - switch (yycount) > + switch (yycount) > { > #define YYCASE_(N, S) \ > case N: \ > @@ -965,28 +966,47 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) > yysize = yysize1; > > if (yysize_overflow) > - return YYSIZE_MAXIMUM; > + yysize = YYSIZE_MAXIMUM; > > - if (yyresult) > - { > - /* Avoid sprintf, as that infringes on the user's name space. > - Don't have undefined behavior even if the translation > - produced a string with the wrong number of "%s"s. */ > - char *yyp = yyresult; > - int yyi = 0; > - while ((*yyp = *yyformat) != '\0') > - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) > - { > - yyp += yytnamerr (yyp, yyarg[yyi++]); > - yyformat += 2; > - } > - else > - { > - yyp++; > - yyformat++; > - } > - } > - return yysize; > + if (*yymsg_alloc < yysize && *yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) > + { > + YYSIZE_T yyalloc = 2 * yysize; > + if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) > + yyalloc = YYSTACK_ALLOC_MAXIMUM; > + if (*yymsg != yymsgbuf) > + YYSTACK_FREE (*yymsg); > + *yymsg = (char *) YYSTACK_ALLOC (yyalloc); > + if (*yymsg) > + *yymsg_alloc = yyalloc; > + else > + { > + *yymsg = yymsgbuf; > + *yymsg_alloc = sizeof yymsgbuf; > + } > + } > + > + if (! (0 < yysize && yysize <= *yymsg_alloc)) > + return 2; > + > + /* Avoid sprintf, as that infringes on the user's name space. > + Don't have undefined behavior even if the translation > + produced a string with the wrong number of "%s"s. */ > + { > + char *yyp = *yymsg; > + int yyi = 0; > + while ((*yyp = *yyformat) != '\0') > + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) > + { > + yyp += yytnamerr (yyp, yyarg[yyi++]); > + yyformat += 2; > + } > + else > + { > + yyp++; > + yyformat++; > + } > + return 0; > + } > } > } > #endif /* YYERROR_VERBOSE */ > @@ -1432,35 +1452,17 @@ yyerrlab: > yyerror (]b4_yyerror_args[YY_("syntax error")); > #else > { > - YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken); > - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) > - { > - YYSIZE_T yyalloc = 2 * yysize; > - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) > - yyalloc = YYSTACK_ALLOC_MAXIMUM; > - if (yymsg != yymsgbuf) > - YYSTACK_FREE (yymsg); > - yymsg = (char *) YYSTACK_ALLOC (yyalloc); > - if (yymsg) > - yymsg_alloc = yyalloc; > - else > - { > - yymsg = yymsgbuf; > - yymsg_alloc = sizeof yymsgbuf; > - } > - } > - > - if (0 < yysize && yysize <= yymsg_alloc) > - { > - (void) yysyntax_error (yymsg, yystate, yytoken); > - yyerror (]b4_yyerror_args[yymsg); > - } > - else > - { > - yyerror (]b4_yyerror_args[YY_("syntax error")); > - if (yysize != 0) > - goto yyexhaustedlab; > - } > + int yysyntax_error_result = > + yysyntax_error (&yymsg_alloc, &yymsg, yymsgbuf, yystate, > + yytoken); > + if (yysyntax_error_result == 0) > + yyerror (]b4_yyerror_args[yymsg); > + else > + { > + yyerror (]b4_yyerror_args[YY_("syntax error")); > + if (yysyntax_error_result == 2) > + goto yyexhaustedlab; > + } > } > #endif > } > > > > |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.Le 10 sept. 2009 à 07:16, Joel E. Denny a écrit : > I'd like to push this patch to master and branch-2.5. I need it for > that > lookahead correction implementation I spoke about recently, but I > think > it's worthwhile on its own as well. Any objections? I haven't > looked at > porting this to the other skeletons yet, but would there be any > objections > there? > >> From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 >> 2001 > From: Joel E. Denny <jdenny@...> > Date: Mon, 7 Sep 2009 22:01:16 -0400 > Subject: [PATCH] yysyntax_error: make it manage its own memory. > > This improves readability, eliminates the need to invoke the > function twice and thus repeat the lookahead collection, and > prepares for future extensions that will make those repetitions > more expensive and that will require additional memory > management in yysyntax_error. > * data/yacc.c (yysyntax_error): Add arguments for message > buffer variables stored in the parser. Instead of size, return > status similar to yyparse status but indicating success of > message creation. Import memory management code from... > (yyparse, yypush_parse): ... here. Hi Joel, I'm not sure I understand what's going on here. The code is meant to be used with either alloca or malloc, and now that you moved everything in syntax_error, I fail to see how alloca could work as expected. Reading the patch (not the resulting code), it really seems like you're possibly returning some pointer to a alloca'ed block of memory. I can't see how alloca could be used if not the way Paul made it. |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.On Thu, 10 Sep 2009, Akim Demaille wrote:
> Le 10 sept. 2009 à 07:16, Joel E. Denny a écrit : > > > From d3334adb83a8b87cfa7528ab184d0dbbe7283858 Mon Sep 17 00:00:00 2001 > > From: Joel E. Denny <jdenny@...> > > Date: Mon, 7 Sep 2009 22:01:16 -0400 > > Subject: [PATCH] yysyntax_error: make it manage its own memory. > I'm not sure I understand what's going on here. The code is meant to be used > with either alloca or malloc, and now that you moved everything in > syntax_error, I fail to see how alloca could work as expected. Reading the > patch (not the resulting code), it really seems like you're possibly returning > some pointer to a alloca'ed block of memory. > > I can't see how alloca could be used if not the way Paul made it. You're right! I forgot about alloca. Thanks for catching that. Here are two new patches that I'd like to apply to master and branch-2.5. The first patch adds two new test groups to regression.at. I normally don't like extending regression.at because I feel like it's the "miscellaneous" or "I can't think of a better category" file. Unfortunately, right now, I can't think of a better category. The first test group exercises alloca with verbose error messages. It reveals the bug you found in my above patch. The second test group reveals an obscure bug in the existing verbose error message implementation. I'm not sure how likely that bug is in a practical parser, but I guess it doesn't hurt to test for it. Because both of these tests seem heavily dependent upon the exact yacc.c implementation we have now, I've added some sanity checks to them to be sure they stay meaningful in the future. The second patch is another attempt to prepare yysyntax_error for my lookahead correction code but without breaking alloca support. It also fixes that obscure bug I just mentioned. More importantly, I think it makes the code more readable and slightly more efficient. The approach this time is that yysyntax_error only requires a second invocation if reallocation is needed. I'm finding this to be a tricky area. I'd appreciate any comments anyone has on these patches. From dc23d678388635a058492294f6cd45105f581c98 Mon Sep 17 00:00:00 2001 From: Joel E. Denny <jdenny@...> Date: Thu, 10 Sep 2009 18:29:01 -0400 Subject: [PATCH] yacc.c: test yysyntax_error memory management more. * tests/regression.at (-Dparse.error=verbose and YYSTACK_USE_ALLOCA): New test group. (-Dparse.error=verbose overflow): New test group that reveals an obscure bug. Expected fail for now. --- ChangeLog | 8 ++ tests/regression.at | 200 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 0 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6c97582..b508741 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ 2009-09-10 Joel E. Denny <jdenny@...> + yacc.c: test yysyntax_error memory management more. + * tests/regression.at (-Dparse.error=verbose and + YYSTACK_USE_ALLOCA): New test group. + (-Dparse.error=verbose overflow): New test group that reveals + an obscure bug. Expected fail for now. + +2009-09-10 Joel E. Denny <jdenny@...> + Clean up yacc.c a little. * data/yacc.c: Clean up M4 for readability, and make output whitespace more consistent. For the main parse function diff --git a/tests/regression.at b/tests/regression.at index 6bfc77e..7e17a91 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1237,3 +1237,203 @@ AT_COMPILE([[input]]) AT_PARSER_CHECK([[./input]]) AT_CLEANUP + + + +## ---------------------------------------------- ## +## -Dparse.error=verbose and YYSTACK_USE_ALLOCA. ## +## ---------------------------------------------- ## + +AT_SETUP([[-Dparse.error=verbose and YYSTACK_USE_ALLOCA]]) + +AT_DATA_GRAMMAR([input.y], +[[%code { + #include <stdio.h> + void yyerror (char const *); + int yylex (void); + #define YYSTACK_USE_ALLOCA 1 +} + +%define parse.error verbose + +%% + +start: check syntax_error syntax_error ; + +check: +{ + if (sizeof yymsgbuf > 128) + { + fprintf (stderr, + "The initial size of yymsgbuf in yyparse has increased\n" + "since this test group was last updated. As a result,\n" + "this test group may no longer manage to induce a\n" + "reallocation of the syntax error message buffer.\n" + "This test group must be adjusted to produce a longer\n" + "error message.\n"); + YYABORT; + } +} +; + +// Induce a syntax error message whose total length is more than +// sizeof yymsgbuf in yyparse. Each token here is 64 bytes. +syntax_error: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| error 'a' 'b' 'c' +; + +%% + +void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +int +yylex (void) +{ + /* Induce two syntax error messages (which requires full error + recovery by shifting 3 tokens) in order to detect any loss of the + reallocated buffer. */ + static char const *input = "abc"; + return *input++; +} + +int +main (void) +{ + return yyparse (); +} +]]) + +AT_BISON_CHECK([[-o input.c input.y]]) +AT_COMPILE([[input]]) +AT_PARSER_CHECK([[./input]], [[1]], [], +[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B +syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B +]]) + +AT_CLEANUP + + + +## -------------------------------- ## +## -Dparse.error=verbose overflow. ## +## -------------------------------- ## + +# Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an +# invocation of yysyntax_error has caused yymsg_alloc to grow to exactly +# YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had +# to be clipped to YYSTACK_ALLOC_MAXIMUM). Now imagine a subsequent +# invocation of yysyntax_error that overflows during its size +# calculation and thus returns YYSIZE_MAXIMUM to yyparse. Then, yyparse +# will invoke yyerror using the old contents of yymsg. This bug needs +# to be fixed. + +AT_SETUP([[-Dparse.error=verbose overflow]]) + +AT_XFAIL_IF([[:]]) + +AT_DATA_GRAMMAR([input.y], +[[%code { + #include <stdio.h> + void yyerror (char const *); + int yylex (void); + + /* This prevents this test case from having to induce error messages + large enough to overflow size_t. */ + #define YYSIZE_T unsigned char + + /* Bring in malloc so yacc.c doesn't try to provide a malloc prototype + using our YYSIZE_T. */ + #include <stdlib.h> + + /* Max depth is usually much smaller than YYSTACK_ALLOC_MAXIMUM, and + we don't want gcc to warn everywhere this constant would be too big + to make sense for our YYSIZE_T. */ + #define YYMAXDEPTH 100 +} + +%define parse.error verbose + +%% + +start: syntax_error1 check syntax_error2 ; + +// Induce a syntax error message whose total length causes yymsg in +// yyparse to be reallocated to size YYSTACK_ALLOC_MAXIMUM, which +// should be 255. Each token here is 64 bytes. +syntax_error1: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| "123456789112345678921234567893123456789412345678951234567896123C" +| error 'a' 'b' 'c' +; + +check: +{ + if (yymsg_alloc != YYSTACK_ALLOC_MAXIMUM + || YYSTACK_ALLOC_MAXIMUM != YYSIZE_MAXIMUM + || YYSIZE_MAXIMUM != 255) + { + fprintf (stderr, + "The assumptions of this test group are no longer\n" + "valid, so it may no longer catch the error it was\n" + "designed to catch.\n"); + YYABORT; + } +} +; + +// Now overflow. +syntax_error2: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| "123456789112345678921234567893123456789412345678951234567896123C" +| "123456789112345678921234567893123456789412345678951234567896123D" +| "123456789112345678921234567893123456789412345678951234567896123E" +; + +%% + +void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +int +yylex (void) +{ + /* Induce two syntax error messages (which requires full error + recovery by shifting 3 tokens). */ + static char const *input = "abc"; + return *input++; +} + +int +main (void) +{ + return yyparse (); +} +]]) + +AT_BISON_CHECK([[-o input.c input.y]]) + +# gcc warns about tautologies and fallacies involving comparisons for +# unsigned char. However, it doesn't produce these same warnings for +# size_t and many other types when the warnings would seem to make just +# as much sense. We ignore the warnings. +CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//` +AT_COMPILE([[input]]) + +AT_PARSER_CHECK([[./input]], [[2]], [], +[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B or 123456789112345678921234567893123456789412345678951234567896123C +syntax error +memory exhausted +]]) + +AT_CLEANUP -- 1.5.4.3 From b9bdbec37d95944fac7226374f6c5e325c808558 Mon Sep 17 00:00:00 2001 From: Joel E. Denny <jdenny@...> Date: Fri, 11 Sep 2009 02:54:53 -0400 Subject: [PATCH] yysyntax_error: avoid duplicate lookahead collection. Except when memory reallocation is required, this change eliminates the need to invoke yysyntax_error twice and thus to repeat the collection of lookaheads. It also prepares for future extensions that will make those repetitions more expensive and that will require additional memory management in yysyntax_error. Finally, it fixes an obscure bug already exercised in the test suite. * data/yacc.c (yysyntax_error): Add arguments for message buffer variables stored in the parser. Instead of size, return status similar to yyparse status but indicating success of message creation. Other than the actual reallocation of the message buffer, import and clean up memory management code from... (yyparse, yypush_parse): ... here. * tests/regression.at (-Dparse.error=verbose overflow): No longer an expected failure. --- ChangeLog | 20 ++ data/yacc.c | 142 ++++++----- src/parse-gram.c | 686 ++++++++++++++++++++++++++------------------------- src/parse-gram.h | 12 +- tests/regression.at | 11 +- 5 files changed, 452 insertions(+), 419 deletions(-) diff --git a/ChangeLog b/ChangeLog index b508741..4d877c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2009-09-11 Joel E. Denny <jdenny@...> + + yysyntax_error: avoid duplicate lookahead collection. + Except when memory reallocation is required, this change + eliminates the need to invoke yysyntax_error twice and thus to + repeat the collection of lookaheads. It also prepares for + future extensions that will make those repetitions more + expensive and that will require additional memory management in + yysyntax_error. Finally, it fixes an obscure bug already + exercised in the test suite. + * data/yacc.c (yysyntax_error): Add arguments for message + buffer variables stored in the parser. Instead of size, return + status similar to yyparse status but indicating success of + message creation. Other than the actual reallocation of the + message buffer, import and clean up memory management code + from... + (yyparse, yypush_parse): ... here. + * tests/regression.at (-Dparse.error=verbose overflow): No + longer an expected failure. + 2009-09-10 Joel E. Denny <jdenny@...> yacc.c: test yysyntax_error memory management more. diff --git a/data/yacc.c b/data/yacc.c index 26c5996..c98cd55 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -889,26 +889,27 @@ yytnamerr (char *yyres, const char *yystr) } # endif -/* Copy into YYRESULT an error message about the unexpected token - YYTOKEN while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yytoken) +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN while in state YYSTATE. + + Return 0 if *YYMSG was successfully written. Return 1 if an ordinary + "syntax error" message will suffice instead. Return 2 if *YYMSG is + not large enough to hold the message. In the last case, also set + *YYMSG_ALLOC to either (a) the required number of bytes or (b) zero + if the required number of bytes is too large to store. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + int yystate, int yytoken) { int yyn = yypact[yystate]; if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; + return 1; else { YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; - int yysize_overflow = 0; enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; /* Internationalized format string. */ const char *yyformat = 0; @@ -942,11 +943,17 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) } yyarg[yycount++] = yytname[yyx]; yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + { + /* Overflow. */ + *yymsg_alloc = 0; + return 2; + } yysize = yysize1; } - switch (yycount) + switch (yycount) { #define YYCASE_(N, S) \ case N: \ @@ -961,32 +968,42 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) } yysize1 = yysize + yystrlen (yyformat); - yysize_overflow |= (yysize1 < yysize); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + { + /* Overflow. */ + *yymsg_alloc = 0; + return 2; + } yysize = yysize1; - if (yysize_overflow) - return YYSIZE_MAXIMUM; + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 2; + } - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - yyp++; - yyformat++; - } - } - return yysize; + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; } } #endif /* YYERROR_VERBOSE */ @@ -1431,37 +1448,28 @@ yyerrlab: #if ! YYERROR_VERBOSE yyerror (]b4_yyerror_args[YY_("syntax error")); #else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yytoken); - yyerror (]b4_yyerror_args[yymsg); - } - else - { - yyerror (]b4_yyerror_args[YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } + while (1) + { + int yysyntax_error_status = + yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken); + if (yysyntax_error_status == 2 && yymsg_alloc > 0) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (yymsg) + continue; + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + } + if (yysyntax_error_status == 0) + yyerror (]b4_yyerror_args[yymsg); + else + yyerror (]b4_yyerror_args[YY_("syntax error")); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + break; + } #endif } diff --git a/tests/regression.at b/tests/regression.at index 7e17a91..efe72af 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1327,16 +1327,13 @@ AT_CLEANUP # Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an # invocation of yysyntax_error has caused yymsg_alloc to grow to exactly # YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had -# to be clipped to YYSTACK_ALLOC_MAXIMUM). Now imagine a subsequent -# invocation of yysyntax_error that overflows during its size -# calculation and thus returns YYSIZE_MAXIMUM to yyparse. Then, yyparse -# will invoke yyerror using the old contents of yymsg. This bug needs -# to be fixed. +# to be clipped to YYSTACK_ALLOC_MAXIMUM). In an old version of yacc.c, +# a subsequent invocation of yysyntax_error that overflows during its +# size calculation would return YYSIZE_MAXIMUM to yyparse. Then, +# yyparse would invoke yyerror using the old contents of yymsg. AT_SETUP([[-Dparse.error=verbose overflow]]) -AT_XFAIL_IF([[:]]) - AT_DATA_GRAMMAR([input.y], [[%code { #include <stdio.h> -- 1.5.4.3 |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.On Fri, 11 Sep 2009, Joel E. Denny wrote:
> Here are two new patches that I'd like to apply to master and branch-2.5. > > The first patch adds two new test groups to regression.at. > >From dc23d678388635a058492294f6cd45105f581c98 Mon Sep 17 00:00:00 2001 > From: Joel E. Denny <jdenny@...> > Date: Thu, 10 Sep 2009 18:29:01 -0400 > Subject: [PATCH] yacc.c: test yysyntax_error memory management more. > > * tests/regression.at (-Dparse.error=verbose and > YYSTACK_USE_ALLOCA): New test group. > (-Dparse.error=verbose overflow): New test group that reveals > an obscure bug. Expected fail for now. The following needs to be folded into that patch to fix maintainer-push-check. It also improves the diagnostic. diff --git a/tests/regression.at b/tests/regression.at index 7e17a91..de91823 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1382,7 +1382,12 @@ check: fprintf (stderr, "The assumptions of this test group are no longer\n" "valid, so it may no longer catch the error it was\n" - "designed to catch.\n"); + "designed to catch. Specifically, the following\n" + "values should all be 255:\n\n"); + fprintf (stderr, " yymsg_alloc = %d\n", yymsg_alloc); + fprintf (stderr, " YYSTACK_ALLOC_MAXIMUM = %d\n", + YYSTACK_ALLOC_MAXIMUM); + fprintf (stderr, " YYSIZE_MAXIMUM = %d\n", YYSIZE_MAXIMUM); YYABORT; } } @@ -1417,6 +1422,10 @@ yylex (void) int main (void) { + /* Push parsers throw away the message buffer between tokens, so skip + this test under maintainer-push-check. */ + if (YYPUSH) + return 77; return yyparse (); } ]]) |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.Le 11 sept. 2009 à 09:40, Joel E. Denny a écrit : > +# gcc warns about tautologies and fallacies involving comparisons for > +# unsigned char. However, it doesn't produce these same warnings for > +# size_t and many other types when the warnings would seem to make > just > +# as much sense. We ignore the warnings. > +CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//` > +AT_COMPILE([[input]]) You might want to introduce something like this in tests/atconfig.in, or use "$O0CFLAGS $WARN_CFLAGS" instead of getting bits of $CFLAGS. Other than that, the patches are fine, please install. (One note though, maybe we already debated about this, but I forgot: I prefer to use < and <= and not > and >=. Another lesson I took from Paul Eggert. Not everybody agrees.) |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.Hi Akim.
On Wed, 16 Sep 2009, Akim Demaille wrote: > Le 11 sept. 2009 à 09:40, Joel E. Denny a écrit : > > > +# gcc warns about tautologies and fallacies involving comparisons for > > +# unsigned char. However, it doesn't produce these same warnings for > > +# size_t and many other types when the warnings would seem to make just > > +# as much sense. We ignore the warnings. > > +CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//` > > +AT_COMPILE([[input]]) > > You might want to introduce something like this in tests/atconfig.in, or use > "$O0CFLAGS $WARN_CFLAGS" instead of getting bits of $CFLAGS. > Other than that, the patches are fine, please install. (One note though, > maybe we already debated about this, but I forgot: I prefer to use < and <= > and not > and >=. Another lesson I took from Paul Eggert. Not everybody > agrees.) I got that from Paul as well, but sometimes I forget to do it. Thanks for the reminder. I'll fold in the differences below and push those to master. I'd also like to cherry pick to branch-2.5 because I think it would be nice to release lookahead correction with IELR. However, yysyntax_error on master has already evolved away from branch-2.5, so I first need to cherry pick some of your commits from master: eeb29 Simplify the i18n of the error messages. 84eed Pass the token type to yysyntax_error. I'm not sure I'll find time to port lookahead correction to other skeletons before the release of 2.5, so I'm not currently planning to search for and cherry pick your corresponding commits for other skeletons. Does all this sound ok to you? diff --git a/ChangeLog b/ChangeLog index 61ba1e1..cc89fde 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ 2009-09-12 Joel E. Denny <jdenny@...> yacc.c: test yysyntax_error memory management more. + * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro. * tests/regression.at (-Dparse.error=verbose and YYSTACK_USE_ALLOCA): New test group. (-Dparse.error=verbose overflow): New test group that reveals diff --git a/tests/atlocal.in b/tests/atlocal.in index 91ba674..3e33965 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -10,6 +10,10 @@ # We want no optimization. CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@ @WERROR_CFLAGS@' +# Sometimes a test group needs to ignore gcc warnings, so it locally +# sets CFLAGS to this. +NO_WERROR_CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@' + # We need `config.h'. CPPFLAGS="-I$abs_top_builddir/lib @CPPFLAGS@" diff --git a/tests/regression.at b/tests/regression.at index de91823..a08a9a8 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1262,7 +1262,7 @@ start: check syntax_error syntax_error ; check: { - if (sizeof yymsgbuf > 128) + if (128 < sizeof yymsgbuf) { fprintf (stderr, "The initial size of yymsgbuf in yyparse has increased\n" @@ -1436,7 +1436,7 @@ AT_BISON_CHECK([[-o input.c input.y]]) # unsigned char. However, it doesn't produce these same warnings for # size_t and many other types when the warnings would seem to make just # as much sense. We ignore the warnings. -CFLAGS=`echo x"$CFLAGS" | sed s/-Werror// | sed s/^x//` +CFLAGS="$NO_WERROR_CFLAGS" AT_COMPILE([[input]]) AT_PARSER_CHECK([[./input]], [[2]], [], diff --git a/data/yacc.c b/data/yacc.c index c98cd55..ea2ae14 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -1452,7 +1452,7 @@ yyerrlab: { int yysyntax_error_status = yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken); - if (yysyntax_error_status == 2 && yymsg_alloc > 0) + if (yysyntax_error_status == 2 && 0 < yymsg_alloc) { if (yymsg != yymsgbuf) YYSTACK_FREE (yymsg); |
|
|
Re: [PATCH] yysyntax_error: make it manage its own memory.On Mon, 21 Sep 2009, Joel E. Denny wrote:
> I'll fold in the differences below and push those to master. After that and perhaps some other minor tweaks I've forgotten, the patches below are the result. I pushed them to master. > I'd also like to cherry pick to branch-2.5 because I think it would be > nice to release lookahead correction with IELR. However, yysyntax_error > on master has already evolved away from branch-2.5, so I first need to > cherry pick some of your commits from master: > > eeb29 Simplify the i18n of the error messages. > 84eed Pass the token type to yysyntax_error. I pushed all that too. > I'm not sure I'll find time to port lookahead correction to other > skeletons before the release of 2.5, so I'm not currently planning to > search for and cherry pick your corresponding commits for other skeletons. As it turns out, I have several more bug fixes that I need to push before pushing my lookahead correction implementation. Even though I'm not worried about whether lookahead correction makes it into the other skeletons before 2.5, I think those fixes should. I'd like to post each complete fix one at a time for comments, but that means I first need to port them to the other skeletons. I'll work on all that when I can find more time. >From 52cea04ad36abf3ab684b88cba45d6c26dda80c9 Mon Sep 17 00:00:00 2001 From: Joel E. Denny <jdenny@...> Date: Wed, 23 Sep 2009 17:37:58 -0400 Subject: [PATCH] yysyntax_error: test memory management more. * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro. * tests/regression.at (parse.error=verbose and YYSTACK_USE_ALLOCA): New test group. (parse.error=verbose overflow): New test group that reveals an obscure bug. Expected fail for now. --- ChangeLog | 9 ++ tests/atlocal.in | 4 + tests/regression.at | 209 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 222 insertions(+), 0 deletions(-) diff --git a/ChangeLog b/ChangeLog index a813d69..92cfc13 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2009-09-23 Joel E. Denny <jdenny@...> + + yysyntax_error: test memory management more. + * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro. + * tests/regression.at (parse.error=verbose and + YYSTACK_USE_ALLOCA): New test group. + (parse.error=verbose overflow): New test group that reveals an + obscure bug. Expected fail for now. + 2009-10-04 Joel E. Denny <jdenny@...> benchmarks: use %debug consistently among grammars. diff --git a/tests/atlocal.in b/tests/atlocal.in index 2e46329..9264a40 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -10,6 +10,10 @@ # We want no optimization. CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@ @WERROR_CFLAGS@' +# Sometimes a test group needs to ignore gcc warnings, so it locally +# sets CFLAGS to this. +NO_WERROR_CFLAGS='@O0CFLAGS@ @WARN_CFLAGS@' + # We need `config.h'. CPPFLAGS="-I$abs_top_builddir/lib @CPPFLAGS@" diff --git a/tests/regression.at b/tests/regression.at index cfc071e..0420f4e 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1260,3 +1260,212 @@ AT_BISON_CHECK([[-o input.c -Dlr.type=ielr input.y]]) AT_CHECK([[diff -u lalr.c ielr.c]]) AT_CLEANUP + + + +## -------------------------------------------- ## +## parse.error=verbose and YYSTACK_USE_ALLOCA. ## +## -------------------------------------------- ## + +AT_SETUP([[parse.error=verbose and YYSTACK_USE_ALLOCA]]) + +AT_DATA_GRAMMAR([input.y], +[[%code { + #include <stdio.h> + void yyerror (char const *); + int yylex (void); + #define YYSTACK_USE_ALLOCA 1 +} + +%define parse.error verbose + +%% + +start: check syntax_error syntax_error ; + +check: +{ + if (128 < sizeof yymsgbuf) + { + fprintf (stderr, + "The initial size of yymsgbuf in yyparse has increased\n" + "since this test group was last updated. As a result,\n" + "this test group may no longer manage to induce a\n" + "reallocation of the syntax error message buffer.\n" + "This test group must be adjusted to produce a longer\n" + "error message.\n"); + YYABORT; + } +} +; + +// Induce a syntax error message whose total length is more than +// sizeof yymsgbuf in yyparse. Each token here is 64 bytes. +syntax_error: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| error 'a' 'b' 'c' +; + +%% + +void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +int +yylex (void) +{ + /* Induce two syntax error messages (which requires full error + recovery by shifting 3 tokens) in order to detect any loss of the + reallocated buffer. */ + static char const *input = "abc"; + return *input++; +} + +int +main (void) +{ + return yyparse (); +} +]]) + +AT_BISON_CHECK([[-o input.c input.y]]) +AT_COMPILE([[input]]) +AT_PARSER_CHECK([[./input]], [[1]], [], +[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B +syntax error, unexpected $end, expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B +]]) + +AT_CLEANUP + + + +## ------------------------------ ## +## parse.error=verbose overflow. ## +## ------------------------------ ## + +# Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an +# invocation of yysyntax_error has caused yymsg_alloc to grow to exactly +# YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had +# to be clipped to YYSTACK_ALLOC_MAXIMUM). Now imagine a subsequent +# invocation of yysyntax_error that overflows during its size +# calculation and thus returns YYSIZE_MAXIMUM to yyparse. Then, yyparse +# will invoke yyerror using the old contents of yymsg. This bug needs +# to be fixed. + +AT_SETUP([[parse.error=verbose overflow]]) + +AT_XFAIL_IF([[:]]) + +AT_DATA_GRAMMAR([input.y], +[[%code { + #include <stdio.h> + void yyerror (char const *); + int yylex (void); + + /* This prevents this test case from having to induce error messages + large enough to overflow size_t. */ + #define YYSIZE_T unsigned char + + /* Bring in malloc so yacc.c doesn't try to provide a malloc prototype + using our YYSIZE_T. */ + #include <stdlib.h> + + /* Max depth is usually much smaller than YYSTACK_ALLOC_MAXIMUM, and + we don't want gcc to warn everywhere this constant would be too big + to make sense for our YYSIZE_T. */ + #define YYMAXDEPTH 100 +} + +%define parse.error verbose + +%% + +start: syntax_error1 check syntax_error2 ; + +// Induce a syntax error message whose total length causes yymsg in +// yyparse to be reallocated to size YYSTACK_ALLOC_MAXIMUM, which +// should be 255. Each token here is 64 bytes. +syntax_error1: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| "123456789112345678921234567893123456789412345678951234567896123C" +| error 'a' 'b' 'c' +; + +check: +{ + if (yymsg_alloc != YYSTACK_ALLOC_MAXIMUM + || YYSTACK_ALLOC_MAXIMUM != YYSIZE_MAXIMUM + || YYSIZE_MAXIMUM != 255) + { + fprintf (stderr, + "The assumptions of this test group are no longer\n" + "valid, so it may no longer catch the error it was\n" + "designed to catch. Specifically, the following\n" + "values should all be 255:\n\n"); + fprintf (stderr, " yymsg_alloc = %d\n", yymsg_alloc); + fprintf (stderr, " YYSTACK_ALLOC_MAXIMUM = %d\n", + YYSTACK_ALLOC_MAXIMUM); + fprintf (stderr, " YYSIZE_MAXIMUM = %d\n", YYSIZE_MAXIMUM); + YYABORT; + } +} +; + +// Now overflow. +syntax_error2: + "123456789112345678921234567893123456789412345678951234567896123A" +| "123456789112345678921234567893123456789412345678951234567896123B" +| "123456789112345678921234567893123456789412345678951234567896123C" +| "123456789112345678921234567893123456789412345678951234567896123D" +| "123456789112345678921234567893123456789412345678951234567896123E" +; + +%% + +void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +int +yylex (void) +{ + /* Induce two syntax error messages (which requires full error + recovery by shifting 3 tokens). */ + static char const *input = "abc"; + return *input++; +} + +int +main (void) +{ + /* Push parsers throw away the message buffer between tokens, so skip + this test under maintainer-push-check. */ + if (YYPUSH) + return 77; + return yyparse (); +} +]]) + +AT_BISON_CHECK([[-o input.c input.y]]) + +# gcc warns about tautologies and fallacies involving comparisons for +# unsigned char. However, it doesn't produce these same warnings for +# size_t and many other types when the warnings would seem to make just +# as much sense. We ignore the warnings. +[CFLAGS="$NO_WERROR_CFLAGS"] +AT_COMPILE([[input]]) + +AT_PARSER_CHECK([[./input]], [[2]], [], +[[syntax error, unexpected 'a', expecting 123456789112345678921234567893123456789412345678951234567896123A or 123456789112345678921234567893123456789412345678951234567896123B or 123456789112345678921234567893123456789412345678951234567896123C +syntax error +memory exhausted +]]) + +AT_CLEANUP -- 1.5.4.3 >From 45319f1365eb8d125424f31401d9d33cc02ff4ad Mon Sep 17 00:00:00 2001 From: Joel E. Denny <jdenny@...> Date: Wed, 23 Sep 2009 17:39:39 -0400 Subject: [PATCH] yysyntax_error: avoid duplicate lookahead collection. Except when memory reallocation is required, this change eliminates the need to invoke yysyntax_error twice and thus to repeat the collection of lookaheads. It also prepares for future extensions that will make those repetitions more expensive and that will require additional memory management in yysyntax_error. Finally, it fixes an obscure bug already exercised in the test suite. * data/yacc.c (yysyntax_error): Add arguments for message buffer variables stored in the parser. Instead of size, return status similar to yyparse status but indicating success of message creation. Other than the actual reallocation of the message buffer, import and clean up memory management code from... (yyparse, yypush_parse): ... here. * tests/regression.at (parse.error=verbose overflow): No longer an expected failure. --- ChangeLog | 20 ++ data/yacc.c | 142 ++++++----- src/parse-gram.c | 686 ++++++++++++++++++++++++++------------------------- src/parse-gram.h | 12 +- tests/regression.at | 11 +- 5 files changed, 452 insertions(+), 419 deletions(-) diff --git a/ChangeLog b/ChangeLog index 92cfc13..6395ddd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,25 @@ 2009-09-23 Joel E. Denny <jdenny@...> + yysyntax_error: avoid duplicate lookahead collection. + Except when memory reallocation is required, this change + eliminates the need to invoke yysyntax_error twice and thus to + repeat the collection of lookaheads. It also prepares for + future extensions that will make those repetitions more + expensive and that will require additional memory management in + yysyntax_error. Finally, it fixes an obscure bug already + exercised in the test suite. + * data/yacc.c (yysyntax_error): Add arguments for message + buffer variables stored in the parser. Instead of size, return + status similar to yyparse status but indicating success of + message creation. Other than the actual reallocation of the + message buffer, import and clean up memory management code + from... + (yyparse, yypush_parse): ... here. + * tests/regression.at (parse.error=verbose overflow): No longer + an expected failure. + +2009-09-23 Joel E. Denny <jdenny@...> + yysyntax_error: test memory management more. * tests/atlocal.in (NO_WERROR_CFLAGS): New cpp macro. * tests/regression.at (parse.error=verbose and diff --git a/data/yacc.c b/data/yacc.c index 26c5996..ea2ae14 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -889,26 +889,27 @@ yytnamerr (char *yyres, const char *yystr) } # endif -/* Copy into YYRESULT an error message about the unexpected token - YYTOKEN while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yytoken) +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN while in state YYSTATE. + + Return 0 if *YYMSG was successfully written. Return 1 if an ordinary + "syntax error" message will suffice instead. Return 2 if *YYMSG is + not large enough to hold the message. In the last case, also set + *YYMSG_ALLOC to either (a) the required number of bytes or (b) zero + if the required number of bytes is too large to store. */ +static int +yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, + int yystate, int yytoken) { int yyn = yypact[yystate]; if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; + return 1; else { YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); YYSIZE_T yysize = yysize0; YYSIZE_T yysize1; - int yysize_overflow = 0; enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; /* Internationalized format string. */ const char *yyformat = 0; @@ -942,11 +943,17 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) } yyarg[yycount++] = yytname[yyx]; yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); + if (! (yysize <= yysize1 + && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + { + /* Overflow. */ + *yymsg_alloc = 0; + return 2; + } yysize = yysize1; } - switch (yycount) + switch (yycount) { #define YYCASE_(N, S) \ case N: \ @@ -961,32 +968,42 @@ yysyntax_error (char *yyresult, int yystate, int yytoken) } yysize1 = yysize + yystrlen (yyformat); - yysize_overflow |= (yysize1 < yysize); + if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) + { + /* Overflow. */ + *yymsg_alloc = 0; + return 2; + } yysize = yysize1; - if (yysize_overflow) - return YYSIZE_MAXIMUM; + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return 2; + } - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - yyp++; - yyformat++; - } - } - return yysize; + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyformat += 2; + } + else + { + yyp++; + yyformat++; + } + } + return 0; } } #endif /* YYERROR_VERBOSE */ @@ -1431,37 +1448,28 @@ yyerrlab: #if ! YYERROR_VERBOSE yyerror (]b4_yyerror_args[YY_("syntax error")); #else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yytoken); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yytoken); - yyerror (]b4_yyerror_args[yymsg); - } - else - { - yyerror (]b4_yyerror_args[YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } + while (1) + { + int yysyntax_error_status = + yysyntax_error (&yymsg_alloc, &yymsg, yystate, yytoken); + if (yysyntax_error_status == 2 && 0 < yymsg_alloc) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); + if (yymsg) + continue; + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + } + if (yysyntax_error_status == 0) + yyerror (]b4_yyerror_args[yymsg); + else + yyerror (]b4_yyerror_args[YY_("syntax error")); + if (yysyntax_error_status == 2) + goto yyexhaustedlab; + break; + } #endif } diff --git a/tests/regression.at b/tests/regression.at index 0420f4e..3857916 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -1350,16 +1350,13 @@ AT_CLEANUP # Imagine the case where YYSTACK_ALLOC_MAXIMUM = YYSIZE_MAXIMUM and an # invocation of yysyntax_error has caused yymsg_alloc to grow to exactly # YYSTACK_ALLOC_MAXIMUM (perhaps because the normal doubling of size had -# to be clipped to YYSTACK_ALLOC_MAXIMUM). Now imagine a subsequent -# invocation of yysyntax_error that overflows during its size -# calculation and thus returns YYSIZE_MAXIMUM to yyparse. Then, yyparse -# will invoke yyerror using the old contents of yymsg. This bug needs -# to be fixed. +# to be clipped to YYSTACK_ALLOC_MAXIMUM). In an old version of yacc.c, +# a subsequent invocation of yysyntax_error that overflows during its +# size calculation would return YYSIZE_MAXIMUM to yyparse. Then, +# yyparse would invoke yyerror using the old contents of yymsg. AT_SETUP([[parse.error=verbose overflow]]) -AT_XFAIL_IF([[:]]) - AT_DATA_GRAMMAR([input.y], [[%code { #include <stdio.h> -- 1.5.4.3 |
| Free embeddable forum powered by Nabble | Forum Help |