« Return to Thread: fread failing to handle newlines gracefully

Re: : fread failing to handle newlines gracefully

by Raimo Niskanen-5 :: Rate this Message:

Reply to Author | View in Thread

On Thu, Feb 05, 2009 at 09:35:01AM +0100, Raimo Niskanen wrote:

> On Fri, Jan 30, 2009 at 11:12:23PM +1100, Matthew Palmer wrote:
> > Hi,
> >
> > I recently fielded a question on Stack Overflow regarding some strange
> > behaviour of fread, with the ~d format spec
> > (http://stackoverflow.com/questions/473327/unexpected-behavior-of-iofread-in-erlang/490023#490023).
> > Digging deep, it appears to be a bit of a bug in the way that newlines are
> > handled in the format parser, as demonstrated here:
> >
> > 3> io_lib_fread:fread([], "10 11\n12 13 14\n", "~d").
> > {done,{ok,"\n"}," 1112 13 14\n"}
> >
> > It's eaten the newline between "11" and "12", which means that on the next
> > pass through it'll read the next number as "1112" instead of "11", which
> > poses some obvious problems.
> >
> > I've come up with the attached patch to fix the problem.  I'm not very
> > experienced with Erlang yet, so it might not be the cleanest way to do it,
> > but it does definitely fix the problem, and without (apparently) breaking
> > any other common use case.  Comments appreciated.  If there's a test suite I
> > should be adding to as well, I'm more than happy to do that, I just couldn't
> > find one in my source tarball.
> >
> > - Matt
>
> Thank you for your effort!
>
> Your diagnosis and patch are basically right. I have now been digging
> into the matter and it took a while to ensure that. I will polish
> the patch a bit (the code needs some cleanup), and improve it
> for \r\n combinations.
>
> Unfortunately there are no tests in the source tarball. We are striving
> towards their release but so far large parts of them are too much
> depending on our internal environment to be globally useful...
>
> I will write tests too. When done I will release a new
> source code patch here.
OK. The patch is appended. I also found and corrected
a bug for e.g io_lib:fread("~s", "\rabc").

>
> > --- ../erlang/erlang-12.b.3-dfsg/lib/stdlib/src/io_lib_fread.erl 2004-09-14 21:41:00.000000000 +1000
> > +++ ./io_lib_fread.erl 2009-01-30 15:44:57.000000000 +1100
> > @@ -35,9 +35,9 @@
> >      fread_collect(MoreChars, [], Rest, RestFormat, N, Inputs).
> >  
> >  fread_collect([$\r|More], Stack, Rest, RestFormat, N, Inputs) ->
> > -    fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, More);
> > +    fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, [$\r|More]);
> >  fread_collect([$\n|More], Stack, Rest, RestFormat, N, Inputs) ->
> > -    fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, More);
> > +    fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, [$\n|More]);
> >  fread_collect([C|More], Stack, Rest, RestFormat, N, Inputs) ->
> >      fread_collect(More, [C|Stack], Rest, RestFormat, N, Inputs);
> >  fread_collect([], Stack, Rest, RestFormat, N, Inputs) ->
> > @@ -55,8 +55,8 @@
> >   eof ->
> >      fread(RestFormat,eof,N,Inputs,eof);
> >   _ ->
> > -    %% Don't forget to count the newline.
> > -    {more,{More,RestFormat,N+1,Inputs}}
> > +    %% Don't forget to strip and count the newline.
> > +    {more,{tl(More),RestFormat,N+1,Inputs}}
> >      end;
> >   Other -> %An error has occurred
> >      {done,Other,More}
>
> > _______________________________________________
> > erlang-patches mailing list
> > erlang-patches@...
> > http://www.erlang.org/mailman/listinfo/erlang-patches
>
> --
>
> / Raimo Niskanen, Erlang/OTP, Ericsson AB
> _______________________________________________
> erlang-patches mailing list
> erlang-patches@...
> http://www.erlang.org/mailman/listinfo/erlang-patches
--

/ Raimo Niskanen, Erlang/OTP, Ericsson AB


*** lib/stdlib/src/io_lib_fread.erl@@/main/release/r13a_dev/1 2009-01-20 15:40:56.000000000 +0100
--- lib/stdlib/src/io_lib_fread.erl@@/main/release/r13a_dev/3 2009-02-09 16:08:22.000000000 +0100
***************
*** 27,68 ****
  %% fread(Continuation, CharList, FormatString)
  %%  This is the main function into the re-entrant formatted reader. It
  %%  repeatedly collects lines and calls fread/2 to format the input until
! %%  all the format string has been used.
 
  fread([], Chars, Format) ->
!     fread({[],Format,0,[]}, Chars, Format);
! fread({Rest,RestFormat,N,Inputs}, MoreChars, _Format) ->
!     %%io:format("FREAD: ~w `~s'~n", [{Rest,RestFormat,N,Inputs},MoreChars]),
!     fread_collect(MoreChars, [], Rest, RestFormat, N, Inputs).
!
! fread_collect([$\r|More], Stack, Rest, RestFormat, N, Inputs) ->
!     fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, More);
! fread_collect([$\n|More], Stack, Rest, RestFormat, N, Inputs) ->
!     fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, More);
! fread_collect([C|More], Stack, Rest, RestFormat, N, Inputs) ->
!     fread_collect(More, [C|Stack], Rest, RestFormat, N, Inputs);
! fread_collect([], Stack, Rest, RestFormat, N, Inputs) ->
!     {more,{reverse(Stack, Rest),RestFormat,N,Inputs}};
! fread_collect(eof, Stack, Rest, RestFormat, N, Inputs) ->
!     fread(RestFormat, Rest ++ reverse(Stack), N, Inputs, eof).
!
! fread(Format, Line, N0, Inputs0, More) ->
!     %%io:format("FREAD1: `~s' `~s'~n", [Format,Line]),
!     case fread(Format, Line, N0, Inputs0) of
! {ok,Input,Rest} ->
!    {done,{ok,Input},case More of eof -> Rest; _ -> Rest ++ More end};
! {more,RestFormat,N,Inputs} ->
!    case More of
! eof ->
!    fread(RestFormat,eof,N,Inputs,eof);
! _ ->
!    %% Don't forget to count the newline.
!    {more,{More,RestFormat,N+1,Inputs}}
!    end;
  Other -> %An error has occurred
     {done,Other,More}
      end.
 
  %% Conventions
  %%   ~s String White terminated
  %%   ~d         Integer terminated by ~[0-9]
--- 27,75 ----
  %% fread(Continuation, CharList, FormatString)
  %%  This is the main function into the re-entrant formatted reader. It
  %%  repeatedly collects lines and calls fread/2 to format the input until
! %%  all the format string has been used. And it counts the characters.
 
  fread([], Chars, Format) ->
!     %%io:format("FREAD: ~w `~s'~n", [Format,Chars]),
!     fread_collect(Format, [], 0, [], Chars);
! fread({Format,Stack,N,Results}=_Continuation, Chars, _) ->
!     %%io:format("FREAD: ~w `~s'~n", [_Continuation,Chars]),
!     fread_collect(Format, Stack, N, Results, Chars).
!
! fread_collect(Format, [$\r|Stack], N, Results, [$\n|Chars]) ->
!     fread_line(Format, reverse(Stack), N, Results, Chars, [$\r,$\n]);
! fread_collect(Format, Stack, N, Results, [$\n|Chars]) ->
!     fread_line(Format, reverse(Stack), N, Results, Chars, [$\n]);
! fread_collect(Format, Stack, N, Results, []) ->
!     Continuation = {Format,Stack,N,Results},
!     {more,Continuation};
! fread_collect(Format, [$\r|Stack], N, Results, Chars) -> % Maybe eof
!     fread_line(Format, reverse(Stack), N, Results, Chars, [$\r]);
! fread_collect(Format, Stack, N, Results, [C|Chars]) ->
!     fread_collect(Format, [C|Stack], N, Results, Chars);
! fread_collect(Format, Stack, N, Results, Chars) -> % eof
!     fread_line(Format, reverse(Stack), N, Results, Chars, []).
!
! fread_line(Format0, Line, N0, Results0, More, Newline) ->
!     %%io:format("FREAD1: `~s' `~s'~n", [Format0,Line]),
!     Chars = if is_list(More) -> More; true -> [] end,
!     case fread(Format0, Line, N0, Results0) of
! {ok,Results,[]} ->
!    {done,{ok,Results},Chars};
! {ok,Results,Rest} ->
!    %% Don't loose the whitespace
!    {done,{ok,Results},Rest++(Newline++Chars)};
! %% fread/4 should not return {more,...} on eof; guard just in case...
! %% Count newline characters here since fread/4 does not get them.
! {more,Format,N,Results} when is_list(Line), is_list(More) ->
!    fread_collect(Format, [], N+length(Newline), Results, More);
! {more,Format,N,Results} when is_list(Line) -> % eof
!    fread_line(Format, eof, N+length(Newline), Results, More, []);
  Other -> %An error has occurred
     {done,Other,More}
      end.
 
+
  %% Conventions
  %%   ~s String White terminated
  %%   ~d         Integer terminated by ~[0-9]
***************
*** 354,359 ****
--- 361,368 ----
      fread_skip_white(Line, N+1);
  fread_skip_white([$\t|Line], N) ->
      fread_skip_white(Line, N+1);
+ fread_skip_white([$\r|Line], N) ->
+     fread_skip_white(Line, N+1);
  fread_skip_white([$\n|Line], N) ->
      fread_skip_white(Line, N+1);
  fread_skip_white(Line, N) -> {Line,N}.


_______________________________________________
erlang-patches mailing list
erlang-patches@...
http://www.erlang.org/mailman/listinfo/erlang-patches

 « Return to Thread: fread failing to handle newlines gracefully