error handling and exit code (patch for tar-1.20/1.21)

View: New views
18 Messages — Rating Filter:   Alert me  

error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Sergey and all,

I'd like to submit upstream a patch from Openwall GNU/*/Linux that we
came up with in Nov-Dec 2008, and which we've extensively tested since
then (in production, doing nightly incremental backups of a network of
servers).  The patch is against GNU tar 1.20, and that's what it's been
tested with, but it applies to 1.21 as well (no rejects, no fuzz) and is
still fully relevant (as far as I can see).  I've attached it to this
message, and you can also find it (as well as other stuff) in our CVS
repository:

http://cvsweb.openwall.com/cgi/cvsweb.cgi/Owl/packages/tar/

Basically, the problem with tar, without this patch, was that its exit
code would not let us distinguish serious errors from benign ones (the
latter occur every night since the systems being backed up are live).

tar would often exit with TAREXIT_FAILURE rather than TAREXIT_DIFFERS
even on unimportant errors, such as a file in a deep directory
disappearing from under the running tar.  Clearly, this would force us
to ignore tar's exit code, yet we wanted to be informed of serious
errors (such as media I/O errors).

Presumably, everyone else who does incremental backups is ignoring tar's
exit code, and that's not great.

The patch also fixes an instance of "the opposite" problem - where tar
would potentially change its exit_status from TAREXIT_FAILURE (based on
a previous error) to TAREXIT_DIFFERS (forgetting that it had already run
into a more serious error).

Please have a look, comment, and consider for inclusion.

This effort was supported by CivicActions - http://civicactions.com -
and it took us quite some time to identify all those cases where tar
would return "the wrong" exit code.

Thanks,

Alexander Peslyak <solar at openwall.com>
GPG key ID: 5B341F15  fp: B3FB 63F4 D7A3 BCCC 6F6E  FC55 A2FC 027C 5B34 1F15
http://www.openwall.com - bringing security into open computing environments

diff -urpN tar-1.20.orig/src/create.c tar-1.20/src/create.c
--- tar-1.20.orig/src/create.c 2008-04-14 12:03:12 +0000
+++ tar-1.20/src/create.c 2008-12-23 12:18:20 +0000
@@ -1078,7 +1078,7 @@ dump_regular_file (int fd, struct tar_st
    size_left),
  quotearg_colon (st->orig_file_name),
  STRINGIFY_BIGINT (size_left, buf)));
-  if (! ignore_failed_read_option)
+  if (! ignore_failed_read_option && exit_status == TAREXIT_SUCCESS)
     exit_status = TAREXIT_DIFFERS;
   pad_archive (size_left - (bufsize - count));
   return dump_status_short;
@@ -1283,9 +1283,19 @@ create_archive (void)
 
       collect_and_sort_names ();
 
+      /* We assume that the collect_and_sort_names() call above has validated
+ all filenames passed as input to tar.  In the calls to dump_file(),
+ we don't know where each filename came from.  We choose to pass 0 for
+ top_level such that tar does not unnecessarily exit with
+ TAREXIT_FAILURE when a file in some deep directory disappears from
+ under us.  Unfortunately, this also means that tar won't set this exit
+ code for files that were actually at "top level", and were accessible
+ when collect_and_sort_names() ran, but disappeared or turned out to be
+ otherwise inaccessible for dump_file(). */
+
       while ((p = name_from_list ()) != NULL)
  if (!excluded_name (p))
-  dump_file (p, -1, (dev_t) 0);
+  dump_file (p, 0, (dev_t) 0);
 
       blank_name_list ();
       while ((p = name_from_list ()) != NULL)
@@ -1315,7 +1325,7 @@ create_archive (void)
   buffer = xrealloc (buffer, buffer_size);
  }
       strcpy (buffer + plen, q + 1);
-      dump_file (buffer, -1, (dev_t) 0);
+      dump_file (buffer, 0, (dev_t) 0);
     }
   q += qlen + 1;
  }
@@ -1499,7 +1509,11 @@ dump_file0 (struct tar_stat_info *st, co
 
   if (deref_stat (dereference_option, p, &st->stat) != 0)
     {
-      stat_diag (p);
+      if (!top_level && errno == ENOENT)
+ WARN ((0, 0, _("%s: File removed before we read it"),
+       quotearg_colon (p)));
+      else
+ stat_diag (p);
       return;
     }
   st->archive_file_size = original_size = st->stat.st_size;
@@ -1643,7 +1657,8 @@ dump_file0 (struct tar_stat_info *st, co
        : fstat (fd, &final_stat))
       != 0)
     {
-      stat_diag (p);
+      if (errno != ENOENT)
+ stat_diag (p);
       ok = false;
     }
  }
@@ -1700,7 +1715,11 @@ dump_file0 (struct tar_stat_info *st, co
       size = readlink (p, buffer, linklen + 1);
       if (size < 0)
  {
-  readlink_diag (p);
+  if (!top_level && errno == ENOENT)
+    WARN ((0, 0, _("%s: File removed before we read it"),
+   quotearg_colon (p)));
+  else
+    readlink_diag (p);
   return;
  }
       buffer[size] = '\0';
diff -urpN tar-1.20.orig/src/incremen.c tar-1.20/src/incremen.c
--- tar-1.20.orig/src/incremen.c 2008-04-14 12:03:13 +0000
+++ tar-1.20/src/incremen.c 2008-12-23 12:10:22 +0000
@@ -347,7 +347,10 @@ update_parent_directory (const char *nam
     {
       struct stat st;
       if (deref_stat (dereference_option, p, &st) != 0)
- stat_diag (name);
+ {
+  if (errno != ENOENT)
+    stat_diag (name);
+ }
       else
  directory->mtime = get_stat_mtime (&st);
     }
@@ -597,7 +600,8 @@ scan_directory (char *dir, dev_t device)
 
   if (deref_stat (dereference_option, name_buffer, &stat_data))
     {
-      stat_diag (name_buffer);
+      if (errno != ENOENT)
+ stat_diag (name_buffer);
       /* FIXME: used to be
            children = CHANGED_CHILDREN;
  but changed to: */
@@ -639,7 +643,8 @@ scan_directory (char *dir, dev_t device)
     {
       if (deref_stat (dereference_option, name_buffer, &stat_data))
  {
-  stat_diag (name_buffer);
+  if (errno != ENOENT)
+    stat_diag (name_buffer);
   *entry = 'N';
   continue;
  }

Re: error handling and exit code (patch for tar-1.20/1.21)

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Solar Designer <solar@...> ha escrit:

> I'd like to submit upstream a patch from Openwall GNU/*/Linux that we
> came up with in Nov-Dec 2008, and which we've extensively tested since
> then

Thanks a lot. These are very reasonable changes. I have one doubt,
though: are you sure that a file disappearing from under the running tar
can be treated as unimportant error, especially when in incremental backup
mode?

Regards,
Sergey



Re: error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Sergey,

Thank you for considering these changes, and for your response.

On Wed, Mar 04, 2009 at 11:13:12AM +0200, Sergey Poznyakoff wrote:
> Thanks a lot. These are very reasonable changes. I have one doubt,
> though: are you sure that a file disappearing from under the running tar
> can be treated as unimportant error,

It has to be treated that way when backing up live systems.

> especially when in incremental backup mode?

Yes, "especially" is the right word here, because when doing incremental
backups tar pre-builds filelists, which makes it run into this problem
more often as the delay between the time a filelist is built and the time
a certain directory is finally backed up is substantial.  Not ignoring
this "error" renders the exit code useless.

Unfortunately, there's the problem described in the lengthy comment
added by the patch - there's currently no way for dump_file() to know
whether a filename was explicitly given to tar (such as via the
command-line) or obtained by tar during initial traversal of the
directory trees.  (This problem is specific to incremental backups.)
I think this may be remedied with a subsequent patch, to be
developed/tested/reviewed/committed at a later time if someone invests
the time.  It'd be best to not ignore disappearing files if those were
given to tar explicitly, but to me that's not a good enough reason to
block the current patch from inclusion, because right now the exit code
with incremental backups of live systems is practically useless.

Also, there's the question whether a disappearing file (that was not
given to tar explicitly) should be treated as no error at all or as a
benign error (warning printed and/or exit code set to TAREXIT_DIFFERS).
In unpatched tar 1.20/1.21, this differs by context - there are a few
checks for ENOENT already.  The patch simply covers additional cases,
which I think were previously missed, and it also treats those ENOENT's
differently in different places in the code.  I tried to keep this
inconsistency consistent with the way it was in unpatched tar 1.20/1.21,
and to apply my common sense.  Maybe a review of all of those cases, not
only those being patched now, is in order - but I think it should be
approached as a separate task.

If at a later time we choose to reflect disappearing files in the exit
code in any way, then maybe another exit code should be introduced
(neither TAREXIT_DIFFERS, not TAREXIT_FAILURE), or maybe a command-line
option added to control whether tar should exit with TAREXIT_DIFFERS in
those cases.  I'd wait to see if there's any demand for that, though.
On backup setups that I co-administer, we'd ignore those "errors"
anyway, whereas TAREXIT_DIFFERS (which usually corresponds to the "file
changed as we read it" warning from tar) results in a warning appearing
inside backup reports, and TAREXIT_FAILURE results in the Subject line
changed and in a Nagios alert.

Finally, I find the choice of specific exit code values for
TAREXIT_DIFFERS and TAREXIT_FAILURE unfortunate.  In older versions of
tar (such as 1.15.1 to be specific), they were swapped, which I think
worked better.  We need to treat TAREXIT_DIFFERS as unimportant, even
though it currently corresponds to exit code 1, which is also commonly
produced by system libraries on unexpected/critical errors.  I think
it'd be best to leave exit code 1 alone (not have tar itself ever
produce it), or to un-swap these two exit codes (if compatibility with
older versions of tar is a plus).  (While we're at it, besides exit
code 1, another one that I'd keep "reserved for libraries" is 255 aka -1.)
I don't know why those exit codes were swapped (in paxutils?), so I am a
bit out of context here, and obviously I am of the opinion that this
issue should be discussed and maybe patched separately.

Thanks again,

Alexander



Re: error handling and exit code (patch for tar-1.20/1.21)

by Gene Heskett :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Wednesday 04 March 2009, Sergey Poznyakoff wrote:

>Solar Designer <solar@...> ha escrit:
>> I'd like to submit upstream a patch from Openwall GNU/*/Linux that we
>> came up with in Nov-Dec 2008, and which we've extensively tested since
>> then
>
>Thanks a lot. These are very reasonable changes. I have one doubt,
>though: are you sure that a file disappearing from under the running tar
>can be treated as unimportant error, especially when in incremental backup
>mode?
>
>Regards,
>Sergey

This is something I get during an amanda backup run, and the only cause is
always kmail doing background maintenance tasks that result in messages being
purged for old age reasons in some of the mailing list folders.  As its
advisory only, and isn't an exit causing error, I would much prefer that the
current behavior be maintained.  I _do_ want to be advised of it, and am in
the emails I get from amanda, so it is a valuable trouble shooting tool IMO.

--
Cheers, Gene
"There are four boxes to be used in defense of liberty:
 soap, ballot, jury, and ammo. Please use in that order."
-Ed Howdershelt (Author)
When you are at Rome live in the Roman style; when you are elsewhere live
as they live elsewhere.
                -- St. Ambrose




Re: error handling and exit code (patch for tar-1.20/1.21)

by John Hein :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Sergey Poznyakoff wrote at 11:13 +0200 on Mar  4, 2009:
 > Solar Designer <solar@...> ha escrit:
 >
 > > I'd like to submit upstream a patch from Openwall GNU/*/Linux that we
 > > came up with in Nov-Dec 2008, and which we've extensively tested since
 > > then
 >
 > Thanks a lot. These are very reasonable changes. I have one doubt,
 > though: are you sure that a file disappearing from under the running tar
 > can be treated as unimportant error, especially when in incremental backup
 > mode?

I've been using this patch locally to avoid the troublesome exit error
on 'file changed' if --ignore-failed-read is used...

===========================
As recently as 1.15.1, --ignore-failed-read used to prevent tar from
exiting with non-zero exit status for 'file changed'.

--- src/create.c.orig Mon Apr 14 06:03:12 2008
+++ src/create.c Thu Sep  4 11:22:31 2008
@@ -1658,7 +1658,8 @@
     {
       WARN ((0, 0, _("%s: file changed as we read it"),
      quotearg_colon (p)));
-      if (exit_status == TAREXIT_SUCCESS)
+      if (exit_status == TAREXIT_SUCCESS
+          && ! ignore_failed_read_option)
  exit_status = TAREXIT_DIFFERS;
     }
   else if (atime_preserve_option == replace_atime_preserve
===========================



Re: error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi Sergey,

On Wed, Mar 04, 2009 at 11:13:12AM +0200, Sergey Poznyakoff wrote:

> Solar Designer <solar@...> ha escrit:
>
> > I'd like to submit upstream a patch from Openwall GNU/*/Linux that we
> > came up with in Nov-Dec 2008, and which we've extensively tested since
> > then
>
> Thanks a lot. These are very reasonable changes. I have one doubt,
> though: are you sure that a file disappearing from under the running tar
> can be treated as unimportant error, especially when in incremental backup
> mode?

As you probably recall, I replied to the above message of yours in early
March - but then the discussion appears to have died, and no relevant
changes appear to be in the git repository.  Any status update?  Do you
possibly require the patch to be revised in some way (what way?) before
you can commit it?

My reply was:

http://lists.gnu.org/archive/html/bug-tar/2009-03/msg00011.html

BTW, one of the changes made with the patch is an obvious bug fix:

+++ tar-1.20/src/create.c 2008-12-23 12:18:20 +0000
@@ -1078,7 +1078,7 @@ dump_regular_file (int fd, struct tar_st
    size_left),
  quotearg_colon (st->orig_file_name),
  STRINGIFY_BIGINT (size_left, buf)));
-  if (! ignore_failed_read_option)
+  if (! ignore_failed_read_option && exit_status == TAREXIT_SUCCESS)
     exit_status = TAREXIT_DIFFERS;
   pad_archive (size_left - (bufsize - count));
   return dump_status_short;

Perhaps you can start by applying this one?

Thanks,

Alexander



Re: error handling and exit code (patch for tar-1.20/1.21)

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hello Alexander,

Current git master implements the functionality proposed by your
posting on 2009-03-29.  It is partially based on your patch.

The new behavior is as follows:

If a file or directory is removed while incremental dump is
in progress, tar exact actions depend on whether this file
was explicitly listed in the command line, or was gathered
during the file system scan.

If the file was explicitly listed in the command line, tar
issues error message and exits with the code 2, which means
fatal error.

Otherwise, if the file was gathered during the file system
scan, tar issues a warning, saying "File removed before we read it",
and sets the exit code to 1, which means "some files differ".
Additionally, if the --warning=no-file-removed option is given, no
warning is issued and exit code remains 0.

Would you please test it and inform me if it needs any further
improvements?

Thank you.

--
Sergey




Re: error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi Sergey,

On Sat, Aug 08, 2009 at 08:00:21PM +0300, Sergey Poznyakoff wrote:
> Current git master implements the functionality proposed by your
> posting on 2009-03-29.  It is partially based on your patch.

Thank you for taking care of this!

> The new behavior is as follows:
...

Sounds good to me.

> Would you please test it and inform me if it needs any further
> improvements?

I'd like to test it by updating the Owl-current package of tar to this
version, then deploying it on a few systems.  To do this, I'd prefer to
create a patch against the 1.22 release.  I've just tried the following:

lftpget http://ftp.gnu.org/gnu/tar/tar-1.22.tar.bz2
tar xjf tar-1.22.tar.bz2
git clone git://git.sv.gnu.org/tar.git
cd tar
git diff -r release_1_22 > ../tar-1.22-up-20090808.diff
cd ../tar-1.22
yes n | patch -p1 < ../tar-1.22-up-20090808.diff &> ../patchlog

...and I don't like some of what I see in "patchlog", which includes
some rejects on changes to Makefile.am's.  Of course, I could fix this
manually, then regenerate the patch, but I feel that I am doing
something wrong.  Any suggestions?

Also, is tar.git even complete?  It appears to be missing some libs
found in tar releases.

What is your recommended way to build tar from git?

In fact, would you possibly make the planned(?) 1.22.90 alpha release now?

Thanks again,

Alexander



Re: error handling and exit code (patch for tar-1.20/1.21)

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Solar Designer <solar@...> ha escrit:

> ...and I don't like some of what I see in "patchlog", which includes
> some rejects on changes to Makefile.am's.  Of course, I could fix this
> manually, then regenerate the patch, but I feel that I am doing
> something wrong.  Any suggestions?
[...]
> In fact, would you possibly make the planned(?) 1.22.90 alpha release now?

Yes, it is already available from ftp://alpha.gnu.org/gnu.  Please,
download it instead of patching 1.22.

> Also, is tar.git even complete?  It appears to be missing some libs
> found in tar releases.

It does not include files which are taken from other projects (gnulib and
paxutils) and files which are generated automatically during the bootstrap.

> What is your recommended way to build tar from git?

The git sources must be "bootstrapped" first.  This process and
prerequisites to it are described in detail in the file README-hacking.

Regards,
Sergey



Re: error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi Sergey,

On Sat, Aug 08, 2009 at 08:00:21PM +0300, Sergey Poznyakoff wrote:
> Current git master implements the functionality proposed by your
> posting on 2009-03-29.  It is partially based on your patch.
[...]
> Would you please test it and inform me if it needs any further
> improvements?

I've updated the Owl package of tar to 1.22.90 with some patches, and
I've installed that on a couple of systems where incremental backups are
done nightly.  We'll see how it performs.  I intend to slowly deploy it
(or a newer version) on more systems.

While making this update, I've reviewed your changes against my older
tar-1.20-owl-error-handling.diff patch.  It appears that you missed(?)
two hunks: one in create.c: dump_regular_file() and the other in
incremen.c: scan_directory().  Additionally, I think that the call to
dir_removed_diag() in scan_directory() should pass cmdline, not false,
for top_level.  I've attached a patch fixing these three issues.

As usual, our current tar patches are available at:

http://cvsweb.openwall.com/cgi/cvsweb.cgi/Owl/packages/tar/

Thanks,

Alexander

diff -urp tar-1.22.90.orig/src/create.c tar-1.22.90/src/create.c
--- tar-1.22.90.orig/src/create.c 2009-08-09 07:38:01 +0000
+++ tar-1.22.90/src/create.c 2009-08-17 03:05:44 +0000
@@ -1080,7 +1080,7 @@ dump_regular_file (int fd, struct tar_st
       size_left),
     quotearg_colon (st->orig_file_name),
     STRINGIFY_BIGINT (size_left, buf)));
-  if (! ignore_failed_read_option)
+  if (! ignore_failed_read_option && exit_status == TAREXIT_SUCCESS)
     exit_status = TAREXIT_DIFFERS;
   pad_archive (size_left - (bufsize - count));
   return dump_status_short;
diff -urp tar-1.22.90.orig/src/incremen.c tar-1.22.90/src/incremen.c
--- tar-1.22.90.orig/src/incremen.c 2009-08-09 07:38:01 +0000
+++ tar-1.22.90/src/incremen.c 2009-08-17 03:12:49 +0000
@@ -709,7 +709,7 @@ scan_directory (char *dir, dev_t device,
   
   if (deref_stat (dereference_option, name_buffer, &stat_data))
     {
-      dir_removed_diag (name_buffer, false, stat_diag);
+      dir_removed_diag (name_buffer, cmdline, stat_diag);
       /* FIXME: used to be
            children = CHANGED_CHILDREN;
  but changed to: */
@@ -760,7 +760,7 @@ scan_directory (char *dir, dev_t device,
     {
       if (deref_stat (dereference_option, name_buffer, &stat_data))
  {
-  stat_diag (name_buffer);
+  file_removed_diag (name_buffer, false, stat_diag);
   *entry = 'N';
   continue;
  }

Re: error handling and exit code (patch for tar-1.20/1.21)

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Sergey,

My installs of tar-1.22.90-owl1 have been running just as well as those
of tar-1.20-owl4 did so far.  All of them are doing incremental backups.

However, I've learned of a few issues:

In the testsuite, test 26 reliably fails when running with umask 077.

 26: restoring mode on existing directory            FAILED (extrac08.at:36)

In the diffs, I see:

-755
+700

Maybe the test needs to use the "p" option or maybe it needs to use
umask 022?  This depends on what exactly it is meant to test.

Then, test 38 fails intermittently.  It appears to be more likely to fail
during the first build on a system than during subsequent rebuilds.  In
the diffs, I see:

-tar: dir/file1: File removed before we read it
+tar: dir/file1: file changed as we read it

This must be some sort of a race condition.  I'm not sure if it needs to
be fixed in tar itself or in the test - I did not look into it.

Finally, as it relates to the tests, it was reported to me that test 12
fails on some systems (where all other tests pass):

 12: transforming hard links on create               FAILED (xform-h.at:39)

I am unable to reproduce this myself, and I have no diffs from the
reporter (yet).  Sorry for the lack of info on this one.

Now to a real issue:

On one of the systems still running 1.20-owl4, I just got tar to exit
with TAREXIT_FAILURE upon getting this error:

tar: ./home/clients/databases/b_main: Cannot savedir: No such file or directory

The directory above was removed while tar was running.  All other error
messages from tar were usual (some "File removed before we read it" and
"socket ignored" ones), so I suspect that only the one I quoted above
was the reason for the TAREXIT_FAILURE exit code.  I've just upgraded
that system to 1.22.90-owl1, but I don't expect it to make a difference
(it might take months until the issue shows up again, though).

In 1.22.90's src/incremen.c: scan_directory() we have this:

  char *dirp = savedir (dir);   /* for scanning directory */
[...]
  if (! dirp)
    savedir_error (dir);

Perhaps we need to patch this in our usual way?

Maybe we also need to check for other calls to savedir().

Thanks,

Alexander



Re: error handling and exit code (patch for tar-1.20/1.21)

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Solar Designer <solar@...> ha escrit:

>  26: restoring mode on existing directory            FAILED (extrac08.at:36)

Yes, this testcase should set umask.

> Then, test 38 fails intermittently.  It appears to be more likely to fail
> during the first build on a system than during subsequent rebuilds.  In
> the diffs, I see:

There is a race condition in the test. I'll fix it.

> Finally, as it relates to the tests, it was reported to me that test 12
> fails on some systems (where all other tests pass):
>
>  12: transforming hard links on create               FAILED (xform-h.at:39)

What systems are these? Perhaps they don't support hard links at all?

>   char *dirp = savedir (dir);   /* for scanning directory */
> [...]
>   if (! dirp)
>     savedir_error (dir);
>
> Perhaps we need to patch this in our usual way?

Looks reasonable. I'll see what can be done.

Regards,
Sergey



Re: error handling and exit code (patch for tar-1.20/1.21)

by Dmitry V. Levin :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Tue, Aug 25, 2009 at 10:37:13AM +0300, Sergey Poznyakoff wrote:

> Solar Designer ha escrit:
>
> >  26: restoring mode on existing directory            FAILED (extrac08.at:36)
>
> Yes, this testcase should set umask.
>
> > Then, test 38 fails intermittently.  It appears to be more likely to fail
> > during the first build on a system than during subsequent rebuilds.  In
> > the diffs, I see:
>
> There is a race condition in the test. I'll fix it.
>
> > Finally, as it relates to the tests, it was reported to me that test 12
> > fails on some systems (where all other tests pass):
> >
> >  12: transforming hard links on create               FAILED (xform-h.at:39)
>
> What systems are these? Perhaps they don't support hard links at all?
This test appears to be sensitive to file names order: if
"ls -U basedir" outputs "test test_link", then xform-h.at works,
otherwise, if it outputs "test_link test", then this test fails.


--
ldv


attachment0 (204 bytes) Download Attachment

Re: error handling and exit code (patch for tar-1.20/1.21)

by Dmitry V. Levin :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Tue, Aug 25, 2009 at 04:50:09PM +0400, Dmitry V. Levin wrote:

> On Tue, Aug 25, 2009 at 10:37:13AM +0300, Sergey Poznyakoff wrote:
> > Solar Designer ha escrit:
> >
> > >  26: restoring mode on existing directory            FAILED (extrac08.at:36)
> >
> > Yes, this testcase should set umask.
> >
> > > Then, test 38 fails intermittently.  It appears to be more likely to fail
> > > during the first build on a system than during subsequent rebuilds.  In
> > > the diffs, I see:
> >
> > There is a race condition in the test. I'll fix it.
> >
> > > Finally, as it relates to the tests, it was reported to me that test 12
> > > fails on some systems (where all other tests pass):
> > >
> > >  12: transforming hard links on create               FAILED (xform-h.at:39)
> >
> > What systems are these? Perhaps they don't support hard links at all?
>
> This test appears to be sensitive to file names order: if
> "ls -U basedir" outputs "test test_link", then xform-h.at works,
> otherwise, if it outputs "test_link test", then this test fails.
Proposed fix:

--- a/tar/tests/xform-h.at
+++ b/tar/tests/xform-h.at
@@ -32,7 +32,7 @@ AT_KEYWORDS([transform xform xform-h])
 
 m4_define([xform],[
 echo "$1"
-tar cf archive --transform="s,^basedir/,,$2" basedir
+tar cf archive --transform="s,^basedir/,,$2" basedir/*
 tar tvf archive | sed -n 's/.*test_link link to //p'
 ])
 

--
ldv


attachment0 (204 bytes) Download Attachment

Re: error handling and exit code (patch for tar-1.20/1.21)

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Dmitry V. Levin <ldv@...> ha escrit:

> This test appears to be sensitive to file names order: if
> "ls -U basedir" outputs "test test_link", then xform-h.at works,
> otherwise, if it outputs "test_link test", then this test fails.

Yes, that's right. Thanks for reporting.

Regards,
Sergey



savedir() error handling (was: error handling and exit code (patch for tar-1.20/1.21))

by Solar Designer :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Sergey,

On Tue, Aug 25, 2009 at 10:37:13AM +0300, Sergey Poznyakoff wrote:
> Solar Designer <solar@...> ha escrit:
> >   char *dirp = savedir (dir);   /* for scanning directory */
> > [...]
> >   if (! dirp)
> >     savedir_error (dir);
> >
> > Perhaps we need to patch this in our usual way?
>
> Looks reasonable. I'll see what can be done.

I just had this occur (TAREXIT_FAILURE on a removed deep directory) on a
system that was already running 1.22.90-owl1, confirming that it is
indeed still affected.

Are you going to propose a patch for this, or should I?

tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: Cannot savedir: No such file or directory
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: Directory removed before we read it
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/solution: Cannot savedir: No such file or directory
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/solution: Directory removed before we read it
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: File removed before we read it
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/blog.html: File removed before we read it
[...]
tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/node/3111.html: File removed before we read it
tar: Exiting with failure status due to previous errors

Thanks,

Alexander



Re: savedir() error handling

by Jean-Louis Martineau-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

I sent a patch a few days for the same problem. But my user user was not
able to test it.
The patch call savedir only if needed.

If you can give it a try.

Jean-Louis


Solar Designer wrote:

> Sergey,
>
> On Tue, Aug 25, 2009 at 10:37:13AM +0300, Sergey Poznyakoff wrote:
>  
>> Solar Designer <solar@...> ha escrit:
>>    
>>>   char *dirp = savedir (dir);   /* for scanning directory */
>>> [...]
>>>   if (! dirp)
>>>     savedir_error (dir);
>>>
>>> Perhaps we need to patch this in our usual way?
>>>      
>> Looks reasonable. I'll see what can be done.
>>    
>
> I just had this occur (TAREXIT_FAILURE on a removed deep directory) on a
> system that was already running 1.22.90-owl1, confirming that it is
> indeed still affected.
>
> Are you going to propose a patch for this, or should I?
>
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: Cannot savedir: No such file or directory
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: Directory removed before we read it
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/solution: Cannot savedir: No such file or directory
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/solution: Directory removed before we read it
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/group: File removed before we read it
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/blog.html: File removed before we read it
> [...]
> tar: ./home/clients/websites/w_client/drupal/5.10/cache/example.org/0/node/3111.html: File removed before we read it
> tar: Exiting with failure status due to previous errors
>
> Thanks,
>
> Alexander
>
>
>  

diff --git a/src/incremen.c b/src/incremen.c
index bb2ba2f..6da029a 100644
--- a/src/incremen.c
+++ b/src/incremen.c
@@ -691,7 +691,7 @@ makedumpdir (struct directory *directory, const char *dir)
 struct directory *
 scan_directory (char *dir, dev_t device, bool cmdline)
 {
-  char *dirp = savedir (dir); /* for scanning directory */
+  char *dirp = NULL; /* for scanning directory */
   char *name_buffer; /* directory, `/', and directory member */
   size_t name_buffer_size; /* allocated size of name_buffer, minus 2 */
   size_t name_length; /* used length in name_buffer */
@@ -699,8 +699,6 @@ scan_directory (char *dir, dev_t device, bool cmdline)
   struct directory *directory;
   char ch;
   
-  if (! dirp)
-    savedir_error (dir);
 
   name_buffer_size = strlen (dir) + NAME_FIELD_SIZE;
   name_buffer = xmalloc (name_buffer_size + 2);
@@ -714,7 +712,6 @@ scan_directory (char *dir, dev_t device, bool cmdline)
            children = CHANGED_CHILDREN;
  but changed to: */
       free (name_buffer);
-      free (dirp);
       return NULL;
     }
 
@@ -730,6 +727,13 @@ scan_directory (char *dir, dev_t device, bool cmdline)
       name_buffer[++name_length] = 0;
     }
 
+  if (directory->children != NO_CHILDREN)
+    {
+      dirp = savedir (dir);
+      if (! dirp)
+        savedir_error (dir);
+    }
+
   if (dirp && directory->children != NO_CHILDREN)
     {
       char *entry; /* directory entry being scanned */

Re: savedir() error handling

by Sergey Poznyakoff-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jean-Louis Martineau <martineau@...> ha escrit:

> I sent a patch a few days for the same problem. But my user user was
> not able to test it.

Hi Jean-Louis! Thanks for reminding me about the patch.

> The patch call savedir only if needed.
>
> If you can give it a try.

Yes, Alexander, please do.

Regards,
Sergey