[PATCH] md5: accepts a new --threads option

View: New views
20 Messages — Rating Filter:   Alert me  
< Prev | 1 - 2 - 3 - 4 - 5 | Next >

[PATCH] md5: accepts a new --threads option

by Giuseppe Scrivano-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hello,

inspired by the attempt to make `sort' multi-threaded, I added threads
support to md5sum and the sha* programs family.  It has effect only when
multiple files are specified.

Any comment?

Cheers,
Giuseppe



From 1e4ed081f41ac0955542d3a0f1ad143047b8ac25 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivano@...>
Date: Sun, 18 Oct 2009 00:19:25 +0200
Subject: [PATCH] md5: accepts a new --threads option

* NEWS: Mention it.
* bootstrap.conf: Use the `nproc' and `pthread' modules from gnulib.
* doc/coreutils.texi: Document the new feature.
* src/Makefile.am (md5sum, sha1sum, sha224, sha256, sha384, sha512):
Link to the pthread library.
* src/md5sum.c (main): Add --threads and move some code into new
functions.
(long_options, usage): Add --threads.
(do_file): New function.
(thread_start): New function.
(check_files): New function.
* tests/misc/md5sum: Test the new --threads option.
* tests/misc/sha1sum: Ditto.
* tests/misc/sha224sum: Ditto.
* tests/misc/sha256sum: Ditto.
* tests/misc/sha384sum: Ditto.
* tests/misc/sha512sum: Ditto.
---
 NEWS                 |    3 +
 bootstrap.conf       |    2 +
 doc/coreutils.texi   |    8 ++
 src/Makefile.am      |   12 ++--
 src/md5sum.c         |  234 +++++++++++++++++++++++++++++++++++++-------------
 tests/misc/md5sum    |    6 ++
 tests/misc/sha1sum   |    6 ++
 tests/misc/sha224sum |    6 ++
 tests/misc/sha256sum |    6 ++
 tests/misc/sha384sum |    6 ++
 tests/misc/sha512sum |    6 ++
 11 files changed, 230 insertions(+), 65 deletions(-)

diff --git a/NEWS b/NEWS
index f8269fc..70af0b3 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,9 @@ GNU coreutils NEWS                                    -*- outline -*-
   md5sum --check now also accepts openssl-style checksums.
   So do sha1sum, sha224sum, sha384sum and sha512sum.
 
+  md5sum, sha1sum, sha224sum, sha384sum and sha512sum accept a new option
+  --threads to improve parallelism when multiple files are specified.
+
 
 * Noteworthy changes in release 8.0 (2009-10-06) [beta]
 
diff --git a/bootstrap.conf b/bootstrap.conf
index e9b198c..fb3304d 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -155,6 +155,7 @@ gnulib_modules="
   mktime
   modechange
   mountlist
+  nproc
   mpsort
   obstack
   pathmax
@@ -166,6 +167,7 @@ gnulib_modules="
   priv-set
   progname
   propername
+  pthread
   putenv
   quote
   quotearg
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 5026e76..b81cb81 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3496,6 +3496,14 @@ distinguish between binary and text files.  On other systems, it is
 the default for reading standard input when standard input is a
 terminal.
 
+@itemx --threads=@var{n}
+@opindex --threads
+@cindex verifying MD5 checksums
+Use up to @var{n} threads when multiple files are specified.  If a
+value is not specified then the number of processors is used.  The
+number of threads used is limited by the number of specified files
+thus in any case are not created more threads than files.
+
 @item -w
 @itemx --warn
 @opindex -w
diff --git a/src/Makefile.am b/src/Makefile.am
index 915ea81..33d2563 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -220,7 +220,7 @@ link_LDADD = $(LDADD)
 ln_LDADD = $(LDADD)
 logname_LDADD = $(LDADD)
 ls_LDADD = $(LDADD)
-md5sum_LDADD = $(LDADD)
+md5sum_LDADD = $(LDADD) $(LIB_PTHREAD)
 mkdir_LDADD = $(LDADD)
 mkfifo_LDADD = $(LDADD)
 mknod_LDADD = $(LDADD)
@@ -244,11 +244,11 @@ rmdir_LDADD = $(LDADD)
 runcon_LDADD = $(LDADD)
 seq_LDADD = $(LDADD)
 setuidgid_LDADD = $(LDADD)
-sha1sum_LDADD = $(LDADD)
-sha224sum_LDADD = $(LDADD)
-sha256sum_LDADD = $(LDADD)
-sha384sum_LDADD = $(LDADD)
-sha512sum_LDADD = $(LDADD)
+sha1sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha224sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha256sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha384sum_LDADD = $(LDADD) $(LIB_PTHREAD)
+sha512sum_LDADD = $(LDADD) $(LIB_PTHREAD)
 shred_LDADD = $(LDADD)
 shuf_LDADD = $(LDADD)
 sleep_LDADD = $(LDADD)
diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..161f1a6 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -20,8 +20,11 @@
 
 #include <getopt.h>
 #include <sys/types.h>
+#include <pthread.h>
 
 #include "system.h"
+#include "nproc.h"
+#include "xstrtol.h"
 
 #if HASH_ALGO_MD5
 # include "md5.h"
@@ -126,7 +129,8 @@ static bool quiet = false;
 enum
 {
   STATUS_OPTION = CHAR_MAX + 1,
-  QUIET_OPTION
+  QUIET_OPTION,
+  THREADS_OPTION
 };
 
 static struct option const long_options[] =
@@ -136,12 +140,28 @@ static struct option const long_options[] =
   { "quiet", no_argument, NULL, QUIET_OPTION },
   { "status", no_argument, NULL, STATUS_OPTION },
   { "text", no_argument, NULL, 't' },
+  { "threads", optional_argument, NULL, THREADS_OPTION},
   { "warn", no_argument, NULL, 'w' },
   { GETOPT_HELP_OPTION_DECL },
   { GETOPT_VERSION_OPTION_DECL },
   { NULL, 0, NULL, 0 }
 };
 
+
+struct thread_arg
+{
+  char **files;
+  int n_files;
+  unsigned char **bin_buffer;
+  bool *res;
+  int *file_is_binary;
+  bool do_check;
+  bool *busy;
+
+  /* Protect BUSY.  */
+  pthread_mutex_t mutex;
+};
+
 void
 usage (int status)
 {
@@ -179,6 +199,8 @@ With no FILE, or when FILE is -, read standard input.\n\
   -t, --text              read in text mode (default)\n\
 "), stdout);
       fputs (_("\
+      --threads=N           use up to N threads\n"), stdout);
+      fputs (_("\
 \n\
 The following three options are useful only when verifying checksums:\n\
       --quiet             don't print OK for each successfully verified file\n\
@@ -599,16 +621,154 @@ digest_check (const char *checkfile_name)
           && n_open_or_read_failures == 0);
 }
 
+static void
+do_file (struct thread_arg *ts, int j)
+{
+  char *file = ts->files[j];
+  if (ts->do_check)
+    ts->res[j] = digest_check (file);
+  else
+    ts->res[j] = digest_file (file, &ts->file_is_binary[j],
+                              ts->bin_buffer[j]);
+}
+
+static void*
+thread_start (void *arg)
+{
+  struct thread_arg *ts = arg;
+  int current = 0;
+  while (1)
+    {
+      pthread_mutex_lock (&ts->mutex);
+
+      while (current < ts->n_files && ts->busy[current])
+        current++;
+
+      if (current < ts->n_files)
+        ts->busy[current] = true;
+
+      pthread_mutex_unlock (&ts->mutex);
+
+      /* No other files, exit from the thread.  */
+      if (ts->n_files <= current)
+        return NULL;
+
+      do_file (ts, current++);
+    }
+
+  return NULL;
+}
+
+static bool
+check_files (char **files, unsigned long n_threads, int n_files, bool do_check,
+             int binary)
+{
+  int j;
+  int ok = 1;
+  unsigned char *bin_buffer_unaligned = xnmalloc (DIGEST_BIN_BYTES
+                                                  + DIGEST_ALIGN, n_files);
+  unsigned char *bin_buffer[n_files];
+  bool res[n_files];
+  bool busy[n_files];
+  int file_is_binary[n_files];
+  pthread_t tids[n_threads - 1];
+
+  struct thread_arg ts =
+    {
+      .bin_buffer = bin_buffer,
+      .busy = busy,
+      .do_check = do_check,
+      .files = files,
+      .file_is_binary = file_is_binary,
+      .mutex = PTHREAD_MUTEX_INITIALIZER,
+      .n_files = n_files,
+      .res = res,
+    };
+
+  for (int j = 0; j < n_files; j++)
+    {
+      /* Make sure bin_buffer is properly aligned. */
+      unsigned char *tmp = &bin_buffer_unaligned[j * (DIGEST_BIN_BYTES
+                                                      + DIGEST_ALIGN)];
+      bin_buffer[j] = ptr_align (tmp, DIGEST_ALIGN);
+      file_is_binary[j] = binary;
+      ts.busy[j] = false;
+    }
+
+  for (j = 0; j < n_threads - 1; j++)
+    if (pthread_create (&tids[j], NULL, thread_start, &ts))
+      error (EXIT_FAILURE, errno, "cannot spawn a new thread");
+
+  /* Use the main thread as a regular thread.  */
+  thread_start (&ts);
+
+  for (j = 0; j < n_threads - 1; j++)
+    pthread_join (tids[j], NULL);
+
+  for (j = 0; j < n_files; j++)
+    {
+      char *file = files[j];
+
+      if (do_check)
+        ok &= res[j];
+      else
+        {
+          if (! res[j])
+            ok = false;
+          else
+            {
+              size_t i;
+
+              /* Output a leading backslash if the file name contains
+                 a newline or backslash.  */
+              if (strchr (file, '\n') || strchr (file, '\\'))
+                putchar ('\\');
+
+              for (i = 0; i < (digest_hex_bytes / 2); ++i)
+                printf ("%02x", bin_buffer[j][i]);
+
+              putchar (' ');
+              if (file_is_binary[j])
+                putchar ('*');
+              else
+                putchar (' ');
+
+              /* Translate each NEWLINE byte to the string, "\\n",
+                 and each backslash to "\\\\".  */
+              for (i = 0; i < strlen (file); ++i)
+                {
+                  switch (file[i])
+                    {
+                    case '\n':
+                      fputs ("\\n", stdout);
+                      break;
+
+                    case '\\':
+                      fputs ("\\\\", stdout);
+                      break;
+
+                    default:
+                      putchar (file[i]);
+                      break;
+                    }
+                }
+              putchar ('\n');
+            }
+        }
+    }
+  free (bin_buffer_unaligned);
+
+  return ok > 0;
+}
+
 int
 main (int argc, char **argv)
 {
-  unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
-  /* Make sure bin_buffer is properly aligned. */
-  unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
-  bool do_check = false;
   int opt;
-  bool ok = true;
+  bool ok;
   int binary = -1;
+  bool do_check = false;
+  unsigned long n_threads = 1;
 
   /* Setting values of global variables.  */
   initialize_main (&argc, &argv);
@@ -646,6 +806,12 @@ main (int argc, char **argv)
         warn = false;
         quiet = true;
         break;
+      case THREADS_OPTION:
+        if (optarg)
+          xstrtoul (optarg, NULL, 10, &n_threads, "");
+        else
+          n_threads = num_processors ();
+        break;
       case_GETOPT_HELP_CHAR;
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
       default:
@@ -689,59 +855,9 @@ main (int argc, char **argv)
   if (optind == argc)
     argv[argc++] = bad_cast ("-");
 
-  for (; optind < argc; ++optind)
-    {
-      char *file = argv[optind];
-
-      if (do_check)
-        ok &= digest_check (file);
-      else
-        {
-          int file_is_binary = binary;
-
-          if (! digest_file (file, &file_is_binary, bin_buffer))
-            ok = false;
-          else
-            {
-              size_t i;
-
-              /* Output a leading backslash if the file name contains
-                 a newline or backslash.  */
-              if (strchr (file, '\n') || strchr (file, '\\'))
-                putchar ('\\');
-
-              for (i = 0; i < (digest_hex_bytes / 2); ++i)
-                printf ("%02x", bin_buffer[i]);
-
-              putchar (' ');
-              if (file_is_binary)
-                putchar ('*');
-              else
-                putchar (' ');
-
-              /* Translate each NEWLINE byte to the string, "\\n",
-                 and each backslash to "\\\\".  */
-              for (i = 0; i < strlen (file); ++i)
-                {
-                  switch (file[i])
-                    {
-                    case '\n':
-                      fputs ("\\n", stdout);
-                      break;
-
-                    case '\\':
-                      fputs ("\\\\", stdout);
-                      break;
-
-                    default:
-                      putchar (file[i]);
-                      break;
-                    }
-                }
-              putchar ('\n');
-            }
-        }
-    }
+  size_t n_files = argc - optind;
+  ok = check_files (&argv[optind], MIN (n_threads, n_files), n_files, do_check,
+                    binary);
 
   if (have_read_stdin && fclose (stdin) == EOF)
     error (EXIT_FAILURE, errno, _("standard input"));
diff --git a/tests/misc/md5sum b/tests/misc/md5sum
index 30edd9e..ae49954 100755
--- a/tests/misc/md5sum
+++ b/tests/misc/md5sum
@@ -96,6 +96,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha1sum b/tests/misc/sha1sum
index d084204..2a3ca6a 100755
--- a/tests/misc/sha1sum
+++ b/tests/misc/sha1sum
@@ -82,6 +82,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha224sum b/tests/misc/sha224sum
index aace96c..0405510 100755
--- a/tests/misc/sha224sum
+++ b/tests/misc/sha224sum
@@ -41,6 +41,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha256sum b/tests/misc/sha256sum
index d85f248..e376155 100755
--- a/tests/misc/sha256sum
+++ b/tests/misc/sha256sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha384sum b/tests/misc/sha384sum
index c5818e2..0c60824 100755
--- a/tests/misc/sha384sum
+++ b/tests/misc/sha384sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
diff --git a/tests/misc/sha512sum b/tests/misc/sha512sum
index 9a45602..7e19cb9 100755
--- a/tests/misc/sha512sum
+++ b/tests/misc/sha512sum
@@ -47,6 +47,12 @@ foreach $t (@Tests)
     splice @$t, 1, 0, '--text' unless @$t[1] =~ /--check/;
   }
 
+# Insert the `--threads=2' argument for each test.
+foreach $t (@Tests)
+  {
+    splice @$t, 1, 0, '--threads=2' unless @$t[1] =~ /--check/;
+  }
+
 my $save_temps = $ENV{DEBUG};
 my $verbose = $ENV{VERBOSE};
 
--
1.6.3.3




Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Giuseppe Scrivano wrote:
> Hello,
>
> inspired by the attempt to make `sort' multi-threaded, I added threads
> support to md5sum and the sha* programs family.  It has effect only when
> multiple files are specified.
>
> Any comment?

How does it compare to:

files_per_process=10
cpus=4
find files | xargs -n$files_per_process -P$cpus md5sum

I would expect it to be a bit better as file_per_process
could be very large, thus having less overhead in starting
processes. Though is the benefit worth the extra implementation
complexity and new less general interface for users?

cheers,
Pádraig.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

> Giuseppe Scrivano wrote:
>> Hello,
>>
>> inspired by the attempt to make `sort' multi-threaded, I added threads
>> support to md5sum and the sha* programs family.  It has effect only when
>> multiple files are specified.
>>
>> Any comment?
>
> How does it compare to:
>
> files_per_process=10
> cpus=4
> find files | xargs -n$files_per_process -P$cpus md5sum
>
> I would expect it to be a bit better as file_per_process
> could be very large, thus having less overhead in starting
> processes. Though is the benefit worth the extra implementation
> complexity and new less general interface for users?

Hi James,

I was wondering would it be worth adding the nproc module to xargs
to support -P without any options to use the number of CPUs
as the value.

Hmm, I could see it being useful to specify NCPUs-1 also.
I wonder is there a general external method to determine
the number of CPUs.

cheers,
Pádraig.



Re: [PATCH] md5: accepts a new --threads option

by Giuseppe Scrivano-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi Pádraig,

> How does it compare to:
>
> files_per_process=10
> cpus=4
> find files | xargs -n$files_per_process -P$cpus md5sum
>
> I would expect it to be a bit better as file_per_process
> could be very large, thus having less overhead in starting
> processes. Though is the benefit worth the extra implementation
> complexity and new less general interface for users?

The main advantage is not to save processes startup overhead but to keep
the CPUs always busy when it is possible.

If the files have similar length then the results are approximately the
same.  It is very different when the files don't have similar length and
in this case a processor remains sleeping.

I created four files: `a' and `b' are 150Mb, differently `c' and `d' are
few Kb.  These are the results I get:

$ time ./sha1sum --threads /tmp/test_files/*
09e00486b4fb88805f7261fac1dd4c7f0ee7640e  /tmp/test_files/a
203c0607c7ebff14ecf23b37005a714f2dc19b0b  /tmp/test_files/b
3f786850e387550fdab836ed7e6dc881de23001b  /tmp/test_files/c
d7c8127a20a396cff08af086a1c695b0636f0c29  /tmp/test_files/d

real 0m1.804s



$ time find  /tmp/test_files/* | xargs -n4 -P2 ./sha1sum
09e00486b4fb88805f7261fac1dd4c7f0ee7640e  /tmp/test_files/a
203c0607c7ebff14ecf23b37005a714f2dc19b0b  /tmp/test_files/b
3f786850e387550fdab836ed7e6dc881de23001b  /tmp/test_files/c
d7c8127a20a396cff08af086a1c695b0636f0c29  /tmp/test_files/d

real 0m3.288s


Cheers,
Giuseppe



Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

> Pádraig Brady wrote:
>> Giuseppe Scrivano wrote:
>>> Hello,
>>>
>>> inspired by the attempt to make `sort' multi-threaded, I added threads
>>> support to md5sum and the sha* programs family.  It has effect only when
>>> multiple files are specified.
>>>
>>> Any comment?
>>
>> How does it compare to:
>>
>> files_per_process=10
>> cpus=4
>> find files | xargs -n$files_per_process -P$cpus md5sum
>>
>> I would expect it to be a bit better as file_per_process
>> could be very large, thus having less overhead in starting
>> processes. Though is the benefit worth the extra implementation
>> complexity and new less general interface for users?
>
> Hi James,
>
> I was wondering would it be worth adding the nproc module to xargs
> to support -P without any options to use the number of CPUs
> as the value.
>
> Hmm, I could see it being useful to specify NCPUs-1 also.
> I wonder is there a general external method to determine
> the number of CPUs.

As far as I know, there is nothing portable.
Want to add another program to coreutils?
With what Bruno has just added to gnulib's nproc module,
we should have most platforms covered.



Re: [PATCH] md5: accepts a new --threads option

by Giuseppe Scrivano-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jim Meyering <jim@...> writes:

>> Hmm, I could see it being useful to specify NCPUs-1 also.
>> I wonder is there a general external method to determine
>> the number of CPUs.
>
> As far as I know, there is nothing portable.
> Want to add another program to coreutils?
> With what Bruno has just added to gnulib's nproc module,
> we should have most platforms covered.

What about a new switch to `arch'?

Giuseppe



Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Giuseppe Scrivano wrote:

> Jim Meyering <jim@...> writes:
>
>>> Hmm, I could see it being useful to specify NCPUs-1 also.
>>> I wonder is there a general external method to determine
>>> the number of CPUs.
>>
>> As far as I know, there is nothing portable.
>> Want to add another program to coreutils?
>> With what Bruno has just added to gnulib's nproc module,
>> we should have most platforms covered.
>
> What about a new switch to `arch'?

Sorry, no.
arch is not installed by default, for portability reasons.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Giuseppe Scrivano wrote:

>
> I created four files: `a' and `b' are 150Mb, differently `c' and `d' are
> few Kb.  These are the results I get:
>
> $ time ./sha1sum --threads /tmp/test_files/*
> 09e00486b4fb88805f7261fac1dd4c7f0ee7640e  /tmp/test_files/a
> 203c0607c7ebff14ecf23b37005a714f2dc19b0b  /tmp/test_files/b
> 3f786850e387550fdab836ed7e6dc881de23001b  /tmp/test_files/c
> d7c8127a20a396cff08af086a1c695b0636f0c29  /tmp/test_files/d
>
> real 0m1.804s
>
> $ time find  /tmp/test_files/* | xargs -n4 -P2 ./sha1sum
> 09e00486b4fb88805f7261fac1dd4c7f0ee7640e  /tmp/test_files/a
> 203c0607c7ebff14ecf23b37005a714f2dc19b0b  /tmp/test_files/b
> 3f786850e387550fdab836ed7e6dc881de23001b  /tmp/test_files/c
> d7c8127a20a396cff08af086a1c695b0636f0c29  /tmp/test_files/d
>
> real 0m3.288s

With -n4 only 1 process would be started.
Could you repeat with -n1 please for comparison.
One would only need to increase -n for large numbers of files.

thanks,
Pádraig.



Re: [PATCH] md5: accepts a new --threads option

by Giuseppe Scrivano-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady <P@...> writes:

> With -n4 only 1 process would be started.
> Could you repeat with -n1 please for comparison.
> One would only need to increase -n for large numbers of files.

Sprry the mistake.  In this case results are equal.


$ time ./sha1sum --threads=2 /tmp/test_files/*
a91f3339c20255db34c8489440dcc43c004d41cb  /tmp/test_files/a
89b1e89850b377e74b027777c7cb15e480ba6311  /tmp/test_files/b
a54bc57f943632adb9982f492b145a61229c2721  /tmp/test_files/c
e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0  /tmp/test_files/d

real 0m1.821s


$ time find  /tmp/test_files/* | xargs -n2 -P2 ./sha1sum
a54bc57f943632adb9982f492b145a61229c2721  /tmp/test_files/c
e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0  /tmp/test_files/d
a91f3339c20255db34c8489440dcc43c004d41cb  /tmp/test_files/a
89b1e89850b377e74b027777c7cb15e480ba6311  /tmp/test_files/b

real 0m3.389s


$ time ls  /tmp/test_files/* | xargs -n1 -P2 ./sha1sum
89b1e89850b377e74b027777c7cb15e480ba6311  /tmp/test_files/b
a54bc57f943632adb9982f492b145a61229c2721  /tmp/test_files/c
e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0  /tmp/test_files/d
a91f3339c20255db34c8489440dcc43c004d41cb  /tmp/test_files/a

real 0m1.844s


Giuseppe



Re: [PATCH] md5: accepts a new --threads option

by Giuseppe Scrivano-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jim Meyering <jim@...> writes:

>> What about a new switch to `arch'?
>
> Sorry, no.
> arch is not installed by default, for portability reasons.

Does uname have the same problem?  It already has a "--processor"
option and IMHO it would be better to get similar information using
the same tool.

By the way, under GNU/Linux `uname --processor' returns "unknown".  Do
you think it is a good idea to read this information from "/proc"?

Cheers,
Giuseppe



Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Giuseppe Scrivano wrote:

> Jim Meyering <jim@...> writes:
>
>>> What about a new switch to `arch'?
>>
>> Sorry, no.
>> arch is not installed by default, for portability reasons.
>
> Does uname have the same problem?  It already has a "--processor"
> option and IMHO it would be better to get similar information using
> the same tool.

It's similar.  There are enough vendor-supplied variants of uname
that many installers opt not to install the one from coreutils.

> By the way, under GNU/Linux `uname --processor' returns "unknown".  Do
> you think it is a good idea to read this information from "/proc"?

Let's not go there ;-)
This has been proposed and rejected many times over the years.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

> Giuseppe Scrivano wrote:
>> Hello,
>>
>> inspired by the attempt to make `sort' multi-threaded, I added threads
>> support to md5sum and the sha* programs family.  It has effect only when
>> multiple files are specified.
>>
>> Any comment?
>
> How does it compare to:
>
> files_per_process=10
> cpus=4
> find files | xargs -n$files_per_process -P$cpus md5sum
>
> I would expect it to be a bit better as file_per_process
> could be very large, thus having less overhead in starting
> processes. Though is the benefit worth the extra implementation
> complexity and new less general interface for users?

Expanding a bit on why I don't think this should be added...

You don't gain much by splitting the work per file as
the UNIX toolkit is already well equipped as to process
multiple files in parallel with:

  find files | xargs -n$files_per_process -P$processes md5sum

That is a more general solution and works for any command
or collection of commands (script). Also more generally
the work could be split across multiple machines (in the case
where the processing cost is bigger than transmission), using
ssh or whatever:

  find files | dxargs¹ ...

Also one often wants to split the work per data source
rather than per CPU and so would need a variant of the
above rather than a contained threaded solution. Consider
the case where you have files on separate disks (heads).
You wouldn't want multiple threads/processes fighting over
the disk head so you would do something like:

  find /disk1 | xargs md5sum & find /disk2 | xargs md5sum

Note if we're piping/redirecting the output of the above
then we must be careful to line buffer the output from md5sum
so that it's not interspersed. Hmm I wonder should
we linebuffer the output from *sum by default.
In the meantime one can check for the correct output by
varying the -o parameter in the following:

  (
   find /etc | xargs ./stdbuf -oL md5sum &
   find /etc | xargs ./stdbuf -oL md5sum
  ) 2>/dev/null | sed -n '/[^ ]\{32\}/!p'

Now it's a different story if the data within a file
could be processed in parallel. I.E. if the digest
algorithms themselves could be parallelized.
The higher the processing cost compared to the I/O cost,
the bigger the benefit would be. Doing a very quick
check of these costs on my laptop...

$ timeout -sINT 10 dd bs=32K if=/dev/sda of=/dev/null
347570176 bytes (348 MB) copied, 10.004 s, 34.7 MB/s

$ timeout -sINT 10 dd bs=32K if=/dev/zero | ./md5sum
1816690688 bytes (1.8 GB) copied, 10.0002 s, 182 MB/s

$ timeout -sINT 10 dd bs=32K if=/dev/zero | ./cat >/dev/null
9205088256 bytes (9.2 GB) copied, 10.0514 s, 916 MB/s

$ timeout -sINT 10 dd bs=32K if=/dev/zero of=/dev/null
48931995648 bytes (49 GB) copied, 10.0314 s, 4.9 GB/s

Note there is some low hanging fruit with speeding up md5sum et. al.
They seem to use stdio needlessly, thus introducing data copying.
Also there is an improved sha1 floating around that's 25% more efficient.

cheers,
Pádraig.

¹ http://www.semicomplete.com/blog/geekery/distributed-xargs.html



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

>
> You wouldn't want multiple threads/processes fighting over
> the disk head so you would do something like:
>
>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>
> Note if we're piping/redirecting the output of the above
> then we must be careful to line buffer the output from md5sum
> so that it's not interspersed. Hmm I wonder should
> we linebuffer the output from *sum by default.
In the attached patch, I've changed the default buffering
to line buffered to address the above issue. For standard
size files there is a 2% performance drop.

cheers,
Pádraig.

p.s. I'll look at bypassing stdio on input to see
if I can get at least the 2% back

From 0db7057c6256d9cd25e988b3fe23e97a0e30f717 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <P@...>
Date: Tue, 20 Oct 2009 19:19:58 +0100
Subject: [PATCH] md5sum, sha*sum, sum: line buffer the outputted checksums

* src/md5sum.c (main): Set stdout to line buffered mode
to ensure parallel running instances don't intersperse
their output.  This adds 5% to the run time in the worst case
of many zero length files, or 2% with standard file sizes.
* src/sum.c (main): Likewise.
---
 src/md5sum.c |    6 ++++--
 src/sum.c    |    4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..b7db03e 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -513,7 +513,6 @@ digest_check (const char *checkfile_name)
               if (!status_only)
                 {
                   printf (_("%s: FAILED open or read\n"), filename);
-                  fflush (stdout);
                 }
             }
           else
@@ -539,7 +538,6 @@ digest_check (const char *checkfile_name)
                     printf ("%s: %s\n", filename, _("FAILED"));
                   else if (!quiet)
                     printf ("%s: %s\n", filename, _("OK"));
-                  fflush (stdout);
                 }
             }
         }
@@ -619,6 +617,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
     switch (opt)
       {
diff --git a/src/sum.c b/src/sum.c
index 91d7f34..f0e0cc0 100644
--- a/src/sum.c
+++ b/src/sum.c
@@ -233,6 +233,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   have_read_stdin = false;
 
   while ((optc = getopt_long (argc, argv, "rs", longopts, NULL)) != -1)
--
1.6.2.5


Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

> Pádraig Brady wrote:
>>
>> You wouldn't want multiple threads/processes fighting over
>> the disk head so you would do something like:
>>
>>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>>
>> Note if we're piping/redirecting the output of the above
>> then we must be careful to line buffer the output from md5sum
>> so that it's not interspersed. Hmm I wonder should
>> we linebuffer the output from *sum by default.
>
> In the attached patch, I've changed the default buffering
> to line buffered to address the above issue. For standard
> size files there is a 2% performance drop.

Good catch.
It sounds like this fixes a real (albeit obscure) bug, so this
might deserve a NEWS item, though I admit it is borderline.

Thanks!

> p.s. I'll look at bypassing stdio on input to see
> if I can get at least the 2% back

IMHO, even if it did, it would not be worth it.

>>From 0db7057c6256d9cd25e988b3fe23e97a0e30f717 Mon Sep 17 00:00:00 2001
> From: =?utf-8?q?P=C3=A1draig=20Brady?= <P@...>
> Date: Tue, 20 Oct 2009 19:19:58 +0100
> Subject: [PATCH] md5sum, sha*sum, sum: line buffer the outputted checksums

s/outputted/printed/
s/line buffer/line-buffer/

> * src/md5sum.c (main): Set stdout to line buffered mode
> to ensure parallel running instances don't intersperse
> their output.  This adds 5% to the run time in the worst case
> of many zero length files, or 2% with standard file sizes.
> * src/sum.c (main): Likewise.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jim Meyering wrote:

> Pádraig Brady wrote:
>> Pádraig Brady wrote:
>>> You wouldn't want multiple threads/processes fighting over
>>> the disk head so you would do something like:
>>>
>>>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>>>
>>> Note if we're piping/redirecting the output of the above
>>> then we must be careful to line buffer the output from md5sum
>>> so that it's not interspersed. Hmm I wonder should
>>> we linebuffer the output from *sum by default.
>> In the attached patch, I've changed the default buffering
>> to line buffered to address the above issue. For standard
>> size files there is a 2% performance drop.
>
> Good catch.
> It sounds like this fixes a real (albeit obscure) bug, so this
> might deserve a NEWS item, though I admit it is borderline.

Well it would easily be hit when one tries to parallelize the processes.
So I'll add a NEWS item and a test along the lines of:

(mkdir t && cd t && seq 100 | xargs touch)
(find t t t t -type f | xargs -n100 -P4 md5sum) | sed -n '/[0-9a-f]\{32\}  /!p' |
grep . >/dev/null && fail=1

cheers,
Pádraig.



Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:

> Jim Meyering wrote:
>> Pádraig Brady wrote:
>>> Pádraig Brady wrote:
>>>> You wouldn't want multiple threads/processes fighting over
>>>> the disk head so you would do something like:
>>>>
>>>>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>>>>
>>>> Note if we're piping/redirecting the output of the above
>>>> then we must be careful to line buffer the output from md5sum
>>>> so that it's not interspersed. Hmm I wonder should
>>>> we linebuffer the output from *sum by default.
>>> In the attached patch, I've changed the default buffering
>>> to line buffered to address the above issue. For standard
>>> size files there is a 2% performance drop.
>>
>> Good catch.
>> It sounds like this fixes a real (albeit obscure) bug, so this
>> might deserve a NEWS item, though I admit it is borderline.
>
> Well it would easily be hit when one tries to parallelize the processes.
> So I'll add a NEWS item and a test along the lines of:

Thanks!

> (mkdir t && cd t && seq 100 | xargs touch)
> (find t t t t -type f | xargs -n100 -P4 md5sum) \
> | sed -n '/[0-9a-f]\{32\}  /!p' | grep . >/dev/null && fail=1

Odd... that doesn't fail on any of the systems where I tried it:
rawhide, fedora 11, debian unstable.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jim Meyering wrote:

> Pádraig Brady wrote:
>> Jim Meyering wrote:
>>> Pádraig Brady wrote:
>>>> Pádraig Brady wrote:
>>>>> You wouldn't want multiple threads/processes fighting over
>>>>> the disk head so you would do something like:
>>>>>
>>>>>   find /disk1 | xargs md5sum & find /disk2 | xargs md5sum
>>>>>
>>>>> Note if we're piping/redirecting the output of the above
>>>>> then we must be careful to line buffer the output from md5sum
>>>>> so that it's not interspersed. Hmm I wonder should
>>>>> we linebuffer the output from *sum by default.
>>>> In the attached patch, I've changed the default buffering
>>>> to line buffered to address the above issue. For standard
>>>> size files there is a 2% performance drop.
>>> Good catch.
>>> It sounds like this fixes a real (albeit obscure) bug, so this
>>> might deserve a NEWS item, though I admit it is borderline.
>> Well it would easily be hit when one tries to parallelize the processes.
>> So I'll add a NEWS item and a test along the lines of:
>
> Thanks!
>
>> (mkdir t && cd t && seq 100 | xargs touch)
>> (find t t t t -type f | xargs -n100 -P4 md5sum) \
>> | sed -n '/[0-9a-f]\{32\}  /!p' | grep . >/dev/null && fail=1
>
> Odd... that doesn't fail on any of the systems where I tried it:
> rawhide, fedora 11, debian unstable.
Yep I noticed it triggered on my linux-2.6.22 glibc-2.6-1 box
but not on my F11 laptop. I've setup the test in the attached
to output more than 16KiB per process which triggers on F11 at least.

cheers,
Pádraig.

From 83085a61c1251f95bd324cd50f57c5154645a1b3 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?P=C3=A1draig=20Brady?= <P@...>
Date: Tue, 20 Oct 2009 19:19:58 +0100
Subject: [PATCH] md5sum, sha*sum, sum: line-buffer the printed checksums

* src/md5sum.c (main): Set stdout to line buffered mode
to ensure parallel running instances don't intersperse
their output.  This adds 5% to the run time in the worst case
of many zero length files, or 2% with standard file sizes.
* src/sum.c (main): Likewise.
* tests/misc/md5sum-parallel: New test for atomic output.
* tests/Makefile.am: Reference it.
* NEWS: Mention the fix
---
 NEWS                       |    5 +++++
 src/md5sum.c               |    6 ++++--
 src/sum.c                  |    4 ++++
 tests/Makefile.am          |    1 +
 tests/misc/md5sum-parallel |   37 +++++++++++++++++++++++++++++++++++++
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100755 tests/misc/md5sum-parallel

diff --git a/NEWS b/NEWS
index 1bf87cb..29bedac 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,11 @@ GNU coreutils NEWS                                    -*- outline -*-
   btrfs, cgroupfs, cramfs-wend, debugfs, futexfs, hfs, inotifyfs, minux3,
   nilfs, securityfs, selinux, xenfs
 
+  md5sum now prints checksums atomically so that concurrent
+  processes will not intersperse their output.
+  This also affected sum, sha1sum, sha224sum, sha384sum and sha512sum.
+  [the bug dates back to the initial implementation]
+
 ** New features
 
   md5sum --check now also accepts openssl-style checksums.
diff --git a/src/md5sum.c b/src/md5sum.c
index aa2a144..b7db03e 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -513,7 +513,6 @@ digest_check (const char *checkfile_name)
               if (!status_only)
                 {
                   printf (_("%s: FAILED open or read\n"), filename);
-                  fflush (stdout);
                 }
             }
           else
@@ -539,7 +538,6 @@ digest_check (const char *checkfile_name)
                     printf ("%s: %s\n", filename, _("FAILED"));
                   else if (!quiet)
                     printf ("%s: %s\n", filename, _("OK"));
-                  fflush (stdout);
                 }
             }
         }
@@ -619,6 +617,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
     switch (opt)
       {
diff --git a/src/sum.c b/src/sum.c
index 91d7f34..f0e0cc0 100644
--- a/src/sum.c
+++ b/src/sum.c
@@ -233,6 +233,10 @@ main (int argc, char **argv)
 
   atexit (close_stdout);
 
+  /* Line buffer stdout to ensure lines are written atomically and immediately
+     so that processes running in parallel do not intersperse their output.  */
+  setvbuf (stdout, NULL, _IOLBF, 0);
+
   have_read_stdin = false;
 
   while ((optc = getopt_long (argc, argv, "rs", longopts, NULL)) != -1)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 751db1c..4d8415f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -178,6 +178,7 @@ TESTS = \
   misc/id-groups \
   misc/md5sum \
   misc/md5sum-newline \
+  misc/md5sum-parallel \
   misc/mknod \
   misc/nice \
   misc/nl \
diff --git a/tests/misc/md5sum-parallel b/tests/misc/md5sum-parallel
new file mode 100755
index 0000000..763ee79
--- /dev/null
+++ b/tests/misc/md5sum-parallel
@@ -0,0 +1,37 @@
+#!/bin/sh
+# Ensure that md5sum prints each checksum atomically
+# so that concurrent md5sums don't intersperse their output
+
+# Copyright (C) 2009 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-lib.sh
+
+if test "$VERBOSE" = yes; then
+  set -x
+  md5sum --version
+fi
+
+fail=0
+
+(mkdir tmp && cd tmp && seq 500 | xargs touch)
+
+# This will output at least 16KiB per process
+# which is enough to trigger on Fedora 11 at least
+(find tmp tmp -type f | xargs -n500 -P2 md5sum) |
+sed -n '/[0-9a-f]\{32\}  /!p' |
+grep . > /dev/null && fail=1
+
+Exit $fail
--
1.6.2.5


RE: [PATCH] md5: accepts a new --threads option

by Voelker, Bernhard-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

 

Jim Meyering wrote:
> Pádraig Brady wrote:
> > (mkdir t && cd t && seq 100 | xargs touch)
> > (find t t t t -type f | xargs -n100 -P4 md5sum) \
> > | sed -n '/[0-9a-f]\{32\}  /!p' | grep . >/dev/null && fail=1
>
> Odd... that doesn't fail on any of the systems where I tried it:
> rawhide, fedora 11, debian unstable.

nor does it fail on Solaris 10/SPARC here ...

Berny





Re: [PATCH] md5: accepts a new --threads option

by Jim Meyering :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Pádraig Brady wrote:
...
>> Odd... that doesn't fail on any of the systems where I tried it:
>> rawhide, fedora 11, debian unstable.
>
> Yep I noticed it triggered on my linux-2.6.22 glibc-2.6-1 box
> but not on my F11 laptop. I've setup the test in the attached
> to output more than 16KiB per process which triggers on F11 at least.
...
> +(mkdir tmp && cd tmp && seq 500 | xargs touch)
> +
> +# This will output at least 16KiB per process
> +# which is enough to trigger on Fedora 11 at least
> +(find tmp tmp -type f | xargs -n500 -P2 md5sum) |
> +sed -n '/[0-9a-f]\{32\}  /!p' |
> +grep . > /dev/null && fail=1

Thanks.  That's better.
However, it too sometimes fails to fail:
[here, the grep matched nothing 2 of 200 times]

$ for i in $(seq 200); do (find tmp tmp -type f | xargs -n500 -P2 md5sum) \
  | sed -n '/[0-9a-f]\{32\}  /!p'|grep -q . && t=x || t=.; printf $t; done
xxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

Adding one more "tmp" made the above spot a failure for 1500
iterations in a row.



Re: [PATCH] md5: accepts a new --threads option

by Pádraig Brady :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Jim Meyering wrote:

> Pádraig Brady wrote:
> ...
>>> Odd... that doesn't fail on any of the systems where I tried it:
>>> rawhide, fedora 11, debian unstable.
>> Yep I noticed it triggered on my linux-2.6.22 glibc-2.6-1 box
>> but not on my F11 laptop. I've setup the test in the attached
>> to output more than 16KiB per process which triggers on F11 at least.
> ...
>> +(mkdir tmp && cd tmp && seq 500 | xargs touch)
>> +
>> +# This will output at least 16KiB per process
>> +# which is enough to trigger on Fedora 11 at least
>> +(find tmp tmp -type f | xargs -n500 -P2 md5sum) |
>> +sed -n '/[0-9a-f]\{32\}  /!p' |
>> +grep . > /dev/null && fail=1
>
> Thanks.  That's better.
> However, it too sometimes fails to fail:
> [here, the grep matched nothing 2 of 200 times]

Wow that's much too infrequent and suggests your
system is very fast and getting xargs to reap
and start another process, perturbs the buffering
enough. I'll add "tmp tmp tmp" to the test.

thanks,
Pádraig.


< Prev | 1 - 2 - 3 - 4 - 5 | Next >