Disable checksumming to improve local performance?

View: New views
2 Messages — Rating Filter:   Alert me  

Disable checksumming to improve local performance?

by Greg Siekas-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Is it possible to disable checksumming?  I'm using rsync with -W,  
whole file, so if a file has changed I don't want to just transfer the  
changes.  The source and destination are local filesystems.
I've noticed that performance in 3.0.6 is slower than 2.6.9.  3.0.6 is  
at ~90MB/sec and 2.6.9 is at ~107MB/sec.   I'm looking for anything  
that can help to improve transfer rates on large files.

thanks,
Greg



--
Please use reply-all for most replies to avoid omitting the mailing list.
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html

Re: Disable checksumming to improve local performance?

by Wayne Davison-2 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Fri, Sep 04, 2009 at 10:26:43AM -0700, Greg Siekas wrote:
> Is it possible to disable checksumming?  I'm using rsync with -W,
> whole file, so if a file has changed I don't want to just transfer
> the changes.

There is not currently a way to do that.  I whipped up the appended
patch that makes -WW disable full-file checksum computation and makes
that the default for a local transfer.  When comparing -W vs -WW in some
local-transfer testing, it didn't result in any perceivable difference
in transfer speed or runtime.  You can verify that it is enabled by
using --out-format='%i %C %n%L' and noting that all the checksums turn
into all-bits-on values (the checksum bytes are still transmitted so
that a read-error on the sending side can be indicated to the receiver).

..wayne..


index 6655acd..d8b228d 100644
--- a/compat.c
+++ b/compat.c
@@ -34,6 +34,7 @@ extern int use_qsort;
 extern int allow_inc_recurse;
 extern int append_mode;
 extern int fuzzy_basis;
+extern int whole_file;
 extern int read_batch;
 extern int delay_updates;
 extern int checksum_seed;
@@ -286,6 +287,8 @@ void setup_protocol(int f_out,int f_in)
  receiver_symlink_times = 1;
 #endif
  }
+ if (whole_file > 1 && protocol_version < 31)
+ whole_file = 1;
 
  if (need_unsorted_flist && (!am_sender || inc_recurse))
  unsort_ndx = ++file_extra_cnt;
index 407568d..72636eb 100644
--- a/main.c
+++ b/main.c
@@ -519,7 +519,7 @@ static pid_t do_cmd(char *cmd, char *machine, char *user, char **remote_argv, in
  /* If the user didn't request --[no-]whole-file, force
  * it on, but only if we're not batch processing. */
  if (whole_file < 0 && !write_batch)
- whole_file = 1;
+ whole_file = 2;
  set_allow_inc_recurse();
  pid = local_child(argc, args, f_in_p, f_out_p, child_main);
 #ifdef ICONV_CONST
index 611035f..2c356e3 100644
--- a/match.c
+++ b/match.c
@@ -25,6 +25,7 @@
 extern int checksum_seed;
 extern int append_mode;
 extern int checksum_len;
+extern int whole_file;
 
 int updating_basis_file;
 char sender_file_sum[MAX_DIGEST_LEN];
@@ -124,9 +125,11 @@ static void matched(int f, struct sum_struct *s, struct map_struct *buf,
  n += s->sums[i].len;
  }
 
- for (j = 0; j < n; j += CHUNK_SIZE) {
- int32 n1 = MIN(CHUNK_SIZE, n - j);
- sum_update(map_ptr(buf, last_match + j, n1), n1);
+ if (whole_file < 2) {
+ for (j = 0; j < n; j += CHUNK_SIZE) {
+ int32 n1 = MIN(CHUNK_SIZE, n - j);
+ sum_update(map_ptr(buf, last_match + j, n1), n1);
+ }
  }
 
  if (i >= 0)
@@ -336,7 +339,10 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
  matches = 0;
  data_transfer = 0;
 
- sum_init(checksum_seed);
+ if (whole_file < 2)
+ sum_init(checksum_seed);
+ else
+ memset(sender_file_sum, 0xFF, checksum_len);
 
  if (append_mode > 0) {
  if (append_mode == 2) {
@@ -377,7 +383,7 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len)
  matched(f, s, buf, len, -1);
  }
 
- if (sum_end(sender_file_sum) != checksum_len)
+ if (whole_file < 2 && sum_end(sender_file_sum) != checksum_len)
  overflow_exit("checksum_len"); /* Impossible... */
 
  /* If we had a read error, send a bad checksum.  We use all bits
index 66820b5..f9b1939 100644
--- a/options.c
+++ b/options.c
@@ -34,14 +34,9 @@ extern filter_rule_list daemon_filter_list;
 
 int make_backups = 0;
 
-/**
- * If 1, send the whole file as literal data rather than trying to
- * create an incremental diff.
- *
- * If -1, then look at whether we're local or remote and go by that.
- *
- * @sa disable_deltas_p()
- **/
+/* If 1, send the whole file as literal data rather than trying to
+ * create an incremental diff.  If > 1, disable full-file checksumming.
+ * If -1, then look at whether we're local or remote and go by that. */
 int whole_file = -1;
 
 int append_mode = 0;
@@ -928,7 +923,7 @@ static struct poptOption long_options[] = {
   {"exclude-from",     0,  POPT_ARG_STRING, 0, OPT_EXCLUDE_FROM, 0, 0 },
   {"include-from",     0,  POPT_ARG_STRING, 0, OPT_INCLUDE_FROM, 0, 0 },
   {"cvs-exclude",     'C', POPT_ARG_NONE,   &cvs_exclude, 0, 0, 0 },
-  {"whole-file",      'W', POPT_ARG_VAL,    &whole_file, 1, 0, 0 },
+  {"whole-file",      'W', POPT_ARG_NONE,   0, 'W', 0, 0 },
   {"no-whole-file",    0,  POPT_ARG_VAL,    &whole_file, 0, 0, 0 },
   {"no-W",             0,  POPT_ARG_VAL,    &whole_file, 0, 0, 0 },
   {"checksum",        'c', POPT_ARG_VAL,    &always_checksum, 1, 0, 0 },
@@ -1499,6 +1494,12 @@ int parse_arguments(int *argc_p, const char ***argv_p)
  }
  break;
 
+ case 'W':
+ if (whole_file < 0)
+ whole_file = 0;
+ whole_file++;
+ break;
+
  case 'P':
  if (refused_partial || refused_progress) {
  create_refuse_error(refused_partial
@@ -2288,11 +2289,14 @@ void server_options(char **args, int *argc_p)
  argstr[x++] = 'k';
  }
 
- if (whole_file > 0)
- argstr[x++] = 'W';
  /* We don't need to send --no-whole-file, because it's the
  * default for remote transfers, and in any case old versions
  * of rsync will not understand it. */
+ if (whole_file > 0) {
+ argstr[x++] = 'W';
+ if (whole_file > 1)
+ argstr[x++] = 'W';
+ }
 
  if (preserve_hard_links) {
  argstr[x++] = 'H';
index 1738dca..1761075 100644
--- a/receiver.c
+++ b/receiver.c
@@ -30,6 +30,7 @@ extern int log_before_transfer;
 extern int stdout_format_has_i;
 extern int logfile_format_has_i;
 extern int csum_length;
+extern int whole_file;
 extern int read_batch;
 extern int write_batch;
 extern int batch_gen_fd;
@@ -220,7 +221,10 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
  } else
  mapbuf = NULL;
 
- sum_init(checksum_seed);
+ if (whole_file < 2)
+ sum_init(checksum_seed);
+ else
+ memset(file_sum1, 0xFF, checksum_len);
 
  if (append_mode > 0) {
  OFF_T j;
@@ -263,7 +267,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
  stats.literal_data += i;
  cleanup_got_literal = 1;
 
- sum_update(data, i);
+ if (whole_file < 2)
+ sum_update(data, i);
 
  if (fd != -1 && write_file(fd,data,i) != i)
  goto report_write_error;
@@ -289,7 +294,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
  map = map_ptr(mapbuf,offset2,len);
 
  see_token(map, len);
- sum_update(map, len);
+ if (whole_file < 2)
+ sum_update(map, len);
  }
 
  if (updating_basis_or_equiv) {
@@ -334,7 +340,7 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
  exit_cleanup(RERR_FILEIO);
  }
 
- if (sum_end(file_sum1) != checksum_len)
+ if (whole_file < 2 && sum_end(file_sum1) != checksum_len)
  overflow_exit("checksum_len"); /* Impossible... */
 
  if (mapbuf)
index 9283425..dfe8bb9 100644
--- a/rsync.yo
+++ b/rsync.yo
@@ -1146,9 +1146,12 @@ dit(bf(-W, --whole-file)) With this option rsync's delta-transfer algorithm
 is not used and the whole file is sent as-is instead.  The transfer may be
 faster if this option is used when the bandwidth between the source and
 destination machines is higher than the bandwidth to disk (especially when the
-"disk" is actually a networked filesystem).  This is the default when both
+"disk" is actually a networked filesystem).  If the bf(--whole-file) option is
+repeated, rsync will also disable the computation of the full-file checksum.
+Both sides must be speaking at least protocol 31 to support this (first
+supported in 3.1.0).  A default of bf(-WW) is assumed when both
 the source and destination are specified as local paths, but only if no
-batch-writing option is in effect.
+bf(-W), bf(--no-W), or batch-writing option was specified.
 
 dit(bf(-x, --one-file-system)) This tells rsync to avoid crossing a
 filesystem boundary when recursing.  This does not limit the user's ability


--
Please use reply-all for most replies to avoid omitting the mailing list.
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html