|
View:
New views
11 Messages
—
Rating Filter:
Alert me
|
|
|
tar patch to selectively divert named compressors to alternativesHi,
I kindly request you to review the patch below. I cloned the git repository and created the patch against something that git-describe calls "release_1_22-36-g63e0925". The patch introduces a level of indirection between the dedicated compressor selector options and the corresponding executable names. The parametrized executable names default to the previous fixed values. Each one can be changed via a specific long option before a compressor is selected with a dedicated named option. Thus they are suitable for adding to TAR_OPTIONS. They generally look like --COMPRESSOR-filter, as in --bzip2-filter. --------------------------------- changelog ---------------------------------- src/common.h: * Add global variables for the executable names of all 6 explicitly handled compressors. src/tar.c: * Add an option identifier enum for each new option. * Reorder the help display under "Compression options" so that it looks more logical and the new options fit in. Add the new options. * Handle the new options by pointing global variables at (parts of) command line arguments. Filter names can be changed only before a compressor is selected. Filter name changes coming after a compressor was selected would be either ineffective or would enable the user to trick the protection against specifying conflicting compressors. * --gzip, --bzip2 etc. select the executables specified by --gzip-filter, --bzip2-filter etc. Defaults are the previous fixed values. src/buffer.c: * In the "magic" table, used for content-based compressor selection, apply indirection instead of fixed compressor names. (Also fix the bug where the "xz" executable name was missing.) * Adapt the "compress_program" macro. src/suffix.c: * Make the "compression_suffixes" table static as it can be static. * In the "S" macro, the # (stringify) preprocessor operator is replaced by the ## (concat) operator, so that executable names are no more fixed but come from variables. * Adapt the "find_compression_program" function. 
---------------------------------- "tests" ----------------------------------- Checking for regressions. $ touch f Named option: $ $TAR -c -v -j -f f.tar.bz2 f f $ $TAR -t -v -j -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f External filter: $ $TAR -c -v --use=bzip2 -f f.tar.bz2 f f $ $TAR -t -v --use=bzip2 -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f Named option conforming to external filter: $ $TAR -c -v -j --use=bzip2 -f f.tar.bz2 f f $ $TAR -t -v -j --use=bzip2 -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f Named option conflicting with external filter: $ $TAR -c -v -j --use=gzip -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j --use=gzip -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. Named option conflicting with named option: $ $TAR -c -v -j -z -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j -z -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. External filter conflicting with external filter (this is a bit bogus, because both are bzip2 compressors, but I don't consider this a big problem): $ $TAR -c -v --use=bzip2 --use=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. 
$ $TAR -t -v --use=bzip2 --use=lbzip2 -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. Automatic selection by suffix / contents: $ $TAR -c -v -a -f f.tar.bz2 f f $ $TAR -t -v -f f.tar.bz2 -rw------- lacos/lacos 0 2009-10-08 03:29 f $ file f.tar.bz2 f.tar.bz2: bzip2 compressed data, block size = 900k Automatic selection by suffix overrides --use: $ $TAR -c -v -a --use=gzip -f f.tar.bz2 f f $ file f.tar.bz2 f.tar.bz2: bzip2 compressed data, block size = 900k Automatic selection by suffix yields to --use: $ $TAR -c -v -a --use=gzip -f f.tar.qqq f f $ file f.tar.qqq f.tar.qqq: gzip compressed data, from Unix, last modified: Thu Oct 8 03:31:09 2009, max compression Cannot change filter after selecting method: $ $TAR -c -v --bzip2 --bzip2-filter=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: can't change alternative filters after selecting a compressor Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -c -v --use=bzip2 --bzip2-filter=lbzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: can't change alternative filters after selecting a compressor Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. The "tests" were successfully repeated after issuing: $ export TAR_OPTIONS="$TAR_OPTIONS --gzip-filter=pigz --bzip2-filter=lbzip2" $ export LBZIP2_PRINT_STATS=1 With the following differences: -j selects lbzip2 instead of bzip2: $ $TAR -c -v -j --use=bzip2 -f f.tar.bz2 f /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. $ $TAR -t -v -j --use=bzip2 -f f.tar.bz2 /home/lacos/tmp/tar/src/tar: Conflicting compression options Try `/home/lacos/tmp/tar/src/tar --help' or `/home/lacos/tmp/tar/src/tar --usage' for more information. 
and lbzip2 printed statistics whenever it was run, ie. in the "named option", "automatic selection by suffix / contents" and "automatic selection by suffix overrides --use" cases. -------------------------- "benchmarks", dual-core --------------------------- (TAR_OPTIONS set as above.) Decompression with lbzip2 (http://lacos.hu/), from a pipe: $ cat /usr/src/linux-source-2.6.26.tar.bz2 | time -p $TAR -t -j >/dev/null real 8.74 user 16.54 sys 0.80 Decompression with bzip2: $ (unset TAR_OPTIONS > time -p $TAR tjf /usr/src/linux-source-2.6.26.tar.bz2 >/dev/null) real 13.96 user 13.49 sys 0.46 Compression with lbzip2: $ time -p $TAR caf linux.tar.bz2 linux-source-2.6.26/ real 26.75 user 52.31 sys 1.00 Compression with bzip2: $ time -p $TAR -c --use=bzip2 -f linux.tar.bz2 linux-source-2.6.26/ real 52.47 user 51.65 sys 0.83 Compression with pigz (http://www.zlib.net/pigz/): $ time -p $TAR caf linux.tar.gz linux-source-2.6.26/ real 14.34 user 27.89 sys 0.67 Compression with gzip: $ time -p $TAR -c -I gzip -f linux.tar.gz linux-source-2.6.26/ real 22.60 user 22.17 sys 0.43 ---------------------------------- "patch" ----------------------------------- diff --git a/src/common.h b/src/common.h index 0020f08..f110fa0 100644 --- a/src/common.h +++ b/src/common.h @@ -130,6 +130,15 @@ GLOBAL unsigned checkpoint_option; /* Specified name of compression program, or "gzip" as implied by -z. */ GLOBAL const char *use_compress_program_option; +/* Alternative filter programs to execute in place of known compressors. 
*/ +GLOBAL const char + *compress_filter_option, + *gzip_filter_option, + *bzip2_filter_option, + *lzma_filter_option, + *lzop_filter_option, + *xz_filter_option; + GLOBAL bool dereference_option; GLOBAL bool hard_dereference_option; diff --git a/src/tar.c b/src/tar.c index a639974..6571612 100644 --- a/src/tar.c +++ b/src/tar.c @@ -257,6 +257,12 @@ enum ANCHORED_OPTION = CHAR_MAX + 1, ATIME_PRESERVE_OPTION, BACKUP_OPTION, + COMPRESS_FILTER_OPTION, + GZIP_FILTER_OPTION, + BZIP2_FILTER_OPTION, + LZMA_FILTER_OPTION, + LZOP_FILTER_OPTION, + XZ_FILTER_OPTION, CHECK_DEVICE_OPTION, CHECKPOINT_OPTION, CHECKPOINT_ACTION_OPTION, @@ -615,22 +621,34 @@ static struct argp_option options[] = { N_("do not use archive suffix to determine the compression program"), GRID+1 }, {"bzip2", 'j', 0, 0, - N_("filter the archive through bzip2"), GRID+1 }, + N_("filter the archive through bzip2"), GRID+3 }, {"gzip", 'z', 0, 0, - N_("filter the archive through gzip"), GRID+1 }, - {"gunzip", 0, 0, OPTION_ALIAS, NULL, GRID+1 }, - {"ungzip", 0, 0, OPTION_ALIAS, NULL, GRID+1 }, + N_("filter the archive through gzip"), GRID+3 }, + {"gunzip", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, + {"ungzip", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, {"compress", 'Z', 0, 0, - N_("filter the archive through compress"), GRID+1 }, - {"uncompress", 0, 0, OPTION_ALIAS, NULL, GRID+1 }, + N_("filter the archive through compress"), GRID+3 }, + {"uncompress", 0, 0, OPTION_ALIAS, NULL, GRID+3 }, {"lzma", LZMA_OPTION, 0, 0, - N_("filter the archive through lzma"), GRID+1 }, + N_("filter the archive through lzma"), GRID+3 }, {"lzop", LZOP_OPTION, 0, 0, - N_("filter the archive through lzop"), GRID+8 }, + N_("filter the archive through lzop"), GRID+3 }, {"xz", 'J', 0, 0, - N_("filter the archive through xz"), GRID+8 }, + N_("filter the archive through xz"), GRID+3 }, {"use-compress-program", 'I', N_("PROG"), 0, - N_("filter through PROG (must accept -d)"), GRID+1 }, + N_("filter through PROG (must accept -d)"), GRID+5 }, + 
{"bzip2-filter", BZIP2_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -j / --bzip2 (must accept -d), eg. lbzip2"), GRID+7 }, + {"gzip-filter", GZIP_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -z / --gzip (must accept -d), eg. pigz"), GRID+7 }, + {"compress-filter", COMPRESS_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -Z / --compress (must accept -d)"), GRID+7 }, + {"lzma-filter", LZMA_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for --lzma (must accept -d)"), GRID+7 }, + {"lzop-filter", LZOP_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for --lzop (must accept -d)"), GRID+7 }, + {"xz-filter", XZ_FILTER_OPTION, N_("PROG"), 0, + N_("alternative filter for -J / --xz (must accept -d)"), GRID+7 }, #undef GRID #define GRID 100 @@ -1440,12 +1458,28 @@ parse_opt (int key, char *arg, struct argp_state *state) ignore_zeros_option = true; break; +#define FOP(filter, FILTER) \ + case FILTER ## _FILTER_OPTION: \ + if (0 != use_compress_program_option) \ + USAGE_ERROR ((0, 0, \ + _("can't change alternative filters after selecting a compressor"))); \ + filter ## _filter_option = arg; \ + break; + + FOP(compress, COMPRESS) + FOP(gzip, GZIP) + FOP(bzip2, BZIP2) + FOP(lzma, LZMA) + FOP(lzop, LZOP) + FOP(xz, XZ) +#undef FOP + case 'j': - set_use_compress_program_option ("bzip2"); + set_use_compress_program_option (bzip2_filter_option); break; case 'J': - set_use_compress_program_option ("xz"); + set_use_compress_program_option (xz_filter_option); break; case 'k': @@ -1489,11 +1523,11 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case LZMA_OPTION: - set_use_compress_program_option ("lzma"); + set_use_compress_program_option (lzma_filter_option); break; case LZOP_OPTION: - set_use_compress_program_option ("lzop"); + set_use_compress_program_option (lzop_filter_option); break; case 'm': @@ -1651,11 +1685,11 @@ parse_opt (int key, char *arg, struct argp_state *state) break; case 'z': - 
set_use_compress_program_option ("gzip"); + set_use_compress_program_option (gzip_filter_option); break; case 'Z': - set_use_compress_program_option ("compress"); + set_use_compress_program_option (compress_filter_option); break; case ANCHORED_OPTION: @@ -2204,6 +2238,15 @@ decode_options (int argc, char **argv) seek_option = -1; +#define DEFOP(filter) filter ## _filter_option = #filter + DEFOP(compress); + DEFOP(gzip); + DEFOP(bzip2); + DEFOP(lzma); + DEFOP(lzop); + DEFOP(xz); +#undef DEFOP + /* Convert old-style tar call by exploding option element and rearranging options accordingly. */ diff --git a/src/buffer.c b/src/buffer.c index dd97682..a9195fc 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -213,25 +213,25 @@ struct zip_magic enum compress_type type; size_t length; char *magic; - char *program; + const char * const *program; char *option; }; static struct zip_magic const magic[] = { { ct_tar }, { ct_none, }, - { ct_compress, 2, "\037\235", "compress", "-Z" }, - { ct_gzip, 2, "\037\213", "gzip", "-z" }, - { ct_bzip2, 3, "BZh", "bzip2", "-j" }, - { ct_lzma, 6, "\xFFLZMA", "lzma", "--lzma" }, /* FIXME: ???? */ - { ct_lzop, 4, "\211LZO", "lzop", "--lzop" }, - { ct_xz, 6, "\0xFD7zXZ", "-J" }, + { ct_compress, 2, "\037\235", &compress_filter_option, "-Z" }, + { ct_gzip, 2, "\037\213", &gzip_filter_option, "-z" }, + { ct_bzip2, 3, "BZh", &bzip2_filter_option, "-j" }, + { ct_lzma, 6, "\xFFLZMA", &lzma_filter_option, "--lzma" }, /* FIXME: ???? */ + { ct_lzop, 4, "\211LZO", &lzop_filter_option, "--lzop" }, + { ct_xz, 6, "\0xFD7zXZ", &xz_filter_option, "-J" } }; #define NMAGIC (sizeof(magic)/sizeof(magic[0])) #define compress_option(t) magic[t].option -#define compress_program(t) magic[t].program +#define compress_program(t) *magic[t].program /* Check if the file ARCHIVE is a compressed archive. 
*/ enum compress_type diff --git a/src/suffix.c b/src/suffix.c index 6dbc68e..d394f32 100644 --- a/src/suffix.c +++ b/src/suffix.c @@ -23,11 +23,11 @@ struct compression_suffix { const char *suffix; size_t length; - const char *program; + const char * const *program; }; -struct compression_suffix compression_suffixes[] = { -#define S(s,p) #s, sizeof (#s) - 1, #p +static struct compression_suffix compression_suffixes[] = { +#define S(s,p) #s, sizeof (#s) - 1, &p ## _filter_option { S(gz, gzip) }, { S(tgz, gzip) }, { S(taz, gzip) }, @@ -64,7 +64,7 @@ find_compression_program (const char *name, const char *defprog) { if (compression_suffixes[i].length == len && memcmp (compression_suffixes[i].suffix, suf, len) == 0) - return compression_suffixes[i].program; + return *compression_suffixes[i].program; } } return defprog; |
|
|
Re: tar patch to selectively divert named compressors to alternativesERSEK Laszlo <lacos@...> ha escrit:
> I kindly request you to review the patch below. Have you tried the patch I sent you? > The patch introduces a level of indirection between the dedicated compressor > selector options and the corresponding executable names. The parametrized > executable names default to the previous fixed values. Each one can be > changed via a specific long option before a compressor is selected with a > dedicated named option. Thus they are suitable for adding to TAR_OPTIONS. > They generally look like --COMPRESSOR-filter, as in --bzip2-filter. I prefer a single option instead. Tar is already overloaded with command option names. Two option names per each compressor program is too much. > +GLOBAL const char > + *compress_filter_option, > + *gzip_filter_option, > + *bzip2_filter_option, > + *lzma_filter_option, > + *lzop_filter_option, > + *xz_filter_option; The same here. I'd like to avoid polluting the global namespace. Apart from that, there remains my remark from the previous posting. I'm not sure the game is worth the candles. It seems much simpler to have an option/variable that would allow to redefine the compressor name at compile time instead. Regards, Sergey |
|
|
Re: tar patch to selectively divert named compressors to alternativesOn Thu, 8 Oct 2009, Sergey Poznyakoff wrote:
> ERSEK Laszlo <lacos@...> ha escrit: > >> I kindly request you to review the patch below. > > Have you tried the patch I sent you? Your patch appeared on the list between me sending mine and mine appearing on the list, so no. > I prefer a single option instead. Tar is already overloaded with command > option names. Two option names per each compressor program is too much. Absolutely. I feel I was a bit too eager to write mine at all; after my post where I said "lbzip2 is not a drop-in replacement for bzip2" there was a short silence on the list and I supposed it's my task to write the patch. Since you as the maintainer put in the work (thanks for that!), just forget my patch. > Apart from that, there remains my remark from the previous posting. > I'm not sure the game is worth the candles. It seems much simpler to > have an option/variable that would allow to redefine the compressor > name at compile time instead. I was personally okay with --use=lbzip2. (Wherever GNU tar is unavailable, one has to pipe even gzip manually, and I have no problem with that either.) I was asked to ask you on the debian-mentors mailing list. I think the motivation is that such a default option would allow automatically speeding up compression/decompression without changing scripts or routinely issued commands. Changing the compressor name directly to an alternative at compile time would work, but that would give no easy way out for people unpleased with that alternative. Let's prepare a suggestion: GNU tar should be patched in Debian to replace each instance of the "bzip2" program name with "/etc/alternatives/bzip2-filter" (same for "gzip" -> "gzip-filter" etc.), and pbzip2, lbzip2, pigz etc. should install alternatives. The non-standard symlink names ("bzip2-filter") signify that these alternatives are meant only for when filtering is needed, not as general replacements. This way at least a system administrator could change the compressors without local recompilation. 
Would you support this suggestion? If so, I'd relay it to the debian-mentors mailing list, along with pointers to both patches posted here. Thank you very much, lacos |
|
|
Re: tar patch to selectively divert named compressors to alternativesERSEK Laszlo <lacos@...> ha escrit:
> Your patch appeared on the list betwen me sending mine and mine > appearing on the list, so no. Ah, I see :) > I think the motivation is that such a default option would allow > automatically speeding up compression/decompression without changing scripts > or routinely issued commands. Changing the compressor name directly to an > alternative at compile time would work, but that would give no easy way out > for people unpleased with that alternative. OK, thanks for the explanation. Please allow me some time to think about it. > Let's prepare a suggestion: > > GNU tar should be patched in Debian to replace each instance of the > "bzip2" program name with "/etc/alternatives/bzip2-filter" (same for > "gzip" -> "gzip-filter" etc.), and pbzip2, lbzip2, pigz etc. should > install alternatives. The non-standard symlink names ("bzip2-filter") > signify that these alternatives are meant only for when filtering is > needed, not as general replacements. This way at least a system > administrator could change the compressors without local recompilation. Looks reasonable, except that instead of "patching" the code, I'd rather provide compilation time options for that. Variants are: 1. ./configure --with-compressor=bzip2=/etc/alternatives/bzip2-filter,gzip=... 2. ./configure --with-bzip2=/etc/alternatives/bzip2-filter --with-gzip=... 3. ./configure BZIP2_PROGRAM=/etc/alternatives/bzip2-filter GZIP_PROGRAM=... In this particular case I'd rather implement (2). What do you think? Regards, Sergey |
|
|
Re: tar patch to selectively divert named compressors to alternatives I'd rather provide compilation time options for that. Variants are:
1. ./configure --with-compressor=bzip2=/etc/alternatives/bzip2-filter,gzip=... 2. ./configure --with-bzip2=/etc/alternatives/bzip2-filter --with-gzip=... 3. ./configure BZIP2_PROGRAM=/etc/alternatives/bzip2-filter GZIP_PROGRAM=... All of this is only being contemplated because of lbzip? No one has ever wanted "alternative" implementations of any other compressor, to my knowledge. Seems a painful increase of complexity. So how about working (Sergey, I don't mean you) on making lbzip an actual replacement for bzip? I know that isn't being done now, but that doesn't mean it couldn't be done. Free software and all that. Finally, does lbzip offer any advantages over xz (http://tukaani.org/xz)? Which already compresses better and decompresses faster than bz2, in general. karl |
|
|
Re: tar patch to selectively divert named compressors to alternativesOn Thu, 8 Oct 2009, Karl Berry wrote:
> I'd rather provide compilation time options for that. Variants are: > > 1. ./configure --with-compressor=bzip2=/etc/alternatives/bzip2-filter,gzip=... > 2. ./configure --with-bzip2=/etc/alternatives/bzip2-filter --with-gzip=... > 3. ./configure BZIP2_PROGRAM=/etc/alternatives/bzip2-filter GZIP_PROGRAM=... > > All of this is only being contemplated because of lbzip? No one has > ever wanted "alternative" implementations of any other compressor, No user has ever wanted a decompressor that would exercise all four cores of his new quad-core computer when extracting the 300M openoffice tarball, staring at the cpu load desktop applet and cursing about three quarters of his CPU idling? No system administrator has ever wanted a bzip2 compressor to compress his/her daily 7G of logs in 1/16th time on his/her 16 core server? For a long time, bzip2 was the most space-efficient main-stream, free software compressor, but it was relatively slow. Thus any speedup was useful. *Lots* of parallel bzip2 compressors and some decompressors were written before lbzip2, but in my interpretation, they never got the multi-threaded decompression quite right. See [0] if you care. And what did Mark Adler write pigz for, then? Why didn't he just extend gzip? He's a big name, maybe you'll accept from him that parallelism by way of multi-threading is not an additive property, you cannot just slap it on a pre-existing bitstream format. Why did Tim Cook write tamp? I'm not pushing for lbzip2 to be integrated better with tar, or at least not for myself. I asked first when I was asked to ask. I'm perfectly fine with --use and I would be fine even without it. > So how about working (Sergey, I don't mean you) on making lbzip an > actual replacement for bzip? If you mean me, I won't work on that, sorry. Please anybody feel free to fork lbzip2. Nice move though trying to allocate my time. 
I worked my ass off on lbzip2 after my day job, nights till 4 o'clock in the morning till I was falling out of my chair, running kilometers in my room like a rat in a maze thinking about the decompressor design. Don't care if it doesn't show, if it's "trivial" or "convoluted" for some (yeah, why didn't anybody implement it before, among the 3 or so pre-existing parallelizations?) Lbzip2 is done for me. I have some experiments in the queue if some really nice people will help me out, I'll document those experiments, maybe lose face on them, I'll release 1.00 then just abandon it. > I know that isn't being done now, but that doesn't it couldn't be done. > Free software and all that. Sure. I'm not interested in toiling away on that shit for a year. You get my code and the four freedoms with it, you don't get my time and effort. I contacted Julian Seward, author of original bzip2, both when I had questions about bzip2 itself and when the idea first emerged to extend bzip2 with multi-threading, for example by merging lbzip2 into it. He didn't seem interested. So what? Do it yourself, free software and all that. Everybody will be thankful, me included, if you manage to do that. > Finally, does lbzip offer any advantages over xz > (http://tukaani.org/xz)? Which already compresses better and > decompresses faster than bz2, in general. I looked at their 1.0 file format when it came out and it was great, the blocks are length-prefixed which makes it obvious that it was designed with parallelism in mind. (The format is great for a whole series of other reasons, too.) Once they get the multi-threading done, I'll be the first to throw away lbzip2, use xz for efficiency-oriented compression and tamp (multi-threaded QuickLZ) for speed-oriented compression. In the meantime, there are *lots* of single-stream tar.bz2's on the net, and there are people (I am for sure) issuing $ wget -O - URL | tee -i f.tar.bz2 | tar -x --use=lbzip2 Just forget the damn thing, I'm fucking tired of it. 
Defending lbzip2 like it was some crusade of mine, sending around the same old links a thousand times, identifying the use cases where lbzip2 is "unique", proving I'm not doing this only for "fame" or some shit like that. Forget it. lacos [0] http://lists.debian.org/debian-mentors/2009/02/msg00135.html |
|
|
Re: tar patch to selectively divert named compressors to alternativesHi,
On Thu, 8 Oct 2009, Bdale Garbee wrote: > On Thu, 2009-10-08 at 16:18 +0200, ERSEK Laszlo wrote: >> On Sun, 4 Oct 2009, Paul Wise wrote: >> >>> Talk to the upstream tar maintainers about ways to make tar detect if >>> lbzip2 is available and use it instead of bzip2. >> >> [0] http://lists.gnu.org/archive/html/help-tar/2009-10/msg00008.html > > As the maintainer of the Debian packaging of tar, I'm ok with Sergey's > proposal in [0]. is [1]. If you still feel this modification of tar is worth it, I'll gladly do my part if there will be any (eg. installing the lbzip2 alternative for /usr/bin/bzip2-filter or so). Aníbal, I believe it would be useful if pbzip2 installed a bzip2-filter alternative as well. pbzip2 cooperates with --use since v1.0.4. On Thu, 8 Oct 2009, Karl Berry wrote: > So how about working (Sergey, I don't mean you) on making lbzip an > actual replacement for bzip? I know that isn't being done now, but that > doesn't it couldn't be done. Free software and all that. I've been thinking about making lbzip2's command line and compressed output more similar to those of bzip2. Even if I get anything done on this front, I can only promise *not* to support -f/--force, and (if implemented at all) make -k/--keep the default. lbzip2 will neither remove nor overwrite files, regardless of the NO WARRANTY clause of the GPL. Furthermore, I'm reluctant to introduce long options at all. Paul, if these would block acceptance of lbzip2 as a bzip2 alternative, please tell me up-front so I don't need to bother hacking upstream. Karl, my apologies. lacos [1] http://lists.gnu.org/archive/html/help-tar/2009-10/msg00012.html |
|
|
Re: tar patch to selectively divert named compressors to alternativesOn Fri, 2009-10-09 at 16:16 +0200, ERSEK Laszlo wrote:
> Paul, if these would block acceptance of lbzip2 as a bzip2 alternative, > please tell me up-front so I don't need to bother hacking upstream. I personally feel that alternatives are not the best way to do this, because there will be some distros that do not implement a feature like this. In my opinion the most useful solution would be for tar to gain knowledge about the various bzip2 filters available and have them selectable via an environment variable as well as a command-line option. I understand that tar upstream does not want to do that though. However, the decision as to which solution to choose is up to you and tar upstream. I will not block their/your choice from entering Debian. -- bye, pabs http://wiki.debian.org/PaulWise |
|
|
Re: tar patch to selectively divert named compressors to alternativesHi,
I think these options have been named: 1. Don't change tar at all. Somebody creates a (sufficiently) command line compatible, multi-threaded bzip2 alternative out of lbzip2 (for example). Through the alternatives system, tar will access whichever implementation is configured under the name "bzip2". 2. Add a ./configure --with-bzip2-filter compile-time switch to tar. The selected utility only needs to support a filter (--use) interface. Debian would add a new symlink, /usr/bin/bzip2-filter, which would lead, through the alternatives system, to /bin/bzip2 or for example to /usr/bin/lbzip2. 2a. Modify upstream tar. 2b. Maintain this patch as part of the Debian packaging of tar. 3. Add runtime bzip2-filter selection to tar (TAR_OPTIONS). 3a. Modify upstream tar. 3b. Maintain this patch as part of the Debian packaging of tar. What I believe to be the preferences: Aníbal: ? Bdale: 2a, 3a Karl: 1, 2b/3b lacos: 3a, 2a, 3b, 2b, 1 Paul: 3a, 3b Sergey: 1, 2b/3b, 2a, 3a Even if lbzip2 was command line compatible with bzip2, imagining a user issuing "bzip2" but actually executing lbzip2 scares me somehow. Bdale, I would be happy to help maintaining a tar patch of your choice in the Debian packaging. (Preferably the one I wrote, as I already understand that.) Thanks, lacos |
|
|
Re: tar patch to selectively divert named compressors to alternativesPaul Wise <pabs@...> ha escrit:
> I personally feel that alternatives are not the best way to do this, > because there will be some distros that do not implement a feature like > this. In my opinion the most useful solution would be for tar to gain > knowledge about the various bzip2 filters available and have them > selectable via an environment variable as well as a command-line option. > I understand that tar upstream does not want to do that though. I didn't say that I did not want to do that. I asked to allow me some more time to weigh all pros and cons and prepare the optimal solution. So far I have not finished that. In the meantime, I have installed the attached patch, which roughly corresponds to the 2nd alternative in Laszlo's terms. Regards, Sergey From a7e9b6a17b2c111f4afa5ae35e3a206483366693 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff <gray@...> Date: Sat, 10 Oct 2009 17:29:18 +0300 Subject: [PATCH] Allow installers to specify alternative program names for compression programs. This adds --with-gzip, --with-bzip2 etc. switches to the configure, so that one can do, e.g. ./configure --with-bzip2=lbzip2 and have lbzip2 executed whenever user calls `tar --bzip2'. * acinclude.m4: New file. * configure.ac: Add TAR_COMPR_PROGRAM invocations for the supported compressors. * src/buffer.c (magic): Use *_COMPRESSOR defines instead of hardcoded program names. * src/suffix.c (compression_suffixes): Likewise. --- acinclude.m4 | 26 ++++++++++++++++++++++++++ configure.ac | 7 +++++++ src/buffer.c | 12 ++++++------ src/suffix.c | 30 ++++++++++++++++-------------- 4 files changed, 55 insertions(+), 20 deletions(-) create mode 100644 acinclude.m4 diff --git a/acinclude.m4 b/acinclude.m4 new file mode 100644 index 0000000..fff919a --- /dev/null +++ b/acinclude.m4 @@ -0,0 +1,26 @@ +dnl Special Autoconf macros for GNU Tar -*- autoconf -*- +dnl Copyright (C) 2009 Free Software Foundation, Inc. 
+dnl +dnl GNU tar is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 3, or (at your option) +dnl any later version. +dnl +dnl GNU tar is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License along +dnl with GNU tar. If not, see <http://www.gnu.org/licenses/>. + +AC_DEFUN([TAR_COMPR_PROGRAM],[ + m4_pushdef([tar_compr_define],translit($1,[a-z+-],[A-ZX_])[_PROGRAM]) + m4_pushdef([tar_compr_var],[tar_cv_compressor_]translit($1,[+-],[x_])) + AC_ARG_WITH([--with-]$1, + AC_HELP_STRING([--with-]$1[=PROG], + [use PROG as ]$1[ compressor program]), + [tar_compr_var=${withvar}], + [tar_compr_var=m4_if($2,,$1,$2)]) + AC_DEFINE_UNQUOTED(tar_compr_define, "$tar_compr_var", + [Define to the program name of ]$1[ compressor program])]) diff --git a/configure.ac b/configure.ac index 1b1831a..7521d64 100644 --- a/configure.ac +++ b/configure.ac @@ -121,6 +121,13 @@ else [Define to the full path of your rsh, if any.]) fi +TAR_COMPR_PROGRAM(compress) +TAR_COMPR_PROGRAM(gzip) +TAR_COMPR_PROGRAM(bzip2) +TAR_COMPR_PROGRAM(lzma) +TAR_COMPR_PROGRAM(lzop) +TAR_COMPR_PROGRAM(xz) + AC_MSG_CHECKING(for default archive format) AC_ARG_VAR([DEFAULT_ARCHIVE_FORMAT], diff --git a/src/buffer.c b/src/buffer.c index fa9ccc2..d7ff214 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -220,12 +220,12 @@ struct zip_magic static struct zip_magic const magic[] = { { ct_tar }, { ct_none, }, - { ct_compress, 2, "\037\235", "compress", "-Z" }, - { ct_gzip, 2, "\037\213", "gzip", "-z" }, - { ct_bzip2, 3, "BZh", "bzip2", "-j" }, - { ct_lzma, 6, "\xFFLZMA", "lzma", "--lzma" }, - { ct_lzop, 4, "\211LZO", "lzop", "--lzop" }, - { ct_xz, 6, 
"\0xFD7zXZ", "xz", "-J" }, + { ct_compress, 2, "\037\235", COMPRESS_PROGRAM, "-Z" }, + { ct_gzip, 2, "\037\213", GZIP_PROGRAM, "-z" }, + { ct_bzip2, 3, "BZh", BZIP2_PROGRAM, "-j" }, + { ct_lzma, 6, "\xFFLZMA", LZMA_PROGRAM, "--lzma" }, + { ct_lzop, 4, "\211LZO", LZOP_PROGRAM, "--lzop" }, + { ct_xz, 6, "\0xFD7zXZ", XZ_PROGRAM, "-J" }, }; #define NMAGIC (sizeof(magic)/sizeof(magic[0])) diff --git a/src/suffix.c b/src/suffix.c index a044d5a..cd9c01a 100644 --- a/src/suffix.c +++ b/src/suffix.c @@ -27,21 +27,23 @@ struct compression_suffix }; static struct compression_suffix compression_suffixes[] = { -#define S(s,p) #s, sizeof (#s) - 1, #p - { S(gz, gzip) }, - { S(tgz, gzip) }, - { S(taz, gzip) }, - { S(Z, compress) }, - { S(taZ, compress) }, - { S(bz2, bzip2) }, - { S(tbz, bzip2) }, - { S(tbz2, bzip2) }, - { S(tz2, bzip2) }, - { S(lzma, lzma) }, - { S(tlz, lzma) }, - { S(lzo, lzop) }, - { S(xz, xz) }, +#define __CAT2__(a,b) a ## b +#define S(s,p) #s, sizeof (#s) - 1, __CAT2__(p,_PROGRAM) + { S(gz, GZIP) }, + { S(tgz, GZIP) }, + { S(taz, GZIP) }, + { S(Z, COMPRESS) }, + { S(taZ, COMPRESS) }, + { S(bz2, BZIP2) }, + { S(tbz, BZIP2) }, + { S(tbz2, BZIP2) }, + { S(tz2, BZIP2) }, + { S(lzma, LZMA) }, + { S(tlz, LZMA) }, + { S(lzo, LZOP) }, + { S(xz, XZ) }, #undef S +#undef __CAT2__ }; static int nsuffixes = sizeof (compression_suffixes) / -- 1.6.4.2 |
|
|
Re: tar patch to selectively divert named compressors to alternatives

Sergey Poznyakoff <gray@...> ha escrit:
> In the meantime, I have installed the attached patch, which roughly > corresponds to the 2nd alternative in Laszlo's terms. That patch contained typos. Attached is the corrected version. Apologies for the inconvenience. Regards, Sergey From a7e9b6a17b2c111f4afa5ae35e3a206483366693 Mon Sep 17 00:00:00 2001 From: Sergey Poznyakoff <gray@...> Date: Sat, 10 Oct 2009 17:29:18 +0300 Subject: [PATCH] Allow installers to specify alternative program names for compression programs. This adds --with-gzip, --with-bzip2 etc. switches to the configure, so that one can do, e.g. ./configure --with-bzip2=lbzip2 and have lbzip2 executed whenever user calls `tar --bzip2'. * acinclude.m4: New file. * configure.ac: Add TAR_COMPR_PROGRAM invocations for the supported compressors. * src/buffer.c (magic): Use *_COMPRESSOR defines instead of hardcoded program names. * src/suffix.c (compression_suffixes): Likewise. --- acinclude.m4 | 26 ++++++++++++++++++++++++++ configure.ac | 7 +++++++ src/buffer.c | 12 ++++++------ src/suffix.c | 30 ++++++++++++++++-------------- 4 files changed, 55 insertions(+), 20 deletions(-) create mode 100644 acinclude.m4 diff --git a/acinclude.m4 b/acinclude.m4 new file mode 100644 index 0000000..fff919a --- /dev/null +++ b/acinclude.m4 @@ -0,0 +1,26 @@ +dnl Special Autoconf macros for GNU Tar -*- autoconf -*- +dnl Copyright (C) 2009 Free Software Foundation, Inc. +dnl +dnl GNU tar is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 3, or (at your option) +dnl any later version. +dnl +dnl GNU tar is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License along +dnl with GNU tar. 
If not, see <http://www.gnu.org/licenses/>. + +AC_DEFUN([TAR_COMPR_PROGRAM],[ + m4_pushdef([tar_compr_define],translit($1,[a-z+-],[A-ZX_])[_PROGRAM]) + m4_pushdef([tar_compr_var],[tar_cv_compressor_]translit($1,[+-],[x_])) + AC_ARG_WITH($1, + AC_HELP_STRING([--with-]$1[=PROG], + [use PROG as ]$1[ compressor program]), + [tar_compr_var=${withval}], + [tar_compr_var=m4_if($2,,$1,$2)]) + AC_DEFINE_UNQUOTED(tar_compr_define, "$tar_compr_var", + [Define to the program name of ]$1[ compressor program])]) diff --git a/configure.ac b/configure.ac index 1b1831a..7521d64 100644 --- a/configure.ac +++ b/configure.ac @@ -121,6 +121,13 @@ else [Define to the full path of your rsh, if any.]) fi +TAR_COMPR_PROGRAM(compress) +TAR_COMPR_PROGRAM(gzip) +TAR_COMPR_PROGRAM(bzip2) +TAR_COMPR_PROGRAM(lzma) +TAR_COMPR_PROGRAM(lzop) +TAR_COMPR_PROGRAM(xz) + AC_MSG_CHECKING(for default archive format) AC_ARG_VAR([DEFAULT_ARCHIVE_FORMAT], diff --git a/src/buffer.c b/src/buffer.c index fa9ccc2..d7ff214 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -220,12 +220,12 @@ struct zip_magic static struct zip_magic const magic[] = { { ct_tar }, { ct_none, }, - { ct_compress, 2, "\037\235", "compress", "-Z" }, - { ct_gzip, 2, "\037\213", "gzip", "-z" }, - { ct_bzip2, 3, "BZh", "bzip2", "-j" }, - { ct_lzma, 6, "\xFFLZMA", "lzma", "--lzma" }, - { ct_lzop, 4, "\211LZO", "lzop", "--lzop" }, - { ct_xz, 6, "\0xFD7zXZ", "xz", "-J" }, + { ct_compress, 2, "\037\235", COMPRESS_PROGRAM, "-Z" }, + { ct_gzip, 2, "\037\213", GZIP_PROGRAM, "-z" }, + { ct_bzip2, 3, "BZh", BZIP2_PROGRAM, "-j" }, + { ct_lzma, 6, "\xFFLZMA", LZMA_PROGRAM, "--lzma" }, + { ct_lzop, 4, "\211LZO", LZOP_PROGRAM, "--lzop" }, + { ct_xz, 6, "\0xFD7zXZ", XZ_PROGRAM, "-J" }, }; #define NMAGIC (sizeof(magic)/sizeof(magic[0])) diff --git a/src/suffix.c b/src/suffix.c index a044d5a..cd9c01a 100644 --- a/src/suffix.c +++ b/src/suffix.c @@ -27,21 +27,23 @@ struct compression_suffix }; static struct compression_suffix compression_suffixes[] = { 
-#define S(s,p) #s, sizeof (#s) - 1, #p - { S(gz, gzip) }, - { S(tgz, gzip) }, - { S(taz, gzip) }, - { S(Z, compress) }, - { S(taZ, compress) }, - { S(bz2, bzip2) }, - { S(tbz, bzip2) }, - { S(tbz2, bzip2) }, - { S(tz2, bzip2) }, - { S(lzma, lzma) }, - { S(tlz, lzma) }, - { S(lzo, lzop) }, - { S(xz, xz) }, +#define __CAT2__(a,b) a ## b +#define S(s,p) #s, sizeof (#s) - 1, __CAT2__(p,_PROGRAM) + { S(gz, GZIP) }, + { S(tgz, GZIP) }, + { S(taz, GZIP) }, + { S(Z, COMPRESS) }, + { S(taZ, COMPRESS) }, + { S(bz2, BZIP2) }, + { S(tbz, BZIP2) }, + { S(tbz2, BZIP2) }, + { S(tz2, BZIP2) }, + { S(lzma, LZMA) }, + { S(tlz, LZMA) }, + { S(lzo, LZOP) }, + { S(xz, XZ) }, #undef S +#undef __CAT2__ }; static int nsuffixes = sizeof (compression_suffixes) / -- 1.6.4.2 |
| Free embeddable forum powered by Nabble | Forum Help |