|
View:
New views
3 Messages
—
Rating Filter:
Alert me
|
|
|
join with header line support
Hello,
Here's an improved version of the '--header' feature for join, with tests, NEWS, doc updates. Reminder: with this option, one can join files even if they contain a header line as the first line. I'll be happy to provide more examples and use cases, if needed. The patch is also available here: http://cancan.cshl.edu/labmembers/gordon/coreutils8/join_header.patch Comments are welcomed, -gordon NEWS | 3 +++ doc/coreutils.texi | 4 ++++ src/join.c | 23 ++++++++++++++++++++++- tests/misc/join | 21 +++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletions(-) diff --git a/NEWS b/NEWS index 03ed83f..4a17a4d 100644 --- a/NEWS +++ b/NEWS @@ -66,6 +66,9 @@ GNU coreutils NEWS -*- outline -*- touch now accepts the option --no-dereference (-h), as a means to change symlink timestamps on platforms with enough support. + join now accepts the option --header, treating the first line of + each input file as a header lines - joining them and printing them + without checking for ordering. * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index ec5bcfb..62dfe55 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -5511,6 +5511,10 @@ Do not check that both input files are in sorted order. This is the default. Replace those output fields that are missing in the input with @var{string}. +@item --header +@opindex --header +Treat the first line of each input file as a header line. The header lines will be joined and printed as the first output line. If @option{-o} is used to specify output format, the header line will be printed according to the specified format. Even if @option{--check-order} is used, the header lines will not be checked for ordering. 
+ @item -i @itemx --ignore-case @opindex -i diff --git a/src/join.c b/src/join.c index d734a91..bb8009f 100644 --- a/src/join.c +++ b/src/join.c @@ -137,7 +137,8 @@ static enum enum { CHECK_ORDER_OPTION = CHAR_MAX + 1, - NOCHECK_ORDER_OPTION + NOCHECK_ORDER_OPTION, + HEADER_LINE_OPTION }; @@ -146,6 +147,7 @@ static struct option const longopts[] = {"ignore-case", no_argument, NULL, 'i'}, {"check-order", no_argument, NULL, CHECK_ORDER_OPTION}, {"nocheck-order", no_argument, NULL, NOCHECK_ORDER_OPTION}, + {"header", no_argument, NULL, HEADER_LINE_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} @@ -157,6 +159,10 @@ static struct line uni_blank; /* If nonzero, ignore case when comparing join fields. */ static bool ignore_case; +/* If nonzero, treat the first line of each file as column headers - + join them without checking for ordering */ +static bool join_header_lines; + void usage (int status) { @@ -191,6 +197,8 @@ by whitespace. When FILE1 or FILE2 (not both) is -, read standard input.\n\ --check-order check that the input is correctly sorted, even\n\ if all input lines are pairable\n\ --nocheck-order do not check that the input is correctly sorted\n\ + --header treat first line in each file as field header line,\n\ + print them without trying to pair them.\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -616,6 +624,15 @@ join (FILE *fp1, FILE *fp2) initseq (&seq2); getseq (fp2, &seq2, 2); + if (join_header_lines && seq1.count && seq2.count) + { + prjoin(seq1.lines[0], seq2.lines[0]); + prevline[0] = NULL; + prevline[1] = NULL; + advance_seq (fp1, &seq1, true, 1); + advance_seq (fp2, &seq2, true, 2); + } + while (seq1.count && seq2.count) { size_t i; @@ -1052,6 +1069,10 @@ main (int argc, char **argv) &nfiles, &prev_optc_status, &optc_status); break; + case HEADER_LINE_OPTION: + join_header_lines = true; + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR 
(PROGRAM_NAME, AUTHORS); diff --git a/tests/misc/join b/tests/misc/join index d1f1677..6433e28 100755 --- a/tests/misc/join +++ b/tests/misc/join @@ -185,6 +185,27 @@ my @tv = ( # Before 6.10.143, this would mistakenly fail with the diagnostic: # join: File 1 is not in sorted order ['chkodr-7', '-12', ["2 a\n1 b\n", ""], "", 0], + +# Test '--header' feature +['header-1', '--header', + [ "ID Name\n1 A\n2 B\n", "ID Color\n1 red\n"], "ID Name Color\n1 A red\n", 0], + +# '--header' with '--check-order' : The header line is out-of-order but the +# actual data is in order. This join should succeed. +['header-2', '--header --check-order', + ["ID Name\n1 A\n2 B\n", "ID Color\n2 green\n"], "ID Name Color\n2 B green\n", 0], + +# '--header' with '--check-order' : The header line is out-of-order AND the +# actual data out-of-order. This join should fail. +['header-3', '--header --check-order', + ["ID Name\n2 B\n1 A\n", "ID Color\n2 blue\n"], "ID Name Color\n", 1, + "$prog: file 1 is not in sorted order\n"], + +# '--header' with specific output format '-o'. +# output header line should respect the requested format +['header-4', '--header -o "0,1.3,2.2"', + ["ID Group Name\n1 Foo A\n2 Bar B\n", "ID Color\n2 blue\n"], "ID Name Color\n2 B blue\n", 0], + ); # Convert the above old-style test vectors to the newer |
|
|
Re: join with header line support
Assaf Gordon wrote:
> Hello, > > Here's an improved version of the '--header' feature for join, with > tests, NEWS, doc updates. > > Reminder: with this option, one can join files even if they contain a > header line as the first line. > > I'll be happy to provide more examples and use cases, if needed. > > The patch is also available here: > http://cancan.cshl.edu/labmembers/gordon/coreutils8/join_header.patch Thanks for providing the download as thunderbird is mangling your patch again. I'll review it and expect to push it soon, unless there are objections. cheers, Pádraig. |
|
|
Re: join with header line support
Pádraig Brady wrote:
... >> The patch is also available here: >> http://cancan.cshl.edu/labmembers/gordon/coreutils8/join_header.patch > > Thanks for providing the download as thunderbird is mangling your patch again. > I'll review it and expect to push it soon, unless there are objections. Thanks for handling that! |
| Free embeddable forum powered by Nabble | Forum Help |