This patch adds wildcard matching to the regular "prefix" matching for svndumpfilter. It is my first patch and feedback would be appreciated.
There is one place where I am unsure about: the wildcard matching uses "NXOR", so I am not sure if it was designed to only work with path prefixes (it seems to go well if I choose "*.txt" as a pattern for example).
Andrei Litvin
Index: subversion/svndumpfilter/main.c
===================================================================
--- subversion/svndumpfilter/main.c (revision 21719)
+++ subversion/svndumpfilter/main.c (working copy)
@@ -97,7 +97,118 @@
svn_stringbuf_appendbytes(*strbuf, "\n", 1);
}
+/* Match a string against a wildcard pattern.
+ * @param str - [in] the string to check
+ * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
+ * with '\\' as an escape character
+ * @return TRUE if pattern match is detected
+ */
+static svn_boolean_t
+wildcard_match(const char *str, const char *pat)
+{
+ svn_boolean_t wok = TRUE; /* wild card matchin accepted? */
+ while (*str && *pat)
+ {
+ if (wok)
+ {
+ switch(*pat)
+ {
+ case '?': /* matches any character */
+ break;
+ case '*': /* have to match 0 or more (expensive) */
+ {
+ /* Loop backwards because patterns of the type '*.ext'
+ * are often used (match longest first)
+ */
+ const char *skip_str = str;
+ skip_str += strlen(skip_str);
+
+ while (skip_str >= str)
+ {
+ if (wildcard_match(skip_str, pat+1))
+ return TRUE;
+ skip_str--;
+ }
+ return FALSE; /* nothing could be matched*/
+ }
+ break;
+ case '\\': /* this is an escape */
+ wok = FALSE;
+ pat++;
+ continue;
+ default: /* regular comparison */
+ if (*pat != *str)
+ return FALSE;
+ break;
+ }
+ }
+ else
+ {
+ if (*pat != *str)
+ return FALSE;
+ wok = TRUE;
+ }
+ str++;
+ pat++;
+ }
+ // check 100% match
+ return ((*pat == 0) && (*str == 0));
+}
+
+#define MAX_SAFE_PATTERN_STARS 100 /* avoid stack overflow on "**...*" */
+
+/* Since the wildcard matching function is recursive,
+ * we check the number of "*" in it so that the stack is
+ * not overrun on insane strings.
+ */
+static svn_error_t *
+check_wildcard_complexity(apr_array_header_t *pfxlist)
+{
+ int i;
+ int star_cnt;
+ const char *pattern;
+
+ for (i = 0; i < pfxlist->nelts; i++)
+ {
+ pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
+
+ star_cnt = 0;
+ while (*pattern)
+ {
+ if (*pattern == '*')
+ star_cnt++;
+ pattern++;
+ }
+
+ if (star_cnt >= MAX_SAFE_PATTERN_STARS)
+ return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+ _("Too many '*' characters in pattern"));
+ }
+
+ return NULL;
+}
+
+/* Wildcard matching function to compare a node-path with a
+ * specified set of wildcard patterns
+ */
+static svn_boolean_t
+ary_wildcard_match(apr_array_header_t *pfxlist, const char *path)
+{
+ int i;
+ const char *pattern;
+
+ for (i = 0; i < pfxlist->nelts; i++)
+ {
+ pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
+
+ if (wildcard_match(path, pattern))
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
/* Prefix matching function to compare node-path with set of prefixes. */
static svn_boolean_t
ary_prefix_match(apr_array_header_t *pfxlist, const char *path)
@@ -143,6 +254,8 @@
svn_boolean_t was_dropped; /* Was this revision dropped? */
};
+typedef svn_boolean_t (*match_function_t)(apr_array_header_t*, const char *);
+
struct parse_baton_t
{
/* Command-line options values. */
@@ -152,6 +265,7 @@
svn_boolean_t do_renumber_revs;
svn_boolean_t preserve_revprops;
apr_array_header_t *prefixes;
+ match_function_t match_function;
/* Input and output streams. */
svn_stream_t *in_stream;
@@ -460,7 +574,7 @@
copyfrom_path = svn_path_join("/", copyfrom_path, pool);
/* Shame, shame, shame ... this is NXOR. */
- nb->do_skip = (ary_prefix_match(pb->prefixes, node_path)
+ nb->do_skip = (pb->match_function(pb->prefixes, node_path)
? pb->do_exclude : (! pb->do_exclude));
/* If we're skipping the node, take note of path, discarding the
@@ -480,7 +594,7 @@
/* Test if this node was copied from dropped source. */
if (copyfrom_path &&
- (ary_prefix_match(pb->prefixes, copyfrom_path)
+ (pb->match_function(pb->prefixes, copyfrom_path)
? pb->do_exclude : (! pb->do_exclude)))
{
/* This node was copied from dropped source.
@@ -767,7 +881,8 @@
svndumpfilter__renumber_revs,
svndumpfilter__preserve_revprops,
svndumpfilter__quiet,
- svndumpfilter__version
+ svndumpfilter__version,
+ svndumpfilter__match_type
};
/* Option codes and descriptions.
@@ -792,9 +907,18 @@
N_("Renumber revisions left after filtering.") },
{"preserve-revprops", svndumpfilter__preserve_revprops, 0,
N_("Don't filter revision properties.") },
+ {"match-type", svndumpfilter__match_type, 1,
+ N_("Default: 'prefix'. Defines how the pattern matching is done\n"
+ " "
+ "'prefix' - match nodes with the fiven prefixes from dumpstream.\n"
+ " "
+ "'wildcard' - use '*' and '?' as wildcards (e.g. '*.pdf') .\n"
+ ) },
{NULL}
};
+#define MATCH_TYPE_PREFIX "prefix"
+#define MATCH_TYPE_WILDCARD "wildcard"
/* Array of available subcommands.
* The entire list must be terminated with an entry of nulls.
@@ -802,16 +926,18 @@
static const svn_opt_subcommand_desc_t cmd_table[] =
{
{"exclude", subcommand_exclude, {0},
- N_("Filter out nodes with given prefixes from dumpstream.\n"
- "usage: svndumpfilter exclude PATH_PREFIX...\n"),
+ N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+ "usage: svndumpfilter exclude PATTERN...\n"),
{svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
- svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+ svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+ svndumpfilter__match_type} },
{"include", subcommand_include, {0},
- N_("Filter out nodes without given prefixes from dumpstream.\n"
- "usage: svndumpfilter include PATH_PREFIX...\n"),
+ N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+ "usage: svndumpfilter include PATTERN...\n"),
{svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
- svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+ svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+ svndumpfilter__match_type} },
{"help", subcommand_help, {"?", "h"},
N_("Describe the usage of this program or its subcommands.\n"
@@ -833,6 +959,7 @@
svn_boolean_t help; /* --help or -? */
svn_boolean_t renumber_revs; /* --renumber-revs */
svn_boolean_t preserve_revprops; /* --preserve-revprops */
+ const char *match_type; /* --match-type */
apr_array_header_t *prefixes; /* mainargs. */
};
@@ -864,6 +991,23 @@
baton->renumber_history = apr_hash_make(pool);
baton->last_live_revision = SVN_INVALID_REVNUM;
+ if (opt_state->match_type == NULL) { /* just use the default */
+ baton->match_function = ary_prefix_match;
+ } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
+ baton->match_function = ary_prefix_match;
+ } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
+ svn_error_t *pComplexityCheck;
+
+ pComplexityCheck = check_wildcard_complexity(baton->prefixes);
+ if (pComplexityCheck)
+ return pComplexityCheck;
+
+ baton->match_function = ary_wildcard_match;
+ } else {
+ return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+ _("Invalid pattern matching type selected"));
+ }
+
/* This is non-ideal: We should pass through the version of the
* input dumpstream. However, our API currently doesn't allow that.
* Hardcoding version 2 is acceptable because:
@@ -1084,7 +1228,6 @@
apr_array_header_t *received_opts;
int i;
-
/* Initialize the app. */
if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
return EXIT_FAILURE;
@@ -1168,6 +1311,9 @@
case svndumpfilter__preserve_revprops:
opt_state.preserve_revprops = TRUE;
break;
+ case svndumpfilter__match_type:
+ opt_state.match_type = apr_pstrdup(pool, opt_arg);
+ break;
default:
{
subcommand_help(NULL, NULL, pool);
---------------------------------------------------------------------
To unsubscribe, e-mail:
dev-unsubscribe@...
For additional commands, e-mail:
dev-help@...