[PATCH] - svndumpfilter - add wildcard matchinv

View: New views
5 Messages — Rating Filter:   Alert me  

[PATCH] - svndumpfilter - add wildcard matchinv

by Andrei-18 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

This patch adds wildcard matching to the regular "prefix" matching for svndumpfilter. It is my first patch and feedback would be appreciated.

There is one place where I am unsure about: the wildcard matching uses "NXOR", so I am not sure if it was designed to only work with path prefixes (it seems to go well if I choose "*.txt" as a pattern for example).

Andrei Litvin

Index: subversion/svndumpfilter/main.c
===================================================================
--- subversion/svndumpfilter/main.c (revision 21719)
+++ subversion/svndumpfilter/main.c (working copy)
@@ -97,7 +97,118 @@
   svn_stringbuf_appendbytes(*strbuf, "\n", 1);
 }
 
+/* Match a string against a wildcard pattern.
+ * @param str - [in] the string to check
+ * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
+ *              with '\\' as an escape character
+ * @return TRUE if pattern match is detected
+ */
+static svn_boolean_t
+wildcard_match(const char *str, const char *pat)
+{
+  svn_boolean_t  wok = TRUE;  /* wild card matchin accepted? */
+  while (*str && *pat)
+    {
+      if (wok)
+        {
+          switch(*pat)
+            {
+              case '?':        /* matches any character */
+                break;
+              case '*':        /* have to match 0 or more (expensive) */
+                {
+                  /* Loop backwards because patterns of the type '*.ext'
+                   * are often used (match longest first)
+                   */
+                  const char *skip_str = str;
+                  skip_str += strlen(skip_str);
+                  
+                  while (skip_str >= str)
+                    {
+                      if (wildcard_match(skip_str, pat+1))
+                        return TRUE;
+                      skip_str--;  
+                    }
+                  return FALSE; /* nothing could be matched*/
+                }
+                break;
+              case '\\':       /* this is an escape */
+                wok = FALSE;
+                pat++;
+                continue;  
+              default:         /* regular comparison */  
+                if (*pat != *str)
+                  return FALSE;
+                break;  
+            }
+        }
+      else
+        {
+          if (*pat != *str)
+            return FALSE;
+          wok = TRUE;  
+        }
+      str++;
+      pat++;
+    }
 
+    // check 100% match
+    return ((*pat == 0) && (*str == 0));
+}
+
+#define MAX_SAFE_PATTERN_STARS   100  /* avoid stack overflow on "**...*" */
+
+/* Since the wildcard matching function is recursive,
+ * we check the number of "*" in it so that the stack is
+ * not overrun on insane strings.
+ */
+static svn_error_t *
+check_wildcard_complexity(apr_array_header_t *pfxlist)
+{
+  int i;
+  int star_cnt;
+  const char *pattern;
+
+  for (i = 0; i < pfxlist->nelts; i++)
+    {
+      pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
+
+      star_cnt = 0;
+      while (*pattern)
+        {
+          if (*pattern == '*')
+            star_cnt++;
+          pattern++;
+        }
+      
+      if (star_cnt >= MAX_SAFE_PATTERN_STARS)
+        return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+                                _("Too many '*' characters in pattern"));
+    }
+
+  return NULL;
+}
+
+/* Wildcard matching function to compare a node-path with a
+ * specified set of wildcard patterns
+ */
+static svn_boolean_t
+ary_wildcard_match(apr_array_header_t *pfxlist, const char *path)
+{
+  int i;
+  const char *pattern;
+
+  for (i = 0; i < pfxlist->nelts; i++)
+    {
+      pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
+      
+      if (wildcard_match(path, pattern))
+        return TRUE;
+    }
+
+  return FALSE;
+}
+
 /* Prefix matching function to compare node-path with set of prefixes. */
 static svn_boolean_t
 ary_prefix_match(apr_array_header_t *pfxlist, const char *path)
@@ -143,6 +254,8 @@
   svn_boolean_t was_dropped; /* Was this revision dropped? */
 };
 
+typedef svn_boolean_t (*match_function_t)(apr_array_header_t*, const char *);
+
 struct parse_baton_t
 {
   /* Command-line options values. */
@@ -152,6 +265,7 @@
   svn_boolean_t do_renumber_revs;
   svn_boolean_t preserve_revprops;
   apr_array_header_t *prefixes;
+  match_function_t match_function;
 
   /* Input and output streams. */
   svn_stream_t *in_stream;
@@ -460,7 +574,7 @@
     copyfrom_path = svn_path_join("/", copyfrom_path, pool);
 
   /* Shame, shame, shame ... this is NXOR. */
-  nb->do_skip = (ary_prefix_match(pb->prefixes, node_path)
+  nb->do_skip = (pb->match_function(pb->prefixes, node_path)
                  ? pb->do_exclude : (! pb->do_exclude));
 
   /* If we're skipping the node, take note of path, discarding the
@@ -480,7 +594,7 @@
 
       /* Test if this node was copied from dropped source. */
       if (copyfrom_path &&
-          (ary_prefix_match(pb->prefixes, copyfrom_path)
+          (pb->match_function(pb->prefixes, copyfrom_path)
            ? pb->do_exclude : (! pb->do_exclude)))
         {
           /* This node was copied from dropped source.
@@ -767,7 +881,8 @@
     svndumpfilter__renumber_revs,
     svndumpfilter__preserve_revprops,
     svndumpfilter__quiet,
-    svndumpfilter__version
+    svndumpfilter__version,
+    svndumpfilter__match_type
   };
 
 /* Option codes and descriptions.
@@ -792,9 +907,18 @@
      N_("Renumber revisions left after filtering.") },
     {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
      N_("Don't filter revision properties.") },
+    {"match-type", svndumpfilter__match_type, 1,
+     N_("Default: 'prefix'. Defines how the pattern matching is done\n"
+        "                            "
+        "'prefix'   - match nodes with the fiven prefixes from dumpstream.\n"
+        "                            "
+        "'wildcard' - use '*' and '?' as wildcards (e.g. '*.pdf') .\n"
+       ) },
     {NULL}
   };
 
+#define MATCH_TYPE_PREFIX      "prefix"
+#define MATCH_TYPE_WILDCARD    "wildcard"
 
 /* Array of available subcommands.
  * The entire list must be terminated with an entry of nulls.
@@ -802,16 +926,18 @@
 static const svn_opt_subcommand_desc_t cmd_table[] =
   {
     {"exclude", subcommand_exclude, {0},
-     N_("Filter out nodes with given prefixes from dumpstream.\n"
-        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
+     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+        "usage: svndumpfilter exclude PATTERN...\n"),
      {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
-      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+      svndumpfilter__match_type} },
 
     {"include", subcommand_include, {0},
-     N_("Filter out nodes without given prefixes from dumpstream.\n"
-        "usage: svndumpfilter include PATH_PREFIX...\n"),
+     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+        "usage: svndumpfilter include PATTERN...\n"),
      {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
-      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+      svndumpfilter__match_type} },
 
     {"help", subcommand_help, {"?", "h"},
      N_("Describe the usage of this program or its subcommands.\n"
@@ -833,6 +959,7 @@
   svn_boolean_t help;                    /* --help or -?        */
   svn_boolean_t renumber_revs;           /* --renumber-revs     */
   svn_boolean_t preserve_revprops;       /* --preserve-revprops */
+  const char *match_type;                /* --match-type        */
   apr_array_header_t *prefixes;          /* mainargs.           */
 };
 
@@ -864,6 +991,23 @@
   baton->renumber_history = apr_hash_make(pool);
   baton->last_live_revision = SVN_INVALID_REVNUM;
 
+  if (opt_state->match_type == NULL) {  /* just use the default */
+    baton->match_function = ary_prefix_match;
+  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
+    baton->match_function = ary_prefix_match;
+  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
+    svn_error_t *pComplexityCheck;
+    
+    pComplexityCheck = check_wildcard_complexity(baton->prefixes);
+    if (pComplexityCheck)
+      return pComplexityCheck;
+    
+    baton->match_function = ary_wildcard_match;
+  } else {
+    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+                            _("Invalid pattern matching type selected"));
+  }
+
   /* This is non-ideal: We should pass through the version of the
    * input dumpstream.  However, our API currently doesn't allow that.
    * Hardcoding version 2 is acceptable because:
@@ -1084,7 +1228,6 @@
   apr_array_header_t *received_opts;
   int i;
 
-
   /* Initialize the app. */
   if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
     return EXIT_FAILURE;
@@ -1168,6 +1311,9 @@
         case svndumpfilter__preserve_revprops:
           opt_state.preserve_revprops = TRUE;
           break;
+        case svndumpfilter__match_type:
+          opt_state.match_type = apr_pstrdup(pool, opt_arg);
+          break;
         default:
           {
             subcommand_help(NULL, NULL, pool);


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@...
For additional commands, e-mail: dev-help@...

Re: [PATCH] - svndumpfilter - add wildcard matchinv

by C. Michael Pilato :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Andrei wrote:
> This patch adds wildcard matching to the regular "prefix" matching for
> svndumpfilter. It is my first patch and feedback would be appreciated.
>
> There is one place where I am unsure about: the wildcard matching uses
> "NXOR", so I am not sure if it was designed to only work with path
> prefixes (it seems to go well if I choose "*.txt" as a pattern for
> example).

Andrei, thanks for the generally good patch.  I have a few comments
which you can find inline below.

> Index: subversion/svndumpfilter/main.c
> ===================================================================
> --- subversion/svndumpfilter/main.c (revision 21719)
> +++ subversion/svndumpfilter/main.c (working copy)
> @@ -97,7 +97,118 @@
>    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
>  }
>  
> +/* Match a string against a wildcard pattern.
> + * @param str - [in] the string to check
> + * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
> + *              with '\\' as an escape character
> + * @return TRUE if pattern match is detected
> + */
> +static svn_boolean_t
> +wildcard_match(const char *str, const char *pat)
> +{
...

Could not this whole function be written using apr_fnmatch(), the same
matching interface we use elsewhere in Subversion?  You could do a
quicky pass to handle the backslash escape character (a UI quirk I'm not
totally sold on), and then hand the result off to apr_fnmatch().  Sure
would simplify the code, and keep this behavior consistent with that of
other areas of Subversion.

> @@ -864,6 +991,23 @@
>    baton->renumber_history = apr_hash_make(pool);
>    baton->last_live_revision = SVN_INVALID_REVNUM;
>  
> +  if (opt_state->match_type == NULL) {  /* just use the default */
> +    baton->match_function = ary_prefix_match;
> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
> +    baton->match_function = ary_prefix_match;
> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
> +    svn_error_t *pComplexityCheck;
> +    
> +    pComplexityCheck = check_wildcard_complexity(baton->prefixes);
> +    if (pComplexityCheck)
> +      return pComplexityCheck;
We don't do camel-case.  But if you use apr_fnmatch(), you needn't check
complexity anyway.

--
C. Michael Pilato <cmpilato@...>
CollabNet   <>   www.collab.net   <>   Distributed Development On Demand



signature.asc (198 bytes) Download Attachment

Parent Message unknown Re: [PATCH] - svndumpfilter - add wildcard matchinv

by Andrei-18 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Thank you for the feedback. Here is a new version using apr_fnmatch. Based on my tests, it seems to handle escapes ('\') ok and has '[' matching on top of the previous one. I did not see a case which would require me to handle escapes aditionally from apr_fnmatch .. did you
have any specific functionality in mind that requires this?

Andrei

On Mon, 2006-10-02 at 09:00 -0400, C. Michael Pilato wrote:
> Andrei wrote:
> > This patch adds wildcard matching to the regular "prefix" matching for
> > svndumpfilter. It is my first patch and feedback would be appreciated.
> >
> > There is one place where I am unsure about: the wildcard matching uses
> > "NXOR", so I am not sure if it was designed to only work with path
> > prefixes (it seems to go well if I choose "*.txt" as a pattern for
> > example).
>
> Andrei, thanks for the generally good patch.  I have a few comments
> which you can find inline below.
>
> > Index: subversion/svndumpfilter/main.c
> > ===================================================================
> > --- subversion/svndumpfilter/main.c (revision 21719)
> > +++ subversion/svndumpfilter/main.c (working copy)
> > @@ -97,7 +97,118 @@
> >    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
> >  }
> > 
> > +/* Match a string against a wildcard pattern.
> > + * @param str - [in] the string to check
> > + * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
> > + *              with '\\' as an escape character
> > + * @return TRUE if pattern match is detected
> > + */
> > +static svn_boolean_t
> > +wildcard_match(const char *str, const char *pat)
> > +{
>
> ...
>
> Could not this whole function be written using apr_fnmatch(), the same
> matching interface we use elsewhere in Subversion?  You could do a
> quicky pass to handle the backslash escape character (a UI quirk I'm not
> totally sold on), and then hand the result off to apr_fnmatch().  Sure
> would simplify the code, and keep this behavior consistent with that of
> other areas of Subversion.
>
> > @@ -864,6 +991,23 @@
> >    baton->renumber_history = apr_hash_make(pool);
> >    baton->last_live_revision = SVN_INVALID_REVNUM;
> > 
> > +  if (opt_state->match_type == NULL) {  /* just use the default */
> > +    baton->match_function = ary_prefix_match;
> > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
> > +    baton->match_function = ary_prefix_match;
> > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
> > +    svn_error_t *pComplexityCheck;
> > +   
> > +    pComplexityCheck = check_wildcard_complexity(baton->prefixes);
> > +    if (pComplexityCheck)
> > +      return pComplexityCheck;
>
> We don't do camel-case.  But if you use apr_fnmatch(), you needn't check
> complexity anyway.
>
Index: subversion/svndumpfilter/main.c
===================================================================
--- subversion/svndumpfilter/main.c (revision 21736)
+++ subversion/svndumpfilter/main.c (working copy)
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 
 #include <apr_file_io.h>
+#include <apr_fnmatch.h>
 
 #include "svn_private_config.h"
 #include "svn_cmdline.h"
@@ -97,7 +98,26 @@
   svn_stringbuf_appendbytes(*strbuf, "\n", 1);
 }
 
+/* Wildcard matching function to compare a node-path with a
+ * specified set of wildcard patterns
+ */
+static svn_boolean_t
+ary_wildcard_match(apr_array_header_t *pfxlist, const char *path)
+{
+  int i;
+  const char *pattern;
 
+  for (i = 0; i < pfxlist->nelts; i++)
+    {
+      pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
+
+      if (apr_fnmatch(pattern, path, 0) == APR_SUCCESS)
+        return TRUE;
+    }
+
+  return FALSE;
+}
+
 /* Prefix matching function to compare node-path with set of prefixes. */
 static svn_boolean_t
 ary_prefix_match(apr_array_header_t *pfxlist, const char *path)
@@ -143,6 +163,8 @@
   svn_boolean_t was_dropped; /* Was this revision dropped? */
 };
 
+typedef svn_boolean_t (*match_function_t)(apr_array_header_t*, const char *);
+
 struct parse_baton_t
 {
   /* Command-line options values. */
@@ -152,6 +174,7 @@
   svn_boolean_t do_renumber_revs;
   svn_boolean_t preserve_revprops;
   apr_array_header_t *prefixes;
+  match_function_t match_function;
 
   /* Input and output streams. */
   svn_stream_t *in_stream;
@@ -460,7 +483,7 @@
     copyfrom_path = svn_path_join("/", copyfrom_path, pool);
 
   /* Shame, shame, shame ... this is NXOR. */
-  nb->do_skip = (ary_prefix_match(pb->prefixes, node_path)
+  nb->do_skip = (pb->match_function(pb->prefixes, node_path)
                  ? pb->do_exclude : (! pb->do_exclude));
 
   /* If we're skipping the node, take note of path, discarding the
@@ -480,7 +503,7 @@
 
       /* Test if this node was copied from dropped source. */
       if (copyfrom_path &&
-          (ary_prefix_match(pb->prefixes, copyfrom_path)
+          (pb->match_function(pb->prefixes, copyfrom_path)
            ? pb->do_exclude : (! pb->do_exclude)))
         {
           /* This node was copied from dropped source.
@@ -767,7 +790,8 @@
     svndumpfilter__renumber_revs,
     svndumpfilter__preserve_revprops,
     svndumpfilter__quiet,
-    svndumpfilter__version
+    svndumpfilter__version,
+    svndumpfilter__match_type
   };
 
 /* Option codes and descriptions.
@@ -792,9 +816,18 @@
      N_("Renumber revisions left after filtering.") },
     {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
      N_("Don't filter revision properties.") },
+    {"match-type", svndumpfilter__match_type, 1,
+     N_("Default: 'prefix'. Defines how the pattern matching is done\n"
+        "                            "
+        "'prefix'   - match nodes with the fiven prefixes from dumpstream.\n"
+        "                            "
+        "'wildcard' - use '*' and '?' as wildcards (e.g. '*.pdf') .\n"
+       ) },
     {NULL}
   };
 
+#define MATCH_TYPE_PREFIX      "prefix"
+#define MATCH_TYPE_WILDCARD    "wildcard"
 
 /* Array of available subcommands.
  * The entire list must be terminated with an entry of nulls.
@@ -802,16 +835,18 @@
 static const svn_opt_subcommand_desc_t cmd_table[] =
   {
     {"exclude", subcommand_exclude, {0},
-     N_("Filter out nodes with given prefixes from dumpstream.\n"
-        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
+     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+        "usage: svndumpfilter exclude PATTERN...\n"),
      {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
-      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+      svndumpfilter__match_type} },
 
     {"include", subcommand_include, {0},
-     N_("Filter out nodes without given prefixes from dumpstream.\n"
-        "usage: svndumpfilter include PATH_PREFIX...\n"),
+     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
+        "usage: svndumpfilter include PATTERN...\n"),
      {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
-      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
+      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
+      svndumpfilter__match_type} },
 
     {"help", subcommand_help, {"?", "h"},
      N_("Describe the usage of this program or its subcommands.\n"
@@ -833,6 +868,7 @@
   svn_boolean_t help;                    /* --help or -?        */
   svn_boolean_t renumber_revs;           /* --renumber-revs     */
   svn_boolean_t preserve_revprops;       /* --preserve-revprops */
+  const char *match_type;                /* --match-type        */
   apr_array_header_t *prefixes;          /* mainargs.           */
 };
 
@@ -864,6 +900,17 @@
   baton->renumber_history = apr_hash_make(pool);
   baton->last_live_revision = SVN_INVALID_REVNUM;
 
+  if (opt_state->match_type == NULL) {  /* just use the default */
+    baton->match_function = ary_prefix_match;
+  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
+    baton->match_function = ary_prefix_match;
+  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
+    baton->match_function = ary_wildcard_match;
+  } else {
+    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
+                            _("Invalid pattern matching type selected"));
+  }
+
   /* This is non-ideal: We should pass through the version of the
    * input dumpstream.  However, our API currently doesn't allow that.
    * Hardcoding version 2 is acceptable because:
@@ -1084,7 +1131,6 @@
   apr_array_header_t *received_opts;
   int i;
 
-
   /* Initialize the app. */
   if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
     return EXIT_FAILURE;
@@ -1168,6 +1214,9 @@
         case svndumpfilter__preserve_revprops:
           opt_state.preserve_revprops = TRUE;
           break;
+        case svndumpfilter__match_type:
+          opt_state.match_type = apr_pstrdup(pool, opt_arg);
+          break;
         default:
           {
             subcommand_help(NULL, NULL, pool);


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@...
For additional commands, e-mail: dev-help@...

Re: [PATCH] - svndumpfilter - add wildcard matchinv

by Hyrum K. Wright-3 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Andrei wrote:
> Thank you for the feedback. Here is a new version using apr_fnmatch. Based on my tests, it seems to handle escapes ('\') ok and has '[' matching on top of the previous one. I did not see a case which would require me to handle escapes aditionally from apr_fnmatch .. did you
>  have any specific functionality in mind that requires this?

Ping...

Has anybody looked at this most recent version of Andrei's patch?  If
nothing happens in the next few days, I'll file an issue.

Thanks,
-Hyrum

>  On Mon, 2006-10-02 at 09:00 -0400, C. Michael Pilato wrote:
>  > Andrei wrote:
>  > > This patch adds wildcard matching to the regular "prefix" matching for
>  > > svndumpfilter. It is my first patch and feedback would be appreciated.
>  > >
>  > > There is one place where I am unsure about: the wildcard matching uses
>  > > "NXOR", so I am not sure if it was designed to only work with path
>  > > prefixes (it seems to go well if I choose "*.txt" as a pattern for
>  > > example).
>  >
>  > Andrei, thanks for the generally good patch.  I have a few comments
>  > which you can find inline below.
>  >
>  > > Index: subversion/svndumpfilter/main.c
>  > > ===================================================================
>  > > --- subversion/svndumpfilter/main.c (revision 21719)
>  > > +++ subversion/svndumpfilter/main.c (working copy)
>  > > @@ -97,7 +97,118 @@
>  > >    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
>  > >  }
>  > >  
>  > > +/* Match a string against a wildcard pattern.
>  > > + * @param str - [in] the string to check
>  > > + * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
>  > > + *              with '\\' as an escape character
>  > > + * @return TRUE if pattern match is detected
>  > > + */
>  > > +static svn_boolean_t
>  > > +wildcard_match(const char *str, const char *pat)
>  > > +{
>  >
>  > ...
>  >
>  > Could not this whole function be written using apr_fnmatch(), the same
>  > matching interface we use elsewhere in Subversion?  You could do a
>  > quicky pass to handle the backslash escape character (a UI quirk I'm not
>  > totally sold on), and then hand the result off to apr_fnmatch().  Sure
>  > would simplify the code, and keep this behavior consistent with that of
>  > other areas of Subversion.
>  >
>  > > @@ -864,6 +991,23 @@
>  > >    baton->renumber_history = apr_hash_make(pool);
>  > >    baton->last_live_revision = SVN_INVALID_REVNUM;
>  > >  
>  > > +  if (opt_state->match_type == NULL) {  /* just use the default */
>  > > +    baton->match_function = ary_prefix_match;
>  > > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
>  > > +    baton->match_function = ary_prefix_match;
>  > > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
>  > > +    svn_error_t *pComplexityCheck;
>  > > +    
>  > > +    pComplexityCheck = check_wildcard_complexity(baton->prefixes);
>  > > +    if (pComplexityCheck)
>  > > +      return pComplexityCheck;
>  >
>  > We don't do camel-case.  But if you use apr_fnmatch(), you needn't check
>  > complexity anyway.
>  >  
>
>
> ------------------------------------------------------------------------
>
> Index: subversion/svndumpfilter/main.c
> ===================================================================
> --- subversion/svndumpfilter/main.c (revision 21736)
> +++ subversion/svndumpfilter/main.c (working copy)
> @@ -20,6 +20,7 @@
>  #include <stdlib.h>
>  
>  #include <apr_file_io.h>
> +#include <apr_fnmatch.h>
>  
>  #include "svn_private_config.h"
>  #include "svn_cmdline.h"
> @@ -97,7 +98,26 @@
>    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
>  }
>  
> +/* Wildcard matching function to compare a node-path with a
> + * specified set of wildcard patterns
> + */
> +static svn_boolean_t
> +ary_wildcard_match(apr_array_header_t *pfxlist, const char *path)
> +{
> +  int i;
> +  const char *pattern;
>  
> +  for (i = 0; i < pfxlist->nelts; i++)
> +    {
> +      pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
> +
> +      if (apr_fnmatch(pattern, path, 0) == APR_SUCCESS)
> +        return TRUE;
> +    }
> +
> +  return FALSE;
> +}
> +
>  /* Prefix matching function to compare node-path with set of prefixes. */
>  static svn_boolean_t
>  ary_prefix_match(apr_array_header_t *pfxlist, const char *path)
> @@ -143,6 +163,8 @@
>    svn_boolean_t was_dropped; /* Was this revision dropped? */
>  };
>  
> +typedef svn_boolean_t (*match_function_t)(apr_array_header_t*, const char *);
> +
>  struct parse_baton_t
>  {
>    /* Command-line options values. */
> @@ -152,6 +174,7 @@
>    svn_boolean_t do_renumber_revs;
>    svn_boolean_t preserve_revprops;
>    apr_array_header_t *prefixes;
> +  match_function_t match_function;
>  
>    /* Input and output streams. */
>    svn_stream_t *in_stream;
> @@ -460,7 +483,7 @@
>      copyfrom_path = svn_path_join("/", copyfrom_path, pool);
>  
>    /* Shame, shame, shame ... this is NXOR. */
> -  nb->do_skip = (ary_prefix_match(pb->prefixes, node_path)
> +  nb->do_skip = (pb->match_function(pb->prefixes, node_path)
>                   ? pb->do_exclude : (! pb->do_exclude));
>  
>    /* If we're skipping the node, take note of path, discarding the
> @@ -480,7 +503,7 @@
>  
>        /* Test if this node was copied from dropped source. */
>        if (copyfrom_path &&
> -          (ary_prefix_match(pb->prefixes, copyfrom_path)
> +          (pb->match_function(pb->prefixes, copyfrom_path)
>             ? pb->do_exclude : (! pb->do_exclude)))
>          {
>            /* This node was copied from dropped source.
> @@ -767,7 +790,8 @@
>      svndumpfilter__renumber_revs,
>      svndumpfilter__preserve_revprops,
>      svndumpfilter__quiet,
> -    svndumpfilter__version
> +    svndumpfilter__version,
> +    svndumpfilter__match_type
>    };
>  
>  /* Option codes and descriptions.
> @@ -792,9 +816,18 @@
>       N_("Renumber revisions left after filtering.") },
>      {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
>       N_("Don't filter revision properties.") },
> +    {"match-type", svndumpfilter__match_type, 1,
> +     N_("Default: 'prefix'. Defines how the pattern matching is done\n"
> +        "                            "
> +        "'prefix'   - match nodes with the fiven prefixes from dumpstream.\n"
> +        "                            "
> +        "'wildcard' - use '*' and '?' as wildcards (e.g. '*.pdf') .\n"
> +       ) },
>      {NULL}
>    };
>  
> +#define MATCH_TYPE_PREFIX      "prefix"
> +#define MATCH_TYPE_WILDCARD    "wildcard"
>  
>  /* Array of available subcommands.
>   * The entire list must be terminated with an entry of nulls.
> @@ -802,16 +835,18 @@
>  static const svn_opt_subcommand_desc_t cmd_table[] =
>    {
>      {"exclude", subcommand_exclude, {0},
> -     N_("Filter out nodes with given prefixes from dumpstream.\n"
> -        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
> +     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
> +        "usage: svndumpfilter exclude PATTERN...\n"),
>       {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
> -      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
> +      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
> +      svndumpfilter__match_type} },
>  
>      {"include", subcommand_include, {0},
> -     N_("Filter out nodes without given prefixes from dumpstream.\n"
> -        "usage: svndumpfilter include PATH_PREFIX...\n"),
> +     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
> +        "usage: svndumpfilter include PATTERN...\n"),
>       {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
> -      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
> +      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
> +      svndumpfilter__match_type} },
>  
>      {"help", subcommand_help, {"?", "h"},
>       N_("Describe the usage of this program or its subcommands.\n"
> @@ -833,6 +868,7 @@
>    svn_boolean_t help;                    /* --help or -?        */
>    svn_boolean_t renumber_revs;           /* --renumber-revs     */
>    svn_boolean_t preserve_revprops;       /* --preserve-revprops */
> +  const char *match_type;                /* --match-type        */
>    apr_array_header_t *prefixes;          /* mainargs.           */
>  };
>  
> @@ -864,6 +900,17 @@
>    baton->renumber_history = apr_hash_make(pool);
>    baton->last_live_revision = SVN_INVALID_REVNUM;
>  
> +  if (opt_state->match_type == NULL) {  /* just use the default */
> +    baton->match_function = ary_prefix_match;
> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
> +    baton->match_function = ary_prefix_match;
> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
> +    baton->match_function = ary_wildcard_match;
> +  } else {
> +    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
> +                            _("Invalid pattern matching type selected"));
> +  }
> +
>    /* This is non-ideal: We should pass through the version of the
>     * input dumpstream.  However, our API currently doesn't allow that.
>     * Hardcoding version 2 is acceptable because:
> @@ -1084,7 +1131,6 @@
>    apr_array_header_t *received_opts;
>    int i;
>  
> -
>    /* Initialize the app. */
>    if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
>      return EXIT_FAILURE;
> @@ -1168,6 +1214,9 @@
>          case svndumpfilter__preserve_revprops:
>            opt_state.preserve_revprops = TRUE;
>            break;
> +        case svndumpfilter__match_type:
> +          opt_state.match_type = apr_pstrdup(pool, opt_arg);
> +          break;
>          default:
>            {
>              subcommand_help(NULL, NULL, pool);


signature.asc (260 bytes) Download Attachment

Re: [PATCH] - svndumpfilter - add wildcard matchinv

by Hyrum K. Wright-3 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hyrum K. Wright wrote:
> Andrei wrote:
>> Thank you for the feedback. Here is a new version using apr_fnmatch. Based on my tests, it seems to handle escapes ('\') ok and has '[' matching on top of the previous one. I did not see a case which would require me to handle escapes aditionally from apr_fnmatch .. did you
>>  have any specific functionality in mind that requires this?
>
> Ping...
>
> Has anybody looked at this most recent version of Andrei's patch?  If
> nothing happens in the next few days, I'll file an issue.

Filed as issue 2634.

-Hyrum

>>  On Mon, 2006-10-02 at 09:00 -0400, C. Michael Pilato wrote:
>>  > Andrei wrote:
>>  > > This patch adds wildcard matching to the regular "prefix" matching for
>>  > > svndumpfilter. It is my first patch and feedback would be appreciated.
>>  > >
>>  > > There is one place where I am unsure about: the wildcard matching uses
>>  > > "NXOR", so I am not sure if it was designed to only work with path
>>  > > prefixes (it seems to go well if I choose "*.txt" as a pattern for
>>  > > example).
>>  >
>>  > Andrei, thanks for the generally good patch.  I have a few comments
>>  > which you can find inline below.
>>  >
>>  > > Index: subversion/svndumpfilter/main.c
>>  > > ===================================================================
>>  > > --- subversion/svndumpfilter/main.c (revision 21719)
>>  > > +++ subversion/svndumpfilter/main.c (working copy)
>>  > > @@ -97,7 +97,118 @@
>>  > >    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
>>  > >  }
>>  > >  
>>  > > +/* Match a string against a wildcard pattern.
>>  > > + * @param str - [in] the string to check
>>  > > + * @param pat - [in] the pattern. '*' and '?' wildcards are accepted,
>>  > > + *              with '\\' as an escape character
>>  > > + * @return TRUE if pattern match is detected
>>  > > + */
>>  > > +static svn_boolean_t
>>  > > +wildcard_match(const char *str, const char *pat)
>>  > > +{
>>  >
>>  > ...
>>  >
>>  > Could not this whole function be written using apr_fnmatch(), the same
>>  > matching interface we use elsewhere in Subversion?  You could do a
>>  > quicky pass to handle the backslash escape character (a UI quirk I'm not
>>  > totally sold on), and then hand the result off to apr_fnmatch().  Sure
>>  > would simplify the code, and keep this behavior consistent with that of
>>  > other areas of Subversion.
>>  >
>>  > > @@ -864,6 +991,23 @@
>>  > >    baton->renumber_history = apr_hash_make(pool);
>>  > >    baton->last_live_revision = SVN_INVALID_REVNUM;
>>  > >  
>>  > > +  if (opt_state->match_type == NULL) {  /* just use the default */
>>  > > +    baton->match_function = ary_prefix_match;
>>  > > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
>>  > > +    baton->match_function = ary_prefix_match;
>>  > > +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
>>  > > +    svn_error_t *pComplexityCheck;
>>  > > +    
>>  > > +    pComplexityCheck = check_wildcard_complexity(baton->prefixes);
>>  > > +    if (pComplexityCheck)
>>  > > +      return pComplexityCheck;
>>  >
>>  > We don't do camel-case.  But if you use apr_fnmatch(), you needn't check
>>  > complexity anyway.
>>  >  
>>
>>
>> ------------------------------------------------------------------------
>>
>> Index: subversion/svndumpfilter/main.c
>> ===================================================================
>> --- subversion/svndumpfilter/main.c (revision 21736)
>> +++ subversion/svndumpfilter/main.c (working copy)
>> @@ -20,6 +20,7 @@
>>  #include <stdlib.h>
>>  
>>  #include <apr_file_io.h>
>> +#include <apr_fnmatch.h>
>>  
>>  #include "svn_private_config.h"
>>  #include "svn_cmdline.h"
>> @@ -97,7 +98,26 @@
>>    svn_stringbuf_appendbytes(*strbuf, "\n", 1);
>>  }
>>  
>> +/* Wildcard matching function to compare a node-path with a
>> + * specified set of wildcard patterns
>> + */
>> +static svn_boolean_t
>> +ary_wildcard_match(apr_array_header_t *pfxlist, const char *path)
>> +{
>> +  int i;
>> +  const char *pattern;
>>  
>> +  for (i = 0; i < pfxlist->nelts; i++)
>> +    {
>> +      pattern = APR_ARRAY_IDX(pfxlist, i, const char *);
>> +
>> +      if (apr_fnmatch(pattern, path, 0) == APR_SUCCESS)
>> +        return TRUE;
>> +    }
>> +
>> +  return FALSE;
>> +}
>> +
>>  /* Prefix matching function to compare node-path with set of prefixes. */
>>  static svn_boolean_t
>>  ary_prefix_match(apr_array_header_t *pfxlist, const char *path)
>> @@ -143,6 +163,8 @@
>>    svn_boolean_t was_dropped; /* Was this revision dropped? */
>>  };
>>  
>> +typedef svn_boolean_t (*match_function_t)(apr_array_header_t*, const char *);
>> +
>>  struct parse_baton_t
>>  {
>>    /* Command-line options values. */
>> @@ -152,6 +174,7 @@
>>    svn_boolean_t do_renumber_revs;
>>    svn_boolean_t preserve_revprops;
>>    apr_array_header_t *prefixes;
>> +  match_function_t match_function;
>>  
>>    /* Input and output streams. */
>>    svn_stream_t *in_stream;
>> @@ -460,7 +483,7 @@
>>      copyfrom_path = svn_path_join("/", copyfrom_path, pool);
>>  
>>    /* Shame, shame, shame ... this is NXOR. */
>> -  nb->do_skip = (ary_prefix_match(pb->prefixes, node_path)
>> +  nb->do_skip = (pb->match_function(pb->prefixes, node_path)
>>                   ? pb->do_exclude : (! pb->do_exclude));
>>  
>>    /* If we're skipping the node, take note of path, discarding the
>> @@ -480,7 +503,7 @@
>>  
>>        /* Test if this node was copied from dropped source. */
>>        if (copyfrom_path &&
>> -          (ary_prefix_match(pb->prefixes, copyfrom_path)
>> +          (pb->match_function(pb->prefixes, copyfrom_path)
>>             ? pb->do_exclude : (! pb->do_exclude)))
>>          {
>>            /* This node was copied from dropped source.
>> @@ -767,7 +790,8 @@
>>      svndumpfilter__renumber_revs,
>>      svndumpfilter__preserve_revprops,
>>      svndumpfilter__quiet,
>> -    svndumpfilter__version
>> +    svndumpfilter__version,
>> +    svndumpfilter__match_type
>>    };
>>  
>>  /* Option codes and descriptions.
>> @@ -792,9 +816,18 @@
>>       N_("Renumber revisions left after filtering.") },
>>      {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
>>       N_("Don't filter revision properties.") },
>> +    {"match-type", svndumpfilter__match_type, 1,
>> +     N_("Default: 'prefix'. Defines how the pattern matching is done\n"
>> +        "                            "
>> +        "'prefix'   - match nodes with the fiven prefixes from dumpstream.\n"
>> +        "                            "
>> +        "'wildcard' - use '*' and '?' as wildcards (e.g. '*.pdf') .\n"
>> +       ) },
>>      {NULL}
>>    };
>>  
>> +#define MATCH_TYPE_PREFIX      "prefix"
>> +#define MATCH_TYPE_WILDCARD    "wildcard"
>>  
>>  /* Array of available subcommands.
>>   * The entire list must be terminated with an entry of nulls.
>> @@ -802,16 +835,18 @@
>>  static const svn_opt_subcommand_desc_t cmd_table[] =
>>    {
>>      {"exclude", subcommand_exclude, {0},
>> -     N_("Filter out nodes with given prefixes from dumpstream.\n"
>> -        "usage: svndumpfilter exclude PATH_PREFIX...\n"),
>> +     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
>> +        "usage: svndumpfilter exclude PATTERN...\n"),
>>       {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
>> -      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
>> +      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
>> +      svndumpfilter__match_type} },
>>  
>>      {"include", subcommand_include, {0},
>> -     N_("Filter out nodes without given prefixes from dumpstream.\n"
>> -        "usage: svndumpfilter include PATH_PREFIX...\n"),
>> +     N_("Filter out nodes matching a given pattern from the dumpstream.\n"
>> +        "usage: svndumpfilter include PATTERN...\n"),
>>       {svndumpfilter__drop_empty_revs, svndumpfilter__renumber_revs,
>> -      svndumpfilter__preserve_revprops, svndumpfilter__quiet} },
>> +      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
>> +      svndumpfilter__match_type} },
>>  
>>      {"help", subcommand_help, {"?", "h"},
>>       N_("Describe the usage of this program or its subcommands.\n"
>> @@ -833,6 +868,7 @@
>>    svn_boolean_t help;                    /* --help or -?        */
>>    svn_boolean_t renumber_revs;           /* --renumber-revs     */
>>    svn_boolean_t preserve_revprops;       /* --preserve-revprops */
>> +  const char *match_type;                /* --match-type        */
>>    apr_array_header_t *prefixes;          /* mainargs.           */
>>  };
>>  
>> @@ -864,6 +900,17 @@
>>    baton->renumber_history = apr_hash_make(pool);
>>    baton->last_live_revision = SVN_INVALID_REVNUM;
>>  
>> +  if (opt_state->match_type == NULL) {  /* just use the default */
>> +    baton->match_function = ary_prefix_match;
>> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_PREFIX) == 0) {
>> +    baton->match_function = ary_prefix_match;
>> +  } else if (strcmp(opt_state->match_type, MATCH_TYPE_WILDCARD) == 0) {
>> +    baton->match_function = ary_wildcard_match;
>> +  } else {
>> +    return svn_error_create(SVN_ERR_CL_ARG_PARSING_ERROR, NULL,
>> +                            _("Invalid pattern matching type selected"));
>> +  }
>> +
>>    /* This is non-ideal: We should pass through the version of the
>>     * input dumpstream.  However, our API currently doesn't allow that.
>>     * Hardcoding version 2 is acceptable because:
>> @@ -1084,7 +1131,6 @@
>>    apr_array_header_t *received_opts;
>>    int i;
>>  
>> -
>>    /* Initialize the app. */
>>    if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
>>      return EXIT_FAILURE;
>> @@ -1168,6 +1214,9 @@
>>          case svndumpfilter__preserve_revprops:
>>            opt_state.preserve_revprops = TRUE;
>>            break;
>> +        case svndumpfilter__match_type:
>> +          opt_state.match_type = apr_pstrdup(pool, opt_arg);
>> +          break;
>>          default:
>>            {
>>              subcommand_help(NULL, NULL, pool);
>



signature.asc (260 bytes) Download Attachment