The jabber_id_new() function in libpurple/protocols/jabber/jutil.c is
pretty expensive. It creates a JabberID struct given the string
version of a Jabber username (e.g. it splits
"mark.doliner@gmail.com/Home" into "mark.doliner", "gmail.com" and
"Home"). It also lowercases the node and domain, does utf8
normalization, and does stringprep validation to ensure the JID is
composed only of characters allowed by the XMPP RFC.
We've optimized this function at Meebo. In our testing we found that
the vast majority of JIDs are made up of only these characters: a-z A-Z 0-9
@ / { | } ~ . [ \ ] ^ _ and ';'. So we do a quick first pass over the
given string. If the string contains only these characters then we
skip g_utf8_normalize() and skip stringprep and only lowercase the
node and domain. Otherwise we do everything.
How do people feel about me checking this change into the jabber code
in libpurple? Meebo probably has a larger percentage of
English-speaking users than Pidgin, so maybe our results are unfairly
biased. Does anyone know how common non-ASCII JIDs are?
I suspect that even for the case where the jid contains non-ASCII
characters our optimized version won't be very much slower, and might
even be faster (it makes one pass over the string to determine the
location of @ and / instead of calling g_utf8_strchr() twice (but
that's easy to fix on its own)).
In other words: How does everyone feel about the attached patch?
-Mark
[optimize_jabber_id_new.diff]
#
# old_revision [22e14265a47cdddb4c9eb1ee0d2ce2989f9fce61]
#
# patch "libpurple/protocols/jabber/jutil.c"
# from [912799b76983833354a4420afc830bbb9b370f21]
# to [31de021d4923c45a328aa1f6ae688d9b0a44e4d6]
#
============================================================
--- libpurple/protocols/jabber/jutil.c 912799b76983833354a4420afc830bbb9b370f21
+++ libpurple/protocols/jabber/jutil.c 31de021d4923c45a328aa1f6ae688d9b0a44e4d6
@@ -103,20 +103,86 @@ jabber_id_new(const char *str)
JabberID*
jabber_id_new(const char *str)
{
- char *at;
- char *slash;
+ const char *at = NULL;
+ const char *slash = NULL;
+ const char *c;
+ gboolean needs_validation = FALSE;
char *node = NULL;
char *domain;
JabberID *jid;
- if(!str || !g_utf8_validate(str, -1, NULL))
+ if (!str)
return NULL;
+ for (c = str; *c != '\0'; c++) {
+ switch (*c) {
+ case '@':
+ if (!at)
+ at = c;
+ break;
+ case '/':
+ if (!at) {
+ /*
+ * If at is NULL then c is still pointing to the node
+ * name, and node names can not contain forward slashes
+ */
+ return NULL;
+ }
+ if (!slash)
+ slash = c;
+ break;
+ default:
+ /* make sure this character falls within the allowed ascii characters
+ * specified in the nodeprep RFC. If it's outside of this range,
+ * the character is probably unicode and will be validated using the
+ * more expensive UTF-8 compliant nodeprep functions
+ */
+ if ( !( ('a' <= *c && *c <= '~') || /*a-z{|}~*/
+ ('.' <= *c && *c <= '9') || /*./0123456789*/
+ ('A' <= *c && *c <= '_') || /*A-Z[\]^_*/
+ (*c == ';') )) /*;*/
+ {
+ needs_validation = TRUE;
+ }
+ break;
+ }
+ }
+
jid = g_new0(JabberID, 1);
- at = g_utf8_strchr(str, -1, '@');
- slash = g_utf8_strchr(str, -1, '/');
+ if (!needs_validation) {
+ /* No UTF-8 characters in the jid--just lowercase and return */
+ if (at) {
+ jid->node = g_utf8_strdown(str, at-str);
+ if(slash) {
+ jid->domain = g_utf8_strdown(at + 1, slash - (at + 1));
+ jid->resource = g_strdup(slash + 1);
+ } else {
+ jid->domain = g_utf8_strdown(at + 1, -1);
+ }
+ } else {
+ if(slash) {
+ jid->domain = g_utf8_strdown(str, slash - str);
+ jid->resource = g_strdup(slash + 1);
+ } else {
+ jid->domain = g_utf8_strdown(str, -1);
+ }
+ }
+ return jid;
+ }
+ /*
+ * If we get here, there are some non-ASCII chars in the string, so
+ * we'll need to validate it, normalize, and finally do a full jabber
+ * nodeprep on the jid.
+ */
+
+ if (!g_utf8_validate(str, -1, NULL)) {
+ jabber_id_free(jid);
+ return NULL;
+ }
+
+ /* normalization */
if(at) {
node = g_utf8_normalize(str, at-str, G_NORMALIZE_NFKC);
if(slash) {
@@ -144,6 +210,7 @@ jabber_id_new(const char *str)
g_free(domain);
}
+ /* and finally the jabber nodeprep */
if(!jabber_nodeprep_validate(jid->node) ||
!jabber_nameprep_validate(jid->domain) ||
!jabber_resourceprep_validate(jid->resource)) {
_______________________________________________
Devel mailing list
Devel@...
http://pidgin.im/cgi-bin/mailman/listinfo/devel