[ruby-core:26492] HashWithIndifferentAccess to core

View: New views
20 Messages — Rating Filter:   Alert me  
< Prev | 1 - 2 | Next >

[ruby-core:26492] HashWithIndifferentAccess to core

by Urabe Shyouhei-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hello,

I got a pull request for a HashWithIndifferentAccess implementation to merge
into core.  I think it's almost OK to make it so (except few minor bugs that can
easily be handled).  How about it?  I'm also attaching a patch file for people who
are not used to git (mainly nobu).

-------- Original Message --------
Subject: [GitHub] methodmissing sent you a message
Date: Mon, 2 Nov 2009 12:40:24 -0800
From: GitHub <noreply@...>
To: shyouhei@...



methodmissing wants you to pull from methodmissing/ruby at hwia

Body: Hi,

I've played extensively with a HashWithIndifferentAccess (hwia / Mash)
implementation as an extension to both ruby 1.9.x and ruby 1.8.7 (
http://github.com/methodmissing/hwia and
http://blog.methodmissing.com/2009/08/29/alternative-hashwithindifferentaccess/)
before and recently took the leap to patch against 1.9.2 as a Hash subclass.

I've tried to conform to recommended code conventions, tried to not inject this
into the critical path of common Hash use cases and abstracted to macros where
possible.

Wycats suggested to send the pull request - naming conventions at the moment is
dangling - StrHash, IndifferentHash etc. I'll make myself available with
whatever time is required to have this conform to current ruby-core
standards.I'm also open to moving the recursive conversion to an extension and
implement a thin API as per identhash on Hash instead.

It passes against today's (Nov 2) trunk with "make check" and related.Feedback
much appreciated if and when there's a moment.

- Lourens

View repository: http://github.com/methodmissing/ruby/tree/hwia

-------- patch --------

% git diff --patch-with-stat origin/trunk..HEAD
 hash.c                   |  185 +++++++++++++++++++++++++++++++--
 include/ruby/intern.h    |    1 +
 string.c                 |   12 ++
 test/ruby/test_hash.rb   |  252 ++++++++++++++++++++++++++++++++++++++++++++++
 test/ruby/test_symbol.rb |    7 ++
 5 files changed, 445 insertions(+), 12 deletions(-)

diff --git a/hash.c b/hash.c
index ef8b9a8..332f89d 100644
--- a/hash.c
+++ b/hash.c
@@ -23,6 +23,27 @@ static VALUE rb_hash_s_try_convert(VALUE, VALUE);
 
 #define HASH_DELETED  FL_USER1
 #define HASH_PROC_DEFAULT FL_USER2
+#define STR_HASH FL_USER3
+
+static VALUE rb_hash_rehash(VALUE hash);
+static VALUE rb_hash_update(VALUE hash1, VALUE hash2);
+static VALUE rb_hash_strhash(VALUE hash);
+static void rb_strhash_convert(VALUE *value);
+
+#define NEW_STR_HASH(hash,other) do {\
+   FL_SET(RHASH(hash), STR_HASH); \
+   RHASH(hash)->ntbl->type = &strhash; \
+   RHASH(hash)->ifnone = RHASH(other)->ifnone; \
+   return rb_hash_rehash(rb_convert_type(hash, T_HASH, "StrHash", "to_hash")); \
+} while (0)
+
+#define STR_HASH_P(hash) (FL_TEST(RHASH(hash), STR_HASH) || RBASIC(hash)->klass == rb_cStrHash)
+
+#define CONVERT_STR_HASH(hash,value) do {\
+   if STR_HASH_P(hash){ \
+   rb_strhash_convert(&value); \
+   } \
+} while (0)
 
 VALUE
 rb_hash_freeze(VALUE hash)
@@ -31,6 +52,7 @@ rb_hash_freeze(VALUE hash)
 }
 
 VALUE rb_cHash;
+VALUE rb_cStrHash;
 
 static VALUE envtbl;
 static ID id_hash, id_yield, id_default;
@@ -54,6 +76,37 @@ rb_any_cmp(VALUE a, VALUE b)
     return !rb_eql(a, b);
 }
 
+int
+strhash_cmp(VALUE *s1,VALUE *s2)
+{
+    int s1_hash = SYMBOL_P(*s1) ? rb_sym_hash(*s1) : rb_str_hash(*s1);
+    int s2_hash = SYMBOL_P(*s2) ? rb_sym_hash(*s2) : rb_str_hash(*s2);
+    if (s1_hash == s2_hash) return 0;
+    if (s1_hash > s2_hash) return 1;
+    return -1;
+}
+
+static int
+rb_strhash_cmp(VALUE a, VALUE b)
+{
+    if (a == b) return 0;
+    if (FIXNUM_P(a) && FIXNUM_P(b)) {
+ return a != b;
+    }
+    if (a == Qundef || b == Qundef) return -1;
+    if (SYMBOL_P(a) && SYMBOL_P(b)) {
+ return a != b;
+    }
+    if ((TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString && SYMBOL_P(b)) || (TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString && SYMBOL_P(a))) {
+ return strhash_cmp(&a, &b);
+    }
+    if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString &&
+    TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) {
+ return rb_str_cmp(a, b);
+    }
+    return !rb_eql(a, b);
+}
+
 VALUE
 rb_hash(VALUE obj)
 {
@@ -99,11 +152,44 @@ rb_any_hash(VALUE a)
     return (st_index_t)RSHIFT(hnum, 1);
 }
 
+static st_index_t
+rb_strhash_hash(VALUE a)
+{
+    VALUE hval;
+    st_index_t hnum;
+
+    switch (TYPE(a)) {
+      case T_FIXNUM:
+      case T_NIL:
+      case T_FALSE:
+      case T_TRUE:
+ hnum = rb_hash_end(rb_hash_start((unsigned int)a));
+ break;
+      case T_SYMBOL:
+ hnum = rb_sym_hash(a);  
+ break;
+      case T_STRING:
+ hnum = rb_str_hash(a);
+ break;
+
+      default:
+        hval = rb_hash(a);
+ hnum = FIX2LONG(hval);
+    }
+    hnum <<= 1;
+    return (st_index_t)RSHIFT(hnum, 1);
+}
+
 static const struct st_hash_type objhash = {
     rb_any_cmp,
     rb_any_hash,
 };
 
+static const struct st_hash_type strhash = {
+    rb_strhash_cmp,
+    rb_strhash_hash,
+};
+
 static const struct st_hash_type identhash = {
     st_numcmp,
     st_numhash,
@@ -219,7 +305,7 @@ hash_alloc(VALUE klass)
     OBJSETUP(hash, klass, T_HASH);
 
     hash->ifnone = Qnil;
-
+    if (klass == rb_cStrHash) FL_SET(hash, STR_HASH);
     return (VALUE)hash;
 }
 
@@ -230,6 +316,12 @@ rb_hash_new(void)
 }
 
 VALUE
+rb_strhash_new(void)
+{
+    return hash_alloc(rb_cStrHash);
+}
+
+VALUE
 rb_hash_dup(VALUE hash)
 {
     NEWOBJ(ret, struct RHash);
@@ -240,8 +332,33 @@ rb_hash_dup(VALUE hash)
     if (FL_TEST(hash, HASH_PROC_DEFAULT)) {
         FL_SET(ret, HASH_PROC_DEFAULT);
     }
-    ret->ifnone = RHASH(hash)->ifnone;
-    return (VALUE)ret;
+    if STR_HASH_P(hash){
+       NEW_STR_HASH(ret,hash);
+    }else{
+      ret->ifnone = RHASH(hash)->ifnone;
+      return (VALUE)ret;
+    }
+}
+
+static void
+rb_strhash_convert(VALUE *val)
+{
+    int i;
+    VALUE values;
+
+    switch (TYPE(*val)) {
+      case T_HASH:
+           *val = rb_hash_strhash(*val);
+           break;  
+      case T_ARRAY:
+            values = rb_ary_new2(RARRAY_LEN(*val));
+            for (i = 0; i < RARRAY_LEN(*val); i++) {
+               VALUE el = RARRAY_PTR(*val)[i];
+               rb_ary_push(values, (TYPE(el) == T_HASH) ? rb_hash_strhash(el) : el);
+            }
+           *val = values;
+           break;
+    }
 }
 
 static void
@@ -256,7 +373,7 @@ struct st_table *
 rb_hash_tbl(VALUE hash)
 {
     if (!RHASH(hash)->ntbl) {
-        RHASH(hash)->ntbl = st_init_table(&objhash);
+ RHASH(hash)->ntbl = STR_HASH_P(hash) ? st_init_table(&strhash) : st_init_table(&objhash);
     }
     return RHASH(hash)->ntbl;
 }
@@ -318,7 +435,9 @@ static VALUE
 rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
 {
     VALUE ifnone;
-
+    VALUE constructor;
+    rb_scan_args(argc, argv, "01", &constructor);
+    if(TYPE(constructor) == T_HASH) return rb_hash_update(hash,constructor);
     rb_hash_modify(hash);
     if (rb_block_given_p()) {
  if (argc > 0) {
@@ -333,7 +452,6 @@ rb_hash_initialize(int argc, VALUE *argv, VALUE hash)
  rb_scan_args(argc, argv, "01", &ifnone);
  RHASH(hash)->ifnone = ifnone;
     }
-
     return hash;
 }
 
@@ -366,8 +484,11 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
     hash = hash_alloc(klass);
     if (RHASH(tmp)->ntbl) {
  RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl);
+ if (FL_TEST(RHASH(tmp), STR_HASH) || klass == rb_cStrHash){
+  NEW_STR_HASH(hash,tmp);
+ }else
+ return hash;
     }
-    return hash;
  }
 
  tmp = rb_check_array_type(argv[0]);
@@ -426,12 +547,21 @@ rb_hash_s_try_convert(VALUE dummy, VALUE hash)
     return rb_check_convert_type(hash, T_HASH, "Hash", "to_hash");
 }
 
+static VALUE
+rb_strhash_s_try_convert(VALUE dummy, VALUE hash)
+{
+    return rb_check_convert_type(hash, T_HASH, "StrHash", "to_hash");
+}
+
 static int
 rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg)
 {
     st_table *tbl = (st_table *)arg;
 
-    if (key != Qundef) st_insert(tbl, key, value);
+    if (key != Qundef){
+      if (tbl->type == &strhash) rb_strhash_convert(&value);
+      st_insert(tbl, key, value);
+    }
     return ST_CONTINUE;
 }
 
@@ -459,7 +589,6 @@ static VALUE
 rb_hash_rehash(VALUE hash)
 {
     st_table *tbl;
-
     if (RHASH(hash)->iter_lev > 0) {
  rb_raise(rb_eRuntimeError, "rehash during iteration");
     }
@@ -470,7 +599,6 @@ rb_hash_rehash(VALUE hash)
     rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl);
     st_free_table(RHASH(hash)->ntbl);
     RHASH(hash)->ntbl = tbl;
-
     return hash;
 }
 
@@ -974,7 +1102,7 @@ rb_hash_select(VALUE hash)
     VALUE result;
 
     RETURN_ENUMERATOR(hash, 0, 0);
-    result = rb_hash_new();
+    result = STR_HASH_P(hash) ? rb_strhash_new() : rb_hash_new();
     rb_hash_foreach(hash, select_i, result);
     return result;
 }
@@ -1046,6 +1174,13 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val)
     return val;
 }
 
+VALUE
+rb_strhash_aset(VALUE hash, VALUE key, VALUE val)
+{
+    CONVERT_STR_HASH(hash,val);
+    rb_hash_aset(hash, key, val);
+}
+
 static int
 replace_i(VALUE key, VALUE val, VALUE hash)
 {
@@ -1617,7 +1752,7 @@ rb_hash_invert_i(VALUE key, VALUE value, VALUE hash)
 static VALUE
 rb_hash_invert(VALUE hash)
 {
-    VALUE h = rb_hash_new();
+ VALUE h = STR_HASH_P(hash) ? rb_strhash_new() : rb_hash_new();
 
     rb_hash_foreach(hash, rb_hash_invert_i, h);
     return h;
@@ -1627,6 +1762,7 @@ static int
 rb_hash_update_i(VALUE key, VALUE value, VALUE hash)
 {
     if (key == Qundef) return ST_CONTINUE;
+    CONVERT_STR_HASH(hash,value);
     st_insert(RHASH(hash)->ntbl, key, value);
     return ST_CONTINUE;
 }
@@ -1637,6 +1773,7 @@ rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash)
     if (key == Qundef) return ST_CONTINUE;
     if (rb_hash_has_key(hash, key)) {
  value = rb_yield_values(3, key, rb_hash_aref(hash, key), value);
+    CONVERT_STR_HASH(hash,value);
     }
     st_insert(RHASH(hash)->ntbl, key, value);
     return ST_CONTINUE;
@@ -1852,6 +1989,24 @@ rb_hash_compare_by_id_p(VALUE hash)
     return Qfalse;
 }
 
+static VALUE
+rb_hash_strhash(VALUE hash)
+{
+    if STR_HASH_P(hash)
+    return hash;
+    VALUE args[1];
+    args[0] = hash;
+    return rb_hash_s_create(1, (VALUE *)args, rb_cStrHash);
+}
+
+static VALUE
+rb_strhash_to_hash(VALUE hash)
+{
+    VALUE args[1];
+    args[0] = hash;
+    return rb_hash_s_create(1, (VALUE *)args, rb_cHash);
+}
+
 static int path_tainted = -1;
 
 static char **origenviron;
@@ -2649,6 +2804,8 @@ Init_Hash(void)
     id_default = rb_intern("default");
 
     rb_cHash = rb_define_class("Hash", rb_cObject);
+    rb_cStrHash = rb_define_class("StrHash", rb_cHash);
+    rb_define_singleton_method(rb_cStrHash, "try_convert", rb_strhash_s_try_convert, 1);
 
     rb_include_module(rb_cHash, rb_mEnumerable);
 
@@ -2715,6 +2872,10 @@ Init_Hash(void)
 
     rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0);
     rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0);
+    rb_define_method(rb_cHash, "strhash", rb_hash_strhash, 0);
+    rb_define_method(rb_cStrHash, "to_hash", rb_strhash_to_hash, 0);
+    rb_define_method(rb_cStrHash, "[]=", rb_strhash_aset, 2);
+    rb_define_method(rb_cStrHash, "store", rb_strhash_aset, 2);
 
     origenviron = environ;
     envtbl = rb_obj_alloc(rb_cObject);
diff --git a/include/ruby/intern.h b/include/ruby/intern.h
index fd37eca..c97caef 100644
--- a/include/ruby/intern.h
+++ b/include/ruby/intern.h
@@ -660,6 +660,7 @@ void rb_str_associate(VALUE, VALUE);
 VALUE rb_str_associated(VALUE);
 void rb_str_setter(VALUE, ID, VALUE*);
 VALUE rb_str_intern(VALUE);
+st_index_t rb_sym_hash(VALUE);
 VALUE rb_sym_to_s(VALUE);
 VALUE rb_str_length(VALUE);
 long rb_str_offset(VALUE, long);
diff --git a/string.c b/string.c
index e7f7e86..cc4ff34 100644
--- a/string.c
+++ b/string.c
@@ -7025,6 +7025,17 @@ sym_equal(VALUE sym1, VALUE sym2)
     return Qfalse;
 }
 
+st_index_t
+rb_sym_hash(VALUE sym){
+   ID id = SYM2ID(sym);
+   return rb_str_hash(rb_id2str(id));  
+}
+
+static VALUE
+rb_sym_hash_m(VALUE sym){
+    st_index_t hval = rb_sym_hash(sym);
+    return INT2FIX(hval);
+}
 
 static int
 sym_printable(const char *s, const char *send, rb_encoding *enc)
@@ -7508,6 +7519,7 @@ Init_String(void)
     rb_define_method(rb_cSymbol, "===", sym_equal, 1);
     rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
     rb_define_method(rb_cSymbol, "to_s", rb_sym_to_s, 0);
+    rb_define_method(rb_cSymbol, "hash", rb_sym_hash_m, 0);
     rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
     rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
     rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index c860b25..b3145e9 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -872,3 +872,255 @@ class TestHash < Test::Unit::TestCase
     assert_equal({x=>1}.hash, {x=>1}.hash)
   end
 end
+
+class HashToStrHash < Test::Unit::TestCase
+  def test_strhash
+    hash = { 'a' => 1, 'b' => 2 }
+    assert_instance_of StrHash, hash.strhash
+    assert_equal %w(a b), hash.keys
+    assert_equal [1,2], hash.values
+  end  
+end
+
+class TestStrHash < Test::Unit::TestCase
+  def setup
+    @strings = { 'a' => 1, 'b' => 2 }.strhash
+    @symbols = { :a  => 1, :b  => 2 }.strhash
+    @mixed   = { :a  => 1, 'b' => 2 }.strhash
+    @fixnums = {  0  => 1,  1  => 2 }.strhash
+  end
+
+  def test_inherits_hash
+    assert_equal Hash, StrHash.superclass
+  end
+
+  def test_strhash
+    assert_equal @strings.object_id, @strings.strhash.object_id
+    assert_instance_of StrHash, { 'a' => 1, 'b' => 2 }.strhash
+  end
+  
+  def test_initialize
+    strhash = StrHash.new({ 'a' => 1, 'b' => 2 })
+    assert_equal 1, strhash[:a]
+    strhash = StrHash.new
+    strhash[:a] = 'a'
+    assert_equal 'a', strhash[:a]
+  end
+
+  def test_set
+    array = [{ 'a' => 1, 'b' => 2 }, [:a,:b,:c]]
+    @strings[:array] = array
+    assert_instance_of StrHash, @strings[:array].shift
+    assert_instance_of Array, @strings[:array] = array    
+    @strings[:hash] = { 'a' => 1, 'b' => 2 }
+    assert_instance_of StrHash, @strings[:hash]
+  end
+  
+  def test_dup
+    assert_equal @strings, @strings.dup
+    assert_equal @mixed, @mixed.dup
+    assert_not_equal @mixed.object_id, @mixed.dup.object_id
+  end
+
+  def test_keys
+    assert_equal ["a", "b"], @strings.keys
+    assert_equal [:a, :b], @symbols.keys
+    assert_equal [:a, "b"], @mixed.keys
+    assert_equal [0, 1], @fixnums.keys            
+  end
+  
+  def test_values
+    assert_equal [1, 2], @strings.values
+    assert_equal [1, 2], @symbols.values
+    assert_equal [1, 2], @mixed.values
+    assert_equal [1, 2], @fixnums.values        
+  end  
+
+  def test_fetch
+    assert_equal 1, @strings.fetch('a')
+    assert_equal 1, @strings.fetch(:a.to_s)
+    assert_equal 1, @strings.fetch(:a)
+  end
+
+  def test_key?
+    assert @strings.key?(:a)
+    assert @strings.include?('a')
+    assert @mixed.has_key?('b')
+  end
+  
+  def test_delete
+    @strings.delete('a')
+    assert !@strings.key?(:a)
+  end
+
+  def test_assorted
+    hashes = { :@strings => @strings, :@symbols => @symbols, :@mixed => @mixed }
+    method_map = { :'[]' => 1, :fetch => 1, :values_at => [1],
+      :has_key? => true, :include? => true, :key? => true,
+      :member? => true }
+
+    hashes.each do |name, hash|
+      method_map.sort_by { |m| m.to_s }.each do |meth, expected|
+        assert_equal(expected, hash.__send__(meth, 'a'),
+                     "Calling #{name}.#{meth} 'a'")
+        assert_equal(expected, hash.__send__(meth, :a),
+                     "Calling #{name}.#{meth} :a")
+      end
+    end
+
+    assert_equal [1, 2], @strings.values_at('a', 'b')
+    assert_equal [1, 2], @strings.values_at(:a, :b)
+    assert_equal [1, 2], @symbols.values_at('a', 'b')
+    assert_equal [1, 2], @symbols.values_at(:a, :b)
+    assert_equal [1, 2], @mixed.values_at('a', 'b')
+    assert_equal [1, 2], @mixed.values_at(:a, :b)
+  end
+  
+  def test_reading
+    hash = StrHash.new
+    hash["a"] = 1
+    hash["b"] = true
+    hash["c"] = false
+    hash["d"] = nil
+
+    assert_equal 1, hash[:a]
+    assert_equal true, hash[:b]
+    assert_equal false, hash[:c]
+    assert_equal nil, hash[:d]
+    assert_equal nil, hash[:e]
+  end  
+
+  def test_reading_with_nonnil_default
+    hash = StrHash.new(1)
+    hash["a"] = 1
+    hash["b"] = true
+    hash["c"] = false
+    hash["d"] = nil
+
+    assert_equal 1, hash[:a]
+    assert_equal true, hash[:b]
+    assert_equal false, hash[:c]
+    assert_equal nil, hash[:d]
+    assert_equal 1, hash[:e]
+  end
+  
+  def test_writing
+    hash = StrHash.new
+    hash[:a] = 1
+    hash['b'] = 2
+    hash[3] = 3
+
+    assert_equal hash['a'], 1
+    assert_equal hash['b'], 2
+    assert_equal hash[:a], 1
+    assert_equal hash[:b], 2
+    assert_equal hash[3], 3
+  end  
+
+  def test_update
+    hash = StrHash.new
+    hash[:a] = 'a'
+    hash['b'] = 'b'
+
+    updated_with_strings = hash.update(@strings)
+    updated_with_symbols = hash.update(@symbols)
+    updated_with_mixed = hash.update(@mixed)
+
+    assert_equal updated_with_strings[:a], 1
+    assert_equal updated_with_strings['a'], 1
+    assert_equal updated_with_strings['b'], 2
+
+    assert_equal updated_with_symbols[:a], 1
+    assert_equal updated_with_symbols['b'], 2
+    assert_equal updated_with_symbols[:b], 2
+
+    assert_equal updated_with_mixed[:a], 1
+    assert_equal updated_with_mixed['b'], 2
+
+    assert [updated_with_strings, updated_with_symbols, updated_with_mixed].all? { |h| h.keys.size == 2 }
+  end  
+
+  def test_merging
+    hash = StrHash.new
+    hash[:a] = 'failure'
+    hash['b'] = 'failure'
+
+    other = { 'a' => 1, :b => 2 }
+
+    merged = hash.merge(other)
+
+    assert_equal StrHash, merged.class
+    assert_equal 1, merged[:a]
+    assert_equal 2, merged['b']
+
+    hash.update(other)
+
+    assert_equal 1, hash[:a]
+    assert_equal 2, hash['b']
+  end  
+
+  def test_deleting
+    get_hash = proc{ StrHash[ :a => 'foo' ] }
+    hash = get_hash.call
+    assert_equal hash.delete(:a), 'foo'
+    assert_equal hash.delete(:a), nil
+    hash = get_hash.call
+    assert_equal hash.delete('a'), 'foo'
+    assert_equal hash.delete('a'), nil
+  end  
+
+  def test_to_hash
+    assert_instance_of Hash, @strings.to_hash
+    assert_equal %w(a b), @strings.to_hash.keys
+    # Should convert to a Hash with String keys.
+    assert_equal @strings, @mixed.strhash.to_hash
+
+    # Should preserve the default value.
+    mixed_with_default = @mixed.dup
+    mixed_with_default.default = '1234'
+    roundtrip = mixed_with_default.strhash.to_hash
+    assert_equal @strings, roundtrip
+    assert_equal '1234', roundtrip.default    
+  end
+
+  def test_hash_with_array_of_hashes
+    hash = { "urls" => { "url" => [ { "address" => "1" }, { "address" => "2" } ] }}
+    strhash = StrHash[hash]
+    assert_equal "1", strhash[:urls][:url].first[:address]
+  end
+
+  def test_indifferent_subhashes
+    h = {'user' => {'id' => 5}}.strhash
+    ['user', :user].each {|user| [:id, 'id'].each {|id| assert_equal 5, h[user][id], "h[#{user.inspect}][#{id.inspect}] should be 5"}}
+
+    h = {:user => {:id => 5}}.strhash
+    ['user', :user].each {|user| [:id, 'id'].each {|id| assert_equal 5, h[user][id], "h[#{user.inspect}][#{id.inspect}] should be 5"}}
+  end  
+
+  def test_assorted_keys_not_stringified
+    original = {Object.new => 2, 1 => 2, [] => true}
+    indiff = original.strhash
+    assert(!indiff.keys.any? {|k| k.kind_of? String}, "A key was converted to a string!")
+  end
+
+  def test_should_use_default_value_for_unknown_key
+    strhash = StrHash.new(3)
+    assert_equal 3, strhash[:new_key]
+  end
+
+  def test_should_use_default_value_if_no_key_is_supplied
+    strhash = StrHash.new(3)
+    assert_equal 3, strhash.default
+  end
+
+  def test_should_nil_if_no_default_value_is_supplied
+    strhash = StrHash.new
+    assert_nil strhash.default
+  end
+
+  def test_should_copy_the_default_value_when_converting_to_hash_with_indifferent_access
+    hash = Hash.new(3)
+    strhash = hash.strhash
+    assert_equal 3, strhash.default
+  end  
+end
\ No newline at end of file
diff --git a/test/ruby/test_symbol.rb b/test/ruby/test_symbol.rb
index f402da3..c181b19 100644
--- a/test/ruby/test_symbol.rb
+++ b/test/ruby/test_symbol.rb
@@ -1,6 +1,13 @@
 require 'test/unit'
 
 class TestSymbol < Test::Unit::TestCase
+  def test_hash
+    assert_instance_of Fixnum, :symbol.hash
+    assert_equal 'symbol'.hash, :symbol.hash
+    assert_equal :"!".hash, :"!".hash
+    assert_not_equal :"$1".hash, :"@@1".hash
+  end
+  
   # [ruby-core:3573]
 
   def assert_eval_inspected(sym)



signature.asc (268 bytes) Download Attachment

[ruby-core:26495] Re: HashWithIndifferentAccess to core

by Rick DeNatale :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Tue, Nov 3, 2009 at 6:12 AM, Urabe Shyouhei <shyouhei@...> wrote:
> Hello,
>
> I got a pull request for a HashWithIndifferentAccess implementation to merge
> into core.  I think it's almost OK to make it so (except few minor bugs that can
> easily be handled).  How about it?  I'm also attaching a patch file for people who
> are not used to git (mainly nobu).

> +++ b/test/ruby/test_hash.rb

> +class TestStrHash < Test::Unit::TestCase
> +  def setup
> +    @strings = { 'a' => 1, 'b' => 2 }.strhash
> +    @symbols = { :a  => 1, :b  => 2 }.strhash
> +    @mixed   = { :a  => 1, 'b' => 2 }.strhash
> +    @fixnums = {  0  => 1,  1  => 2 }.strhash
> +  end

> +
> +  def test_keys
> +    assert_equal ["a", "b"], @strings.keys
> +    assert_equal [:a, :b], @symbols.keys
> +    assert_equal [:a, "b"], @mixed.keys
> +    assert_equal [0, 1], @fixnums.keys
> +  end

So judging by the tests, and without going through the implementation
in detail. It appears that this proposal goes to lengths to preserve
the class of a key in the hash.

I'm not sure that this is a real requirement.  I contrast consider the
current HashWithIndifferentAccess(HWIA) in Rails/ActiveSupport, here's
a snippet from a rails console irb session:

=> {}
>> h[:a] = 1
=> 1
>> h['b'] = 2
=> 2
>> h.keys
=> ["a", "b"]

HWIA converts symbol arguments to keys in the access methods, and
stores the keys internally as strings.

I think picking a single internal representation for the keys is
simpler, and probably a bit faster.  The main decision is whether to
pick strings or symbols for that internal representation.

ActiveSupport picked strings, primarily from what I understand over a
concern that symbols once interned can't be GCed and the key values in
the Rails use cases come from arbitrary user inputs (e.g. query
parameters in URIs). So converting such strings to symbols could be
the basis of a DOS attack.  As a coincidencs I just posted about this
in reply to a thread on the ruby-lang group.

I think that if the key class is preserved there are other
ramifications to ponder, for example what should h.keys be after

h =  { 'a' => 1, 'b' => 2 }.strhash
h[:a] = 3

I think that any of  ['a', 'b'] or [:a, 'b'] or ['b', :a]  all of
which might seem to be plausible answers, would surprise someone, and
that the POLS would be better preserved by conversion to a consistent
class of key.

If the current proposal were accepted, I'm not sure that it would be
useful to Rails as a replacement implementation of HWIA, but I'm
hoping that Yehuda or one of his cohorts would comment on that.


--
Rick DeNatale

Blog: http://talklikeaduck.denhaven2.com/
Twitter: http://twitter.com/RickDeNatale
WWR: http://www.workingwithrails.com/person/9021-rick-denatale
LinkedIn: http://www.linkedin.com/in/rickdenatale


[ruby-core:26496] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core"
    on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei <shyouhei@...> writes:

|I got a pull request for a HashWithIndifferentAccess implementation to merge
|into core.  I think it's almost OK to make it so (except few minor bugs that can
|easily be handled).  How about it?  I'm also attaching a patch file for people who
|are not used to git (mainly nobu).

I am negative.

  * the class name is too long, and not clear, at least for me.
  * it might be useful for some cases, but not so common to make it
    built-in, I think.
  * if it's not for built-in, it can be implemented by external
    library, such as a gem, or active_support.

                                                        matz.


[ruby-core:26499] Re: HashWithIndifferentAccess to core

by Urabe Shyouhei-4 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Yukihiro Matsumoto wrote:
> I am negative.
>
>   * the class name is too long, and not clear, at least for me.

You mean HashWithIndifferentAccess?  That longer name is for an equivalent code
from ActiveSupport.  The author of this request names his class "StrHash".

>   * it might be useful for some cases, but not so common to make it
>     built-in, I think.

But that "some" case is actually HUGE.  Almost all Rails application uses this
(sort of) as if it were a built-in ruby feature.  It would be great when this
was a true built-in.

>   * if it's not for built-in, it can be implemented by external
>     library, such as a gem, or active_support.

The author of this request does have a gem for it.
http://github.com/methodmissing/hwia



signature.asc (268 bytes) Download Attachment

[ruby-core:26500] Re: HashWithIndifferentAccess to core

by Lourens Naude-3 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

Appreciate the feedback.

At the moment most Ruby web frameworks running on an MRI VM is  
affected by Symbol's immutability and the risk of DoS attacks
( sizeof(VALUE), 4 bytes ) as mentioned by Rick in an earlier post.

I've seen various implementations of this pattern :  
ActiveSupport::HashWithIndifferentAccess (Rails), Mash (Merb), Sinatra  
etc.

This patch is the result of spending some of an afternoon getting  
familiar with the VM again in preparation for an upcoming talk
and in light of a push for this from Rails core, extracted something  
cleaner than the mixed 1.8 / 1.9 hwia gem.

The gem also includes a lot of code duplication for not having all the  
required core Hash methods exposed via intern.h

I do however understand that just because nested query params happen  
to map well to a Hash, doesn't mean a core class should
be enhanced to support this feature.There's the less obtrusive option of

* exposing Symbol#hash == String#hash for "string" == :string
* Implement hashing very similar to the identhash scheme in 1.9,  
hooked through Hash#strhash
* extract a UrlParam or similar gem / extension etc. that piggy backs  
off this feature ( iow. delegate the nested conversion macro elsewhere )

Thoughts ?

- Lourens

On 2009/11/03, at 16:27, Urabe Shyouhei wrote:

> Yukihiro Matsumoto wrote:
>> I am negative.
>>
>>  * the class name is too long, and not clear, at least for me.
>
> You mean HashWithIndifferentAccess?  That longer name is for an  
> equivalent code
> from ActiveSupport.  The author of this request names his class  
> "StrHash".
>
>>  * it might be useful for some cases, but not so common to make it
>>    built-in, I think.
>
> But that "some" case is actually HUGE.  Almost all Rails application  
> uses this
> (sort of) as if it were a built-in ruby feature.  It would be great  
> when this
> was a true built-in.
>
>>  * if it's not for built-in, it can be implemented by external
>>    library, such as a gem, or active_support.
>
> The author of this request does have a gem for it.
> http://github.com/methodmissing/hwia
>



[ruby-core:26503] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26499] Re: HashWithIndifferentAccess to core"
    on Wed, 4 Nov 2009 01:27:32 +0900, Urabe Shyouhei <shyouhei@...> writes:

|>   * the class name is too long, and not clear, at least for me.
|
|You mean HashWithIndifferentAccess?  That longer name is for an equivalent code
|from ActiveSupport.  The author of this request names his class "StrHash".

Yes, but I don't prefer StrHash either, which is shorter but still
does not describe the role of the class.

|>   * it might be useful for some cases, but not so common to make it
|>     built-in, I think.
|
|But that "some" case is actually HUGE.  Almost all Rails application uses this
|(sort of) as if it were a built-in ruby feature.  It would be great when this
|was a true built-in.

With making huge hole for confusion?  Putting 1.8 compatibility aside,
I believe all 1.9 code should use symbols for such cases, so the needs
for HashWithIndifferentAccess (or whatever name) will become less.

                                                        matz.


[ruby-core:26504] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26500] Re: HashWithIndifferentAccess to core"
    on Wed, 4 Nov 2009 01:51:18 +0900, Lourens Naudé <lourens@...> writes:

|* exposing Symbol#hash == String#hash for "string" == :string

Please no.  Symbols and strings are not same.

|* Implement hashing very similar to the identhash scheme in 1.9,  
|hooked through Hash#strhash

Probably.  Depends on how much need it would have.

|* extract a UrlParam or similar gem / extension etc. that piggy backs  
|off this feature ( iow. delegate the nested conversion macro elsewhere )

I am sorry but I don't get this one.

                                                        matz.


[ruby-core:26506] Re: HashWithIndifferentAccess to core

by Kornelius Kalnbach :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Yukihiro Matsumoto wrote:
> I am negative.
>   * the class name is too long, and not clear, at least for me.
absolutely. it's a really bad name.

[murphy]


[ruby-core:26507] Re: HashWithIndifferentAccess to core

by Jeremy Kemper :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Tue, Nov 3, 2009 at 6:48 AM, Yukihiro Matsumoto <matz@...> wrote:

> Hi,
>
> In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core"
>    on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei <shyouhei@...> writes:
>
> |I got a pull request for a HashWithIndifferentAccess implementation to merge
> |into core.  I think it's almost OK to make it so (except few minor bugs that can
> |easily be handled).  How about it?  I'm also attaching a patch file for people who
> |are not used to git (mainly nobu).
>
> I am negative.
>
>  * the class name is too long, and not clear, at least for me.
>  * it might be useful for some cases, but not so common to make it
>    built-in, I think.

It encapsulates a common pattern:
 * consume arbitrary key/value data from "outside world," where
strings are best choice
 * work with these data using meaningful keys, where symbols are best choice

Such cases are command-line options, database result sets, HTTP header
values, etc. Anywhere Ruby works with external data (strings) using
some known, consistent semantics (symbols).

(The long name is legacy, a class name meant to discourage public use.
But it caught on so it's been reused extensively anyway. Any
appropriate name is fine.)

>  * if it's not for built-in, it can be implemented by external
>    library, such as a gem, or active_support.

That's true. It's been implemented several places, usually duplicated
code with subtle differences, and usually in pure Ruby. This code is
often in the hot path so it's best to be highly performant. Living as
a Ruby builtin gives very high speed and unifies the various
implementations of this data type.

Best,
jeremy


[ruby-core:26508] Re: HashWithIndifferentAccess to core

by Jeremy Kemper :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Tue, Nov 3, 2009 at 9:42 AM, Yukihiro Matsumoto <matz@...> wrote:

> Hi,
>
> In message "Re: [ruby-core:26499] Re: HashWithIndifferentAccess to core"
>    on Wed, 4 Nov 2009 01:27:32 +0900, Urabe Shyouhei <shyouhei@...> writes:
>
> |>   * the class name is too long, and not clear, at least for me.
> |
> |You mean HashWithIndifferentAccess?  That longer name is for an equivalent code
> |from ActiveSupport.  The author of this request names his class "StrHash".
>
> Yes, but I don't prefer StrHash either, which is shorter but still
> does not describe the role of the class.
>
> |>   * it might be useful for some cases, but not so common to make it
> |>     built-in, I think.
> |
> |But that "some" case is actually HUGE.  Almost all Rails application uses this
> |(sort of) as if it were a built-in ruby feature.  It would be great when this
> |was a true built-in.
>
> With making huge hole for confusion?  Putting 1.8 compatibility aside,
> I believe all 1.9 code should use symbols for such cases, so the needs
> for HashWithIndifferentAccess (or whatever name) will become less.

Ah, your preference is for Ruby 1.9 code to convert external key/value
data from String => String to Symbol => String? This does address the
core concern of addressing external k/v data with string keys using
meaningful symbols. But it's slower: all keys must be converted to
symbols up-front rather than lazily, when accessed.

jeremy


[ruby-core:26513] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26508] Re: HashWithIndifferentAccess to core"
    on Wed, 4 Nov 2009 05:33:06 +0900, Jeremy Kemper <jeremy@...> writes:

|> With making huge hole for confusion?  Putting 1.8 compatibility aside,
|> I believe all 1.9 code should use symbols for such cases, so the needs
|> for HashWithIndifferentAccess (or whatever name) will become less.
|
|Ah, your preference is for Ruby 1.9 code to convert external key/value
|data from String => String to Symbol => String? This does address the
|core concern of addressing external k/v data with string keys using
|meaningful symbols. But it's slower: all keys must be converted to
|symbols up-front rather than lazily, when accessed.

Don't take me too serious (although I know it's difficult, since I am
the responsible).  I have never felt I need HashWithIndifferentAccess
in my programming life, so my opinion is biased.  You've explained
somewhat in [ruby-core:26507], but I still need more comprehension.
In general, blurring type differences (string and symbols in this
case), is a code smell.

                                                        matz.


[ruby-core:26514] Re: HashWithIndifferentAccess to core

by "Martin J. Dürst" :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Just a thought: What about implementing this with an option on Hash:new,
rather than as a separate class? That at least would solve the problem
with the class name.

Regards,    Martin.

On 2009/11/04 5:27, Jeremy Kemper wrote:

> On Tue, Nov 3, 2009 at 6:48 AM, Yukihiro Matsumoto<matz@...>  wrote:
>> Hi,
>>
>> In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core"
>>     on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei<shyouhei@...>  writes:
>>
>> |I got a pull request for a HashWithIndifferentAccess implementation to merge
>> |into core.  I think it's almost OK to make it so (except few minor bugs that can
>> |easily be handled).  How about it?  I'm also attaching a patch file for people who
>> |are not used to git (mainly nobu).
>>
>> I am negative.
>>
>>   * the class name is too long, and not clear, at least for me.
>>   * it might be useful for some cases, but not so common to make it
>>     built-in, I think.
>
> It encapsulates a common pattern:
>   * consume arbitrary key/value data from "outside world," where
> strings are best choice
>   * work with these data using meaningful keys, where symbols are best choice
>
> Such cases are command-line options, database result sets, HTTP header
> values, etc. Anywhere Ruby works with external data (strings) using
> some known, consistent semantics (symbols).
>
> (The long name is legacy, a class name meant to discourage public use.
> But it caught on so it's been reused extensively anyway. Any
> appropriate name is fine.)
>
>>   * if it's not for built-in, it can be implemented by external
>>     library, such as a gem, or active_support.
>
> That's true. It's been implemented several places, usually duplicated
> code with subtle differences, and usually in pure Ruby. This code is
> often in the hot path so it's best to be highly performant. Living as
> a Ruby builtin gives very high speed and unifies the various
> implementations of this data type.
>
> Best,
> jeremy
>
>

--
#-# Martin J. Dürst, Professor, Aoyama Gakuin University
#-# http://www.sw.it.aoyama.ac.jp   mailto:duerst@...


[ruby-core:26522] Re: HashWithIndifferentAccess to core

by Yusuke ENDOH :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

2009/11/4 "Martin J. Durst" <duerst@...>:
> Just a thought: What about implementing this with an option on Hash:new,
> rather than as a separate class? That at least would solve the problem with
> the class name.

This sounds good if HWIA is really imported.

How about "Hash#compare_as_string_key" following in
"Hash#compare_by_indentity"?


  h = {}
  h.compare_as_string_key

  h[:a] = :foo
  h["b"] = :bar

  p h["a"] #=> :foo
  p h[:b]  #=> :bar
  p h      #=> {:a=>:foo, "b"=>:bar}

  h[1] = 1 #=> neither string nor symbol key in strhash (RuntimeError)
           #   (should it be assumed as "1" by using to_s?)


Here is a patch (I didn't refer the original patch, sorry):


diff --git a/hash.c b/hash.c
index ef8b9a8..e26312c 100644
--- a/hash.c
+++ b/hash.c
@@ -99,6 +99,44 @@ rb_any_hash(VALUE a)
     return (st_index_t)RSHIFT(hnum, 1);
 }

+static int
+rb_strsym_cmp(VALUE a, VALUE b)
+{
+    if (a == b) return 0;
+    switch (TYPE(a)) {
+      case T_SYMBOL:
+ a = rb_id2str(SYM2ID(a));
+      case T_STRING:
+ switch (TYPE(b)) {
+  case T_SYMBOL:
+    b = rb_id2str(SYM2ID(b));
+  case T_STRING:
+    return rb_str_hash_cmp(a, b);
+ }
+    }
+    rb_raise(rb_eRuntimeError, "not string nor symbol key in strhash");
+}
+
+static st_index_t
+rb_strsym_hash(VALUE a)
+{
+    VALUE hval;
+    st_index_t hnum;
+
+    switch (TYPE(a)) {
+      case T_SYMBOL:
+ a = rb_id2str(SYM2ID(a));
+      case T_STRING:
+ hnum = rb_str_hash(a);
+ break;
+
+      default:
+ rb_raise(rb_eRuntimeError, "neither string nor symbol key in strhash");
+    }
+    hnum <<= 1;
+    return (st_index_t)RSHIFT(hnum, 1);
+}
+
 static const struct st_hash_type objhash = {
     rb_any_cmp,
     rb_any_hash,
@@ -109,6 +147,11 @@ static const struct st_hash_type identhash = {
     st_numhash,
 };

+static const struct st_hash_type strsymhash = {
+    rb_strsym_cmp,
+    rb_strsym_hash,
+};
+
 typedef int st_foreach_func(st_data_t, st_data_t, st_data_t);

 struct foreach_safe_arg {
@@ -1852,6 +1895,40 @@ rb_hash_compare_by_id_p(VALUE hash)
     return Qfalse;
 }

+/*
+ *  call-seq:
+ *    hsh.compare_as_string_key => hsh
+ *
+ *  TBD
+ */
+
+static VALUE
+rb_hash_compare_as_string_key(VALUE hash)
+{
+    rb_hash_modify(hash);
+    RHASH(hash)->ntbl->type = &strsymhash;
+    rb_hash_rehash(hash);
+    return hash;
+}
+
+/*
+ *  call-seq:
+ *    hsh.compare_as_string_key? => true or false
+ *
+ *  TBD
+ */
+
+static VALUE
+rb_hash_compare_as_string_key_p(VALUE hash)
+{
+    if (!RHASH(hash)->ntbl)
+        return Qfalse;
+    if (RHASH(hash)->ntbl->type == &strsymhash) {
+ return Qtrue;
+    }
+    return Qfalse;
+}
+
 static int path_tainted = -1;

 static char **origenviron;
@@ -2715,6 +2792,8 @@ Init_Hash(void)

     rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0);
     rb_define_method(rb_cHash,"compare_by_identity?",
rb_hash_compare_by_id_p, 0);
+    rb_define_method(rb_cHash,"compare_as_string_key",
rb_hash_compare_as_string_key, 0);
+    rb_define_method(rb_cHash,"compare_as_string_key?",
rb_hash_compare_as_string_key_p, 0);

     origenviron = environ;
     envtbl = rb_obj_alloc(rb_cObject);

--
Yusuke ENDOH <mame@...>


[ruby-core:26555] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26522] Re: HashWithIndifferentAccess to core"
    on Wed, 4 Nov 2009 12:50:12 +0900, Yusuke ENDOH <mame@...> writes:

|How about "Hash#compare_as_string_key" following in
|"Hash#compare_by_indentity"?

It's much better than HashWithIndifferentAccess.

                                                        matz.


[ruby-core:26584] Re: HashWithIndifferentAccess to core

by Yugui (Yuki Sonoda) :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

2009/11/6 Yukihiro Matsumoto <matz@...>:
> |How about "Hash#compare_as_string_key" following in
> |"Hash#compare_by_indentity"?
>
> It's much better than HashWithIndifferentAccess.

I agree.

I'm strongly against StrHash itself because of the reasons which matz mentioned.
String is not Symbol, Symbol is not String. It is useful in a
particular case but Ruby is a general purpose language.
When you need different-access, you can implement "your language" on
top of Ruby like HashWithDifferentAccess. This is a good point of
Ruby.

compare_as_string_key is much more acceptable. But it still seems to
be just for Rails.
How about Hash::Type class as an API for rb_hash_type?
It means, generalizing compare_by_identity and HashWithDifferentAccess
to strategy object that represents how to compare the keys.

Thanks,
-- Yuki Sonoda (Yugui)


[ruby-core:26589] Re: HashWithIndifferentAccess to core

by Yukihiro Matsumoto :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

In message "Re: [ruby-core:26584] Re: HashWithIndifferentAccess to core"
    on Sat, 7 Nov 2009 20:06:07 +0900, Yugui <yugui@...> writes:

|How about Hash::Type class as an API for rb_hash_type?

Depends on API, and performance.

                                                        matz.


[ruby-core:26593] Re: HashWithIndifferentAccess to core

by Lourens Naude-3 :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

Hi,

I've been toying with allowing custom hashing from the Ruby side for  
the past 2 days and have a semi-functional version (patch attached  
against subversion trunk 25671 - http://gist.github.com/228773),  
however my lack of VM control frame knowledge pretty much blocks  
progress atm.

Experimental API :

Hash#index_with(Object)

The general expected contract being the object implements 2 methods,  
hash(a) and compare(a,b)

Here's a rough HashWithIndifferentAccess ( referenced as "hwia" moving  
forward) implementation :

   module Hwia
     def hash(obj)
       case obj
         when Symbol, String
           obj.to_s.hash
         else
           obj.hash
       end
     end

     def compare(a,b)
       if String === a && Symbol === b || String === b && Symbol === a
         a.to_s <=> b.to_s
       else
         a <=> b
       end
     end
     extend self
   end

h = { "foo" => "bar" }
h.index_with(Hwia)
h[:foo] # "bar"

Hash#custom_index? #=> true | false

Hash#indexed_with #=> Object

Given the arguments accepted by st_hash_type members being restricted  
to 1 and 2 arguments respectively, I opted for falling back to the  
current
ruby control frame after consulting with Aman Gupta, who also uses  
this in perftools.rb to get to self :

     rb_thread_t *th = GET_THREAD();
     VALUE hash = th->cfp->self;

Which works OK initially, but results in

   2) Error:
test_index_with(TestHash):
TypeError: wrong argument type TestHash (expected Hash)
     test/ruby/test_hash.rb:906:in `test_index_with'

after passing through rb_hash_aref in :

     h.index_with(Hwia)
     assert_equal Hwia, h.indexed_with
     assert h.custom_index?
     h["foo"] # fails

as th->cfp->self becomes TestHash in that scope.

Any thoughts / feedback much appreciated.Apologies for the naming  
confusion in the original mailing list post as well.

The current source tree is available at : http://github.com/methodmissing/ruby/tree/custom_hashing

- Lourens

methodmissing:ruby lourens$ git diff --patch-with-stat github/
trunk..HEAD
  gc.c                   |    1 +
  hash.c                 |   70 +++++++++++++++++++++++++++++++++++++++
+++++++-
  include/ruby/ruby.h    |    2 +
  test/ruby/test_hash.rb |   36 ++++++++++++++++++++++++
  4 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/gc.c b/gc.c
index 7711101..55cdb85 100644
--- a/gc.c
+++ b/gc.c
@@ -1697,6 +1697,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE  
ptr, int lev)

        case T_HASH:
         mark_hash(objspace, obj->as.hash.ntbl, lev);
+       gc_mark(objspace, obj->as.hash.index_with, lev);
         ptr = obj->as.hash.ifnone;
         goto again;

diff --git a/hash.c b/hash.c
index ef8b9a8..66280b6 100644
--- a/hash.c
+++ b/hash.c
@@ -14,6 +14,7 @@
  #include "ruby/ruby.h"
  #include "ruby/st.h"
  #include "ruby/util.h"
+#include "vm_core.h"

  #ifdef __APPLE__
  #include <crt_externs.h>
@@ -33,7 +34,7 @@ rb_hash_freeze(VALUE hash)
  VALUE rb_cHash;

  static VALUE envtbl;
-static ID id_hash, id_yield, id_default;
+static ID id_hash, id_yield, id_default, id_compare;

  static int
  rb_any_cmp(VALUE a, VALUE b)
@@ -99,11 +100,37 @@ rb_any_hash(VALUE a)
      return (st_index_t)RSHIFT(hnum, 1);
  }

+static int
+rb_custom_cmp(VALUE a, VALUE b)
+{
+    rb_thread_t *th = GET_THREAD();
+    VALUE hash = th->cfp->self;
+    Check_Type(hash, T_HASH);
+    return FIX2INT(rb_funcall(RHASH(hash)->index_with, id_compare, 2,  
a, b));
+}
+
+static st_index_t
+rb_custom_hash(VALUE a)
+{
+    st_index_t hnum;
+    rb_thread_t *th = GET_THREAD();
+    VALUE hash = th->cfp->self;
+    Check_Type(hash, T_HASH);
+    hnum = FIX2LONG(rb_funcall(RHASH(hash)->index_with, id_hash, 1,  
a));
+    hnum <<= 1;
+    return (st_index_t)RSHIFT(hnum, 1);
+}
+
  static const struct st_hash_type objhash = {
      rb_any_cmp,
      rb_any_hash,
  };

+static const struct st_hash_type customhash = {
+    rb_custom_cmp,
+    rb_custom_hash,
+};
+
  static const struct st_hash_type identhash = {
      st_numcmp,
      st_numhash,
@@ -219,6 +246,7 @@ hash_alloc(VALUE klass)
      OBJSETUP(hash, klass, T_HASH);

      hash->ifnone = Qnil;
+    hash->index_with = Qnil;

      return (VALUE)hash;
  }
@@ -241,6 +269,7 @@ rb_hash_dup(VALUE hash)
          FL_SET(ret, HASH_PROC_DEFAULT);
      }
      ret->ifnone = RHASH(hash)->ifnone;
+    ret->index_with = RHASH(hash)->index_with;
      return (VALUE)ret;
  }

@@ -492,7 +521,6 @@ VALUE
  rb_hash_aref(VALUE hash, VALUE key)
  {
      VALUE val;
-
      if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key,  
&val)) {
         return rb_funcall(hash, id_default, 1, key);
      }
@@ -1080,6 +1108,7 @@ rb_hash_replace(VALUE hash, VALUE hash2)
      }
      rb_hash_foreach(hash2, replace_i, hash);
      RHASH(hash)->ifnone = RHASH(hash2)->ifnone;
+    RHASH(hash)->index_with = RHASH(hash2)->index_with;
      if (FL_TEST(hash2, HASH_PROC_DEFAULT)) {
         FL_SET(hash, HASH_PROC_DEFAULT);
      }
@@ -1852,6 +1881,37 @@ rb_hash_compare_by_id_p(VALUE hash)
      return Qfalse;
  }

+static VALUE
+rb_hash_index_with(VALUE hash, VALUE idx)
+{
+    if (rb_respond_to(idx, id_hash) && rb_respond_to(idx, id_compare)){
+      rb_hash_modify(hash);
+      RHASH(hash)->index_with = idx;
+      RHASH(hash)->ntbl->type = &customhash;
+      rb_hash_rehash(hash);
+      return hash;
+    }else{
+      rb_raise(rb_eRuntimeError, "a custom hashing scheme should  
implement #hash(a) and #compare(a,b)");
+    }
+}
+
+static VALUE
+rb_hash_custom_index_p(VALUE hash)
+{
+    if (!RHASH(hash)->ntbl)
+        return Qfalse;
+    if (RHASH(hash)->ntbl->type == &customhash) {
+       return Qtrue;
+    }
+    return Qfalse;
+}
+
+static VALUE
+rb_hash_indexed_with(VALUE hash)
+{
+       return RHASH(hash)->index_with;
+}
+
  static int path_tainted = -1;

  static char **origenviron;
@@ -2647,6 +2707,7 @@ Init_Hash(void)
      id_hash = rb_intern("hash");
      id_yield = rb_intern("yield");
      id_default = rb_intern("default");
+    id_compare = rb_intern("compare");

      rb_cHash = rb_define_class("Hash", rb_cObject);

@@ -2716,6 +2777,11 @@ Init_Hash(void)
      rb_define_method(rb_cHash,"compare_by_identity",  
rb_hash_compare_by_id, 0);
      rb_define_method(rb_cHash,"compare_by_identity?",  
rb_hash_compare_by_id_p, 0);

+    rb_define_method(rb_cHash,"index_with", rb_hash_index_with, 1);
+    rb_define_method(rb_cHash,"indexed_with", rb_hash_indexed_with, 0);
+
+    rb_define_method(rb_cHash,"custom_index?",  
rb_hash_custom_index_p, 0);
+#define RHASH_IDX_WITH(h) (RHASH(h)->index_with)
  #define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl-
 >num_entries : 0)
  #define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0)

diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index c860b25..4e3f7f9 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -871,4 +871,40 @@ class TestHash < Test::Unit::TestCase
      def o.hash; 2<<100; end
      assert_equal({x=>1}.hash, {x=>1}.hash)
    end
+
+  module Hwia
+    def hash(obj)
+      case obj
+        when Symbol, String
+          obj.to_s.hash
+        else
+          obj.hash
+      end
+    end
+
+    def compare(a,b)
+      if String === a && Symbol === b || String === b && Symbol === a
+        a.to_s <=> b.to_s
+      else
+        a <=> b
+      end
+    end
+    extend self
+  end
+
+  def test_index_with
+    a = "foo"
+    assert(!{}.custom_index?)
+    h = { a => "bar" }
+    assert_equal nil, h.indexed_with
+    assert_raises(RuntimeError) do
+      h.index_with(Object.new)
+    end
+    h.index_with(Hwia)
+    assert_equal Hwia, h.indexed_with
+    assert h.custom_index?
+    h["foo"]
+    #assert_equal "bar", h["foo"]
+    #assert_equal "bar", h[:foo]
+  end
  end


On 2009/11/07, at 13:10, Yukihiro Matsumoto wrote:

> Hi,
>
> In message "Re: [ruby-core:26584] Re: HashWithIndifferentAccess to  
> core"
>    on Sat, 7 Nov 2009 20:06:07 +0900, Yugui <yugui@...> writes:
>
> |How about Hash::Type class as an API for rb_hash_type?
>
> Depends on API, and performance.
>
> matz.
>



[ruby-core:26595] Re: HashWithIndifferentAccess to core

by Rick DeNatale :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

I'm not sure that it really makes sense to add any of this to core.

So far the argument goes something like this.

Several important ruby based web frameworks have some form of
HashWithIndifferentAccess class, Rails, Merb and Sinatra have been
mentioned. Did I miss any?

Since it seems to be a common use case, perhaps it should be in Ruby core.

And implementations have been given which either:

    store keys in the hash which are the same type as that given as
the key parameter to WhateverTheHeckWeCallThisThing#[]=
   convert the key to a symbol if it's a string
   convert the key to a string if it's a symbol.

Then the arguments start over what to name it.


So I've just now looked at Merb, Rails and Sinatra to see what's up.

The ONLY difference between Merb and Rails here, as far as I can tell
is the name of WhateverTheHeckWeCallThisThing.  Rails (actually
ActiveSupport) calls it HashWithIndifferent access, Merb (actually the
extlib gem) calls it Mash. Other than the name the code appears to be
identical, including the initial comment:

# This class has dubious semantics and we only have it so that
# people can write params[:key] instead of params['key']
# and they get the same value for both keys.

And what is the implementation? Basically it

   1) overrides the initialize method to convert its keys to strings
after being initialized if the parameter to new is a hash.

   2) overrides Hash#default(key=nil) to efffectively return
self[key.to_s] if the key arg is a symbol and key.to_s is included in
the hash.

   3) overrides Hash#[]=(key,value) to effectively call
         super[convert_key(key)] = convert_value(value)
       where convert_key changes symbols to strings and leaves any
other key alone
       and convert_value converts an ordinary hash value to a
HashWithIndifferentAccess/Mash and leaves anything else alone.  This
is because one of the main uses of this thing is for nested parameters
in a http request.

    4) overrides any other method which takes a key argument to call
convert_key on it before using it.

Sinatra, is much more minimal, as one might expect.  It doesn't have
such a class, rather it defines a  Sinatra::Base#indifferent_hash
method

    def indifferent_hash
      Hash.new {|hash,key| hash[key.to_s] if Symbol === key }
    end

Which simply makes

   indifferent_hash[:a]
return the same value as
  indifferent_hash['a']
if :a isn't a key in the hash.

Sinatra apparently relies on the code which uses these hashes not to
use both a symbol and it's string form together for []=


Personally I think that as implemented in both Rails/Merb and Sinatra,
this is much ado about nothing, It wouldn't really buy much to pull
this code down to core whatever you name it, and if something with
different semantics, such as some of the proposals on this thread was
adopted, it's likely that the frameworks, would ignore it, unless it
were given a name which clashed.

As for the decision made by all three of these frameworks to
consistenly use strings rather than symbols for this, I think it makes
sense.   When I first realized that HashWIthIndifferentAccess did it
this way it didn't make sense to me, since I perceived the motivation
to be performance and one a symbol was interned looking up symbol keys
should be faster since Symbol#hash is O(1), and String#hash is
O(string.length).

But doing it the other way, and using Symbols rather than Strings for
the actual, stored, keys in the hash has a time and space cost,
because every time a String is used as a key in a method, it must be
converted to a Symbol, which requires the equivalent of looking it up
to see if it has already been interned, and making a copy non-gcable.

And taking a laissez-faire approach and storing either strings or
symbols and trying to make them come out the same when the Hash is
accessed, would seem to require additional overhead for each lookup,
as well as introducing ambiguities of specification if both :key and
'key' were used to store in the hash, as I pointed out earlier in this
thread.

To summarize, I'd vote for leaving things in core as they are, and
letting frameworks implement such extensions the way they see fit.
--
Rick DeNatale

Blog: http://talklikeaduck.denhaven2.com/
Twitter: http://twitter.com/RickDeNatale
WWR: http://www.workingwithrails.com/person/9021-rick-denatale
LinkedIn: http://www.linkedin.com/in/rickdenatale


[ruby-core:26599] Re: HashWithIndifferentAccess to core

by Jeremy Kemper :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Sat, Nov 7, 2009 at 10:21 AM, Rick DeNatale <rick.denatale@...> wrote:
> So far the argument goes something like this.
> Since it seems to be a common use case, perhaps it should be in Ruby core.

This is a motivating factor. Generally (repeating myself from above)
it encapsulates a common pattern:
 * consume arbitrary key/value data from "outside world," where
strings are best choice
 * work with these data using meaningful keys, where symbols are best choice

Such cases are command-line options, database result sets, HTTP header
values, etc. Anywhere Ruby works with external data (strings) using
some known, consistent semantics (symbols).

This can be done in pure Ruby and as a separate gem, but living in
Ruby itself gives very high speed and unifies the various behaviors of
this data type. The particular choice of implementation and name are
less important.

jeremy


[ruby-core:26600] Re: HashWithIndifferentAccess to core

by James Edward Gray II :: Rate this Message:

Reply to Author | View Threaded | Show Only this Message

On Nov 7, 2009, at 1:57 PM, Jeremy Kemper wrote:

> On Sat, Nov 7, 2009 at 10:21 AM, Rick DeNatale <rick.denatale@...
> > wrote:
>> So far the argument goes something like this.
>> Since it seems to be a common use case, perhaps it should be in  
>> Ruby core.
>
> This is a motivating factor. Generally (repeating myself from above)
> it encapsulates a common pattern:
> * consume arbitrary key/value data from "outside world," where
> strings are best choice
> * work with these data using meaningful keys, where symbols are best  
> choice
>
> Such cases are command-line options, database result sets, HTTP header
> values, etc. Anywhere Ruby works with external data (strings) using
> some known, consistent semantics (symbols).

I agree with Jeremy that this happens a lot.  It's not just Rails.  In  
fact, we tend to criticize any library that doesn't allow us to use  
String and Symbol interchangeably as keys.

There was a presentation on rufus-tokyo at RubyKaigi this year.  
Someone mentioned that it didn't do this in the Q and A.  The author  
modified his library shortly after.

I can name at least three projects I've used similar code in, all  
written just this year.  I think it's a pretty common occurrence.

James Edward Gray II

< Prev | 1 - 2 | Next >