|
View:
New views
20 Messages
—
Rating Filter:
Alert me
|
| < Prev | 1 - 2 | Next > |
|
|
[ruby-core:26492] HashWithIndifferentAccess to coreHello,
I got a pull request for a HashWithIndifferentAccess implementation to merge into core. I think it's almost OK to make it so (except few minor bugs that can easily be handled). How about it? I'm also attaching a patch file for people who are not used to git (mainly nobu). -------- Original Message -------- Subject: [GitHub] methodmissing sent you a message Date: Mon, 2 Nov 2009 12:40:24 -0800 From: GitHub <noreply@...> To: shyouhei@... methodmissing wants you to pull from methodmissing/ruby at hwia Body: Hi, I've played extensively with a HashWithIndifferentAccess (hwia / Mash) implementation as an extension to both ruby 1.9.x and ruby 1.8.7 ( http://github.com/methodmissing/hwia and http://blog.methodmissing.com/2009/08/29/alternative-hashwithindifferentaccess/) before and recently took the leap to patch against 1.9.2 as a Hash subclass. I've tried to conform to recommended code conventions, tried to not inject this into the critical path of common Hash use cases and abstracted to macros where possible. Wycats suggested to send the pull request - naming conventions at the moment is dangling - StrHash, IndifferentHash etc. I'll make myself available with whatever time is required to have this conform to current ruby-core standards.I'm also open to moving the recursive conversion to an extension and implement a thin API as per identhash on Hash instead. It passes against today's (Nov 2) trunk with "make check" and related.Feedback much appreciated if and when there's a moment. - Lourens View repository: http://github.com/methodmissing/ruby/tree/hwia -------- patch -------- % git diff --patch-with-stat origin/trunk..HEAD hash.c | 185 +++++++++++++++++++++++++++++++-- include/ruby/intern.h | 1 + string.c | 12 ++ test/ruby/test_hash.rb | 252 ++++++++++++++++++++++++++++++++++++++++++++++ test/ruby/test_symbol.rb | 7 ++ 5 files changed, 445 insertions(+), 12 deletions(-) diff --git a/hash.c b/hash.c index ef8b9a8..332f89d 100644 --- a/hash.c +++ b/hash.c @@ -23,6 +23,27 @@ static VALUE rb_hash_s_try_convert(VALUE, VALUE); #define HASH_DELETED FL_USER1 #define HASH_PROC_DEFAULT FL_USER2 +#define STR_HASH FL_USER3 + +static VALUE rb_hash_rehash(VALUE hash); +static VALUE rb_hash_update(VALUE hash1, VALUE hash2); +static VALUE rb_hash_strhash(VALUE hash); +static void rb_strhash_convert(VALUE *value); + +#define NEW_STR_HASH(hash,other) do {\ + FL_SET(RHASH(hash), STR_HASH); \ + RHASH(hash)->ntbl->type = &strhash; \ + RHASH(hash)->ifnone = RHASH(other)->ifnone; \ + return rb_hash_rehash(rb_convert_type(hash, T_HASH, "StrHash", "to_hash")); \ +} while (0) + +#define STR_HASH_P(hash) (FL_TEST(RHASH(hash), STR_HASH) || RBASIC(hash)->klass == rb_cStrHash) + +#define CONVERT_STR_HASH(hash,value) do {\ + if STR_HASH_P(hash){ \ + rb_strhash_convert(&value); \ + } \ +} while (0) VALUE rb_hash_freeze(VALUE hash) @@ -31,6 +52,7 @@ rb_hash_freeze(VALUE hash) } VALUE rb_cHash; +VALUE rb_cStrHash; static VALUE envtbl; static ID id_hash, id_yield, id_default; @@ -54,6 +76,37 @@ rb_any_cmp(VALUE a, VALUE b) return !rb_eql(a, b); } +int +strhash_cmp(VALUE *s1,VALUE *s2) +{ + int s1_hash = SYMBOL_P(*s1) ? rb_sym_hash(*s1) : rb_str_hash(*s1); + int s2_hash = SYMBOL_P(*s2) ? rb_sym_hash(*s2) : rb_str_hash(*s2); + if (s1_hash == s2_hash) return 0; + if (s1_hash > s2_hash) return 1; + return -1; +} + +static int +rb_strhash_cmp(VALUE a, VALUE b) +{ + if (a == b) return 0; + if (FIXNUM_P(a) && FIXNUM_P(b)) { + return a != b; + } + if (a == Qundef || b == Qundef) return -1; + if (SYMBOL_P(a) && SYMBOL_P(b)) { + return a != b; + } + if ((TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString && SYMBOL_P(b)) || (TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString && SYMBOL_P(a))) { + return strhash_cmp(&a, &b); + } + if (TYPE(a) == T_STRING && RBASIC(a)->klass == rb_cString && + TYPE(b) == T_STRING && RBASIC(b)->klass == rb_cString) { + return rb_str_cmp(a, b); + } + return !rb_eql(a, b); +} + VALUE rb_hash(VALUE obj) { @@ -99,11 +152,44 @@ rb_any_hash(VALUE a) return (st_index_t)RSHIFT(hnum, 1); } +static st_index_t +rb_strhash_hash(VALUE a) +{ + VALUE hval; + st_index_t hnum; + + switch (TYPE(a)) { + case T_FIXNUM: + case T_NIL: + case T_FALSE: + case T_TRUE: + hnum = rb_hash_end(rb_hash_start((unsigned int)a)); + break; + case T_SYMBOL: + hnum = rb_sym_hash(a); + break; + case T_STRING: + hnum = rb_str_hash(a); + break; + + default: + hval = rb_hash(a); + hnum = FIX2LONG(hval); + } + hnum <<= 1; + return (st_index_t)RSHIFT(hnum, 1); +} + static const struct st_hash_type objhash = { rb_any_cmp, rb_any_hash, }; +static const struct st_hash_type strhash = { + rb_strhash_cmp, + rb_strhash_hash, +}; + static const struct st_hash_type identhash = { st_numcmp, st_numhash, @@ -219,7 +305,7 @@ hash_alloc(VALUE klass) OBJSETUP(hash, klass, T_HASH); hash->ifnone = Qnil; - + if (klass == rb_cStrHash) FL_SET(hash, STR_HASH); return (VALUE)hash; } @@ -230,6 +316,12 @@ rb_hash_new(void) } VALUE +rb_strhash_new(void) +{ + return hash_alloc(rb_cStrHash); +} + +VALUE rb_hash_dup(VALUE hash) { NEWOBJ(ret, struct RHash); @@ -240,8 +332,33 @@ rb_hash_dup(VALUE hash) if (FL_TEST(hash, HASH_PROC_DEFAULT)) { FL_SET(ret, HASH_PROC_DEFAULT); } - ret->ifnone = RHASH(hash)->ifnone; - return (VALUE)ret; + if STR_HASH_P(hash){ + NEW_STR_HASH(ret,hash); + }else{ + ret->ifnone = RHASH(hash)->ifnone; + return (VALUE)ret; + } +} + +static void +rb_strhash_convert(VALUE *val) +{ + int i; + VALUE values; + + switch (TYPE(*val)) { + case T_HASH: + *val = rb_hash_strhash(*val); + break; + case T_ARRAY: + values = rb_ary_new2(RARRAY_LEN(*val)); + for (i = 0; i < RARRAY_LEN(*val); i++) { + VALUE el = RARRAY_PTR(*val)[i]; + rb_ary_push(values, (TYPE(el) == T_HASH) ? rb_hash_strhash(el) : el); + } + *val = values; + break; + } } static void @@ -256,7 +373,7 @@ struct st_table * rb_hash_tbl(VALUE hash) { if (!RHASH(hash)->ntbl) { - RHASH(hash)->ntbl = st_init_table(&objhash); + RHASH(hash)->ntbl = STR_HASH_P(hash) ? st_init_table(&strhash) : st_init_table(&objhash); } return RHASH(hash)->ntbl; } @@ -318,7 +435,9 @@ static VALUE rb_hash_initialize(int argc, VALUE *argv, VALUE hash) { VALUE ifnone; - + VALUE constructor; + rb_scan_args(argc, argv, "01", &constructor); + if(TYPE(constructor) == T_HASH) return rb_hash_update(hash,constructor); rb_hash_modify(hash); if (rb_block_given_p()) { if (argc > 0) { @@ -333,7 +452,6 @@ rb_hash_initialize(int argc, VALUE *argv, VALUE hash) rb_scan_args(argc, argv, "01", &ifnone); RHASH(hash)->ifnone = ifnone; } - return hash; } @@ -366,8 +484,11 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) hash = hash_alloc(klass); if (RHASH(tmp)->ntbl) { RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); + if (FL_TEST(RHASH(tmp), STR_HASH) || klass == rb_cStrHash){ + NEW_STR_HASH(hash,tmp); + }else + return hash; } - return hash; } tmp = rb_check_array_type(argv[0]); @@ -426,12 +547,21 @@ rb_hash_s_try_convert(VALUE dummy, VALUE hash) return rb_check_convert_type(hash, T_HASH, "Hash", "to_hash"); } +static VALUE +rb_strhash_s_try_convert(VALUE dummy, VALUE hash) +{ + return rb_check_convert_type(hash, T_HASH, "StrHash", "to_hash"); +} + static int rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg) { st_table *tbl = (st_table *)arg; - if (key != Qundef) st_insert(tbl, key, value); + if (key != Qundef){ + if (tbl->type == &strhash) rb_strhash_convert(&value); + st_insert(tbl, key, value); + } return ST_CONTINUE; } @@ -459,7 +589,6 @@ static VALUE rb_hash_rehash(VALUE hash) { st_table *tbl; - if (RHASH(hash)->iter_lev > 0) { rb_raise(rb_eRuntimeError, "rehash during iteration"); } @@ -470,7 +599,6 @@ rb_hash_rehash(VALUE hash) rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); st_free_table(RHASH(hash)->ntbl); RHASH(hash)->ntbl = tbl; - return hash; } @@ -974,7 +1102,7 @@ rb_hash_select(VALUE hash) VALUE result; RETURN_ENUMERATOR(hash, 0, 0); - result = rb_hash_new(); + result = STR_HASH_P(hash) ? rb_strhash_new() : rb_hash_new(); rb_hash_foreach(hash, select_i, result); return result; } @@ -1046,6 +1174,13 @@ rb_hash_aset(VALUE hash, VALUE key, VALUE val) return val; } +VALUE +rb_strhash_aset(VALUE hash, VALUE key, VALUE val) +{ + CONVERT_STR_HASH(hash,val); + rb_hash_aset(hash, key, val); +} + static int replace_i(VALUE key, VALUE val, VALUE hash) { @@ -1617,7 +1752,7 @@ rb_hash_invert_i(VALUE key, VALUE value, VALUE hash) static VALUE rb_hash_invert(VALUE hash) { - VALUE h = rb_hash_new(); + VALUE h = STR_HASH_P(hash) ? rb_strhash_new() : rb_hash_new(); rb_hash_foreach(hash, rb_hash_invert_i, h); return h; @@ -1627,6 +1762,7 @@ static int rb_hash_update_i(VALUE key, VALUE value, VALUE hash) { if (key == Qundef) return ST_CONTINUE; + CONVERT_STR_HASH(hash,value); st_insert(RHASH(hash)->ntbl, key, value); return ST_CONTINUE; } @@ -1637,6 +1773,7 @@ rb_hash_update_block_i(VALUE key, VALUE value, VALUE hash) if (key == Qundef) return ST_CONTINUE; if (rb_hash_has_key(hash, key)) { value = rb_yield_values(3, key, rb_hash_aref(hash, key), value); + CONVERT_STR_HASH(hash,value); } st_insert(RHASH(hash)->ntbl, key, value); return ST_CONTINUE; @@ -1852,6 +1989,24 @@ rb_hash_compare_by_id_p(VALUE hash) return Qfalse; } +static VALUE +rb_hash_strhash(VALUE hash) +{ + if STR_HASH_P(hash) + return hash; + VALUE args[1]; + args[0] = hash; + return rb_hash_s_create(1, (VALUE *)args, rb_cStrHash); +} + +static VALUE +rb_strhash_to_hash(VALUE hash) +{ + VALUE args[1]; + args[0] = hash; + return rb_hash_s_create(1, (VALUE *)args, rb_cHash); +} + static int path_tainted = -1; static char **origenviron; @@ -2649,6 +2804,8 @@ Init_Hash(void) id_default = rb_intern("default"); rb_cHash = rb_define_class("Hash", rb_cObject); + rb_cStrHash = rb_define_class("StrHash", rb_cHash); + rb_define_singleton_method(rb_cStrHash, "try_convert", rb_strhash_s_try_convert, 1); rb_include_module(rb_cHash, rb_mEnumerable); @@ -2715,6 +2872,10 @@ Init_Hash(void) rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + rb_define_method(rb_cHash, "strhash", rb_hash_strhash, 0); + rb_define_method(rb_cStrHash, "to_hash", rb_strhash_to_hash, 0); + rb_define_method(rb_cStrHash, "[]=", rb_strhash_aset, 2); + rb_define_method(rb_cStrHash, "store", rb_strhash_aset, 2); origenviron = environ; envtbl = rb_obj_alloc(rb_cObject); diff --git a/include/ruby/intern.h b/include/ruby/intern.h index fd37eca..c97caef 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -660,6 +660,7 @@ void rb_str_associate(VALUE, VALUE); VALUE rb_str_associated(VALUE); void rb_str_setter(VALUE, ID, VALUE*); VALUE rb_str_intern(VALUE); +st_index_t rb_sym_hash(VALUE); VALUE rb_sym_to_s(VALUE); VALUE rb_str_length(VALUE); long rb_str_offset(VALUE, long); diff --git a/string.c b/string.c index e7f7e86..cc4ff34 100644 --- a/string.c +++ b/string.c @@ -7025,6 +7025,17 @@ sym_equal(VALUE sym1, VALUE sym2) return Qfalse; } +st_index_t +rb_sym_hash(VALUE sym){ + ID id = SYM2ID(sym); + return rb_str_hash(rb_id2str(id)); +} + +static VALUE +rb_sym_hash_m(VALUE sym){ + st_index_t hval = rb_sym_hash(sym); + return INT2FIX(hval); +} static int sym_printable(const char *s, const char *send, rb_encoding *enc) @@ -7508,6 +7519,7 @@ Init_String(void) rb_define_method(rb_cSymbol, "===", sym_equal, 1); rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0); rb_define_method(rb_cSymbol, "to_s", rb_sym_to_s, 0); + rb_define_method(rb_cSymbol, "hash", rb_sym_hash_m, 0); rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0); rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0); rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0); diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index c860b25..b3145e9 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -872,3 +872,255 @@ class TestHash < Test::Unit::TestCase assert_equal({x=>1}.hash, {x=>1}.hash) end end + +class HashToStrHash < Test::Unit::TestCase + def test_strhash + hash = { 'a' => 1, 'b' => 2 } + assert_instance_of StrHash, hash.strhash + assert_equal %w(a b), hash.keys + assert_equal [1,2], hash.values + end +end + +class TestStrHash < Test::Unit::TestCase + def setup + @strings = { 'a' => 1, 'b' => 2 }.strhash + @symbols = { :a => 1, :b => 2 }.strhash + @mixed = { :a => 1, 'b' => 2 }.strhash + @fixnums = { 0 => 1, 1 => 2 }.strhash + end + + def test_inherits_hash + assert_equal Hash, StrHash.superclass + end + + def test_strhash + assert_equal @strings.object_id, @strings.strhash.object_id + assert_instance_of StrHash, { 'a' => 1, 'b' => 2 }.strhash + end + + def test_initialize + strhash = StrHash.new({ 'a' => 1, 'b' => 2 }) + assert_equal 1, strhash[:a] + strhash = StrHash.new + strhash[:a] = 'a' + assert_equal 'a', strhash[:a] + end + + def test_set + array = [{ 'a' => 1, 'b' => 2 }, [:a,:b,:c]] + @strings[:array] = array + assert_instance_of StrHash, @strings[:array].shift + assert_instance_of Array, @strings[:array] = array + @strings[:hash] = { 'a' => 1, 'b' => 2 } + assert_instance_of StrHash, @strings[:hash] + end + + def test_dup + assert_equal @strings, @strings.dup + assert_equal @mixed, @mixed.dup + assert_not_equal @mixed.object_id, @mixed.dup.object_id + end + + def test_keys + assert_equal ["a", "b"], @strings.keys + assert_equal [:a, :b], @symbols.keys + assert_equal [:a, "b"], @mixed.keys + assert_equal [0, 1], @fixnums.keys + end + + def test_values + assert_equal [1, 2], @strings.values + assert_equal [1, 2], @symbols.values + assert_equal [1, 2], @mixed.values + assert_equal [1, 2], @fixnums.values + end + + def test_fetch + assert_equal 1, @strings.fetch('a') + assert_equal 1, @strings.fetch(:a.to_s) + assert_equal 1, @strings.fetch(:a) + end + + def test_key? + assert @strings.key?(:a) + assert @strings.include?('a') + assert @mixed.has_key?('b') + end + + def test_delete + @strings.delete('a') + assert !@strings.key?(:a) + end + + def test_assorted + hashes = { :@strings => @strings, :@symbols => @symbols, :@mixed => @mixed } + method_map = { :'[]' => 1, :fetch => 1, :values_at => [1], + :has_key? => true, :include? => true, :key? => true, + :member? => true } + + hashes.each do |name, hash| + method_map.sort_by { |m| m.to_s }.each do |meth, expected| + assert_equal(expected, hash.__send__(meth, 'a'), + "Calling #{name}.#{meth} 'a'") + assert_equal(expected, hash.__send__(meth, :a), + "Calling #{name}.#{meth} :a") + end + end + + assert_equal [1, 2], @strings.values_at('a', 'b') + assert_equal [1, 2], @strings.values_at(:a, :b) + assert_equal [1, 2], @symbols.values_at('a', 'b') + assert_equal [1, 2], @symbols.values_at(:a, :b) + assert_equal [1, 2], @mixed.values_at('a', 'b') + assert_equal [1, 2], @mixed.values_at(:a, :b) + end + + def test_reading + hash = StrHash.new + hash["a"] = 1 + hash["b"] = true + hash["c"] = false + hash["d"] = nil + + assert_equal 1, hash[:a] + assert_equal true, hash[:b] + assert_equal false, hash[:c] + assert_equal nil, hash[:d] + assert_equal nil, hash[:e] + end + + def test_reading_with_nonnil_default + hash = StrHash.new(1) + hash["a"] = 1 + hash["b"] = true + hash["c"] = false + hash["d"] = nil + + assert_equal 1, hash[:a] + assert_equal true, hash[:b] + assert_equal false, hash[:c] + assert_equal nil, hash[:d] + assert_equal 1, hash[:e] + end + + def test_writing + hash = StrHash.new + hash[:a] = 1 + hash['b'] = 2 + hash[3] = 3 + + assert_equal hash['a'], 1 + assert_equal hash['b'], 2 + assert_equal hash[:a], 1 + assert_equal hash[:b], 2 + assert_equal hash[3], 3 + end + + def test_update + hash = StrHash.new + hash[:a] = 'a' + hash['b'] = 'b' + + updated_with_strings = hash.update(@strings) + updated_with_symbols = hash.update(@symbols) + updated_with_mixed = hash.update(@mixed) + + assert_equal updated_with_strings[:a], 1 + assert_equal updated_with_strings['a'], 1 + assert_equal updated_with_strings['b'], 2 + + assert_equal updated_with_symbols[:a], 1 + assert_equal updated_with_symbols['b'], 2 + assert_equal updated_with_symbols[:b], 2 + + assert_equal updated_with_mixed[:a], 1 + assert_equal updated_with_mixed['b'], 2 + + assert [updated_with_strings, updated_with_symbols, updated_with_mixed].all? { |h| h.keys.size == 2 } + end + + def test_merging + hash = StrHash.new + hash[:a] = 'failure' + hash['b'] = 'failure' + + other = { 'a' => 1, :b => 2 } + + merged = hash.merge(other) + + assert_equal StrHash, merged.class + assert_equal 1, merged[:a] + assert_equal 2, merged['b'] + + hash.update(other) + + assert_equal 1, hash[:a] + assert_equal 2, hash['b'] + end + + def test_deleting + get_hash = proc{ StrHash[ :a => 'foo' ] } + hash = get_hash.call + assert_equal hash.delete(:a), 'foo' + assert_equal hash.delete(:a), nil + hash = get_hash.call + assert_equal hash.delete('a'), 'foo' + assert_equal hash.delete('a'), nil + end + + def test_to_hash + assert_instance_of Hash, @strings.to_hash + assert_equal %w(a b), @strings.to_hash.keys + # Should convert to a Hash with String keys. + assert_equal @strings, @mixed.strhash.to_hash + + # Should preserve the default value. + mixed_with_default = @mixed.dup + mixed_with_default.default = '1234' + roundtrip = mixed_with_default.strhash.to_hash + assert_equal @strings, roundtrip + assert_equal '1234', roundtrip.default + end + + def test_hash_with_array_of_hashes + hash = { "urls" => { "url" => [ { "address" => "1" }, { "address" => "2" } ] }} + strhash = StrHash[hash] + assert_equal "1", strhash[:urls][:url].first[:address] + end + + def test_indifferent_subhashes + h = {'user' => {'id' => 5}}.strhash + ['user', :user].each {|user| [:id, 'id'].each {|id| assert_equal 5, h[user][id], "h[#{user.inspect}][#{id.inspect}] should be 5"}} + + h = {:user => {:id => 5}}.strhash + ['user', :user].each {|user| [:id, 'id'].each {|id| assert_equal 5, h[user][id], "h[#{user.inspect}][#{id.inspect}] should be 5"}} + end + + def test_assorted_keys_not_stringified + original = {Object.new => 2, 1 => 2, [] => true} + indiff = original.strhash + assert(!indiff.keys.any? {|k| k.kind_of? String}, "A key was converted to a string!") + end + + def test_should_use_default_value_for_unknown_key + strhash = StrHash.new(3) + assert_equal 3, strhash[:new_key] + end + + def test_should_use_default_value_if_no_key_is_supplied + strhash = StrHash.new(3) + assert_equal 3, strhash.default + end + + def test_should_nil_if_no_default_value_is_supplied + strhash = StrHash.new + assert_nil strhash.default + end + + def test_should_copy_the_default_value_when_converting_to_hash_with_indifferent_access + hash = Hash.new(3) + strhash = hash.strhash + assert_equal 3, strhash.default + end +end \ No newline at end of file diff --git a/test/ruby/test_symbol.rb b/test/ruby/test_symbol.rb index f402da3..c181b19 100644 --- a/test/ruby/test_symbol.rb +++ b/test/ruby/test_symbol.rb @@ -1,6 +1,13 @@ require 'test/unit' class TestSymbol < Test::Unit::TestCase + def test_hash + assert_instance_of Fixnum, :symbol.hash + assert_equal 'symbol'.hash, :symbol.hash + assert_equal :"!".hash, :"!".hash + assert_not_equal :"$1".hash, :"@@1".hash + end + # [ruby-core:3573] def assert_eval_inspected(sym) |
|
|
[ruby-core:26495] Re: HashWithIndifferentAccess to coreOn Tue, Nov 3, 2009 at 6:12 AM, Urabe Shyouhei <shyouhei@...> wrote:
> Hello, > > I got a pull request for a HashWithIndifferentAccess implementation to merge > into core. I think it's almost OK to make it so (except few minor bugs that can > easily be handled). How about it? I'm also attaching a patch file for people who > are not used to git (mainly nobu). > +++ b/test/ruby/test_hash.rb > +class TestStrHash < Test::Unit::TestCase > + def setup > + @strings = { 'a' => 1, 'b' => 2 }.strhash > + @symbols = { :a => 1, :b => 2 }.strhash > + @mixed = { :a => 1, 'b' => 2 }.strhash > + @fixnums = { 0 => 1, 1 => 2 }.strhash > + end > + > + def test_keys > + assert_equal ["a", "b"], @strings.keys > + assert_equal [:a, :b], @symbols.keys > + assert_equal [:a, "b"], @mixed.keys > + assert_equal [0, 1], @fixnums.keys > + end So judging by the tests, and without going through the implementation in detail. It appears that this proposal goes to lengths to preserve the class of a key in the hash. I'm not sure that this is a real requirement. I contrast consider the current HashWithIndifferentAccess(HWIA) in Rails/ActiveSupport, here's a snippet from a rails console irb session: => {} >> h[:a] = 1 => 1 >> h['b'] = 2 => 2 >> h.keys => ["a", "b"] HWIA converts symbol arguments to keys in the access methods, and stores the keys internally as strings. I think picking a single internal representation for the keys is simpler, and probably a bit faster. The main decision is whether to pick strings or symbols for that internal representation. ActiveSupport picked strings, primarily from what I understand over a concern that symbols once interned can't be GCed and the key values in the Rails use cases come from arbitrary user inputs (e.g. query parameters in URIs). So converting such strings to symbols could be the basis of a DOS attack. As a coincidencs I just posted about this in reply to a thread on the ruby-lang group. I think that if the key class is preserved there are other ramifications to ponder, for example what should h.keys be after h = { 'a' => 1, 'b' => 2 }.strhash h[:a] = 3 I think that any of ['a', 'b'] or [:a, 'b'] or ['b', :a] all of which might seem to be plausible answers, would surprise someone, and that the POLS would be better preserved by conversion to a consistent class of key. If the current proposal were accepted, I'm not sure that it would be useful to Rails as a replacement implementation of HWIA, but I'm hoping that Yehuda or one of his cohorts would comment on that. -- Rick DeNatale Blog: http://talklikeaduck.denhaven2.com/ Twitter: http://twitter.com/RickDeNatale WWR: http://www.workingwithrails.com/person/9021-rick-denatale LinkedIn: http://www.linkedin.com/in/rickdenatale |
|
|
[ruby-core:26496] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core" on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei <shyouhei@...> writes: |I got a pull request for a HashWithIndifferentAccess implementation to merge |into core. I think it's almost OK to make it so (except few minor bugs that can |easily be handled). How about it? I'm also attaching a patch file for people who |are not used to git (mainly nobu). I am negative. * the class name is too long, and not clear, at least for me. * it might be useful for some cases, but not so common to make it built-in, I think. * if it's not for built-in, it can be implemented by external library, such as a gem, or active_support. matz. |
|
|
[ruby-core:26499] Re: HashWithIndifferentAccess to coreYukihiro Matsumoto wrote:
> I am negative. > > * the class name is too long, and not clear, at least for me. You mean HashWithIndifferentAccess? That longer name is for an equivalent code from ActiveSupport. The author of this request names his class "StrHash". > * it might be useful for some cases, but not so common to make it > built-in, I think. But that "some" case is actually HUGE. Almost all Rails application uses this (sort of) as if it were a built-in ruby feature. It would be great when this was a true built-in. > * if it's not for built-in, it can be implemented by external > library, such as a gem, or active_support. The author of this request does have a gem for it. http://github.com/methodmissing/hwia |
|
|
[ruby-core:26500] Re: HashWithIndifferentAccess to coreHi,
Appreciate the feedback. At the moment most Ruby web frameworks running on an MRI VM is affected by Symbol's immutability and the risk of DoS attacks ( sizeof(VALUE), 4 bytes ) as mentioned by Rick in an earlier post. I've seen various implementations of this pattern : ActiveSupport::HashWithIndifferentAccess (Rails), Mash (Merb), Sinatra etc. This patch is the result of spending some of an afternoon getting familiar with the VM again in preparation for an upcoming talk and in light of a push for this from Rails core, extracted something cleaner than the mixed 1.8 / 1.9 hwia gem. The gem also includes a lot of code duplication for not having all the required core Hash methods exposed via intern.h I do however understand that just because nested query params happen to map well to a Hash, doesn't mean a core class should be enhanced to support this feature.There's the less obtrusive option of * exposing Symbol#hash == String#hash for "string" == :string * Implement hashing very similar to the identhash scheme in 1.9, hooked through Hash#strhash * extract a UrlParam or similar gem / extension etc. that piggy backs off this feature ( iow. delegate the nested conversion macro elsewhere ) Thoughts ? - Lourens On 2009/11/03, at 16:27, Urabe Shyouhei wrote: > Yukihiro Matsumoto wrote: >> I am negative. >> >> * the class name is too long, and not clear, at least for me. > > You mean HashWithIndifferentAccess? That longer name is for an > equivalent code > from ActiveSupport. The author of this request names his class > "StrHash". > >> * it might be useful for some cases, but not so common to make it >> built-in, I think. > > But that "some" case is actually HUGE. Almost all Rails application > uses this > (sort of) as if it were a built-in ruby feature. It would be great > when this > was a true built-in. > >> * if it's not for built-in, it can be implemented by external >> library, such as a gem, or active_support. > > The author of this request does have a gem for it. > http://github.com/methodmissing/hwia > |
|
|
[ruby-core:26503] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26499] Re: HashWithIndifferentAccess to core" on Wed, 4 Nov 2009 01:27:32 +0900, Urabe Shyouhei <shyouhei@...> writes: |> * the class name is too long, and not clear, at least for me. | |You mean HashWithIndifferentAccess? That longer name is for an equivalent code |from ActiveSupport. The author of this request names his class "StrHash". Yes, but I don't prefer StrHash either, which is shorter but still does not describe the role of the class. |> * it might be useful for some cases, but not so common to make it |> built-in, I think. | |But that "some" case is actually HUGE. Almost all Rails application uses this |(sort of) as if it were a built-in ruby feature. It would be great when this |was a true built-in. With making huge hole for confusion? Putting 1.8 compatibility aside, I believe all 1.9 code should use symbols for such cases, so the needs for HashWithIndifferentAccess (or whatever name) will become less. matz. |
|
|
[ruby-core:26504] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26500] Re: HashWithIndifferentAccess to core" on Wed, 4 Nov 2009 01:51:18 +0900, Lourens Naudé <lourens@...> writes: |* exposing Symbol#hash == String#hash for "string" == :string Please no. Symbols and strings are not same. |* Implement hashing very similar to the identhash scheme in 1.9, |hooked through Hash#strhash Probably. Depends on how much need it would have. |* extract a UrlParam or similar gem / extension etc. that piggy backs |off this feature ( iow. delegate the nested conversion macro elsewhere ) I am sorry but I don't get this one. matz. |
|
|
[ruby-core:26506] Re: HashWithIndifferentAccess to coreYukihiro Matsumoto wrote:
> I am negative. > * the class name is too long, and not clear, at least for me. absolutely. it's a really bad name. [murphy] |
|
|
[ruby-core:26507] Re: HashWithIndifferentAccess to coreOn Tue, Nov 3, 2009 at 6:48 AM, Yukihiro Matsumoto <matz@...> wrote:
> Hi, > > In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core" > on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei <shyouhei@...> writes: > > |I got a pull request for a HashWithIndifferentAccess implementation to merge > |into core. I think it's almost OK to make it so (except few minor bugs that can > |easily be handled). How about it? I'm also attaching a patch file for people who > |are not used to git (mainly nobu). > > I am negative. > > * the class name is too long, and not clear, at least for me. > * it might be useful for some cases, but not so common to make it > built-in, I think. It encapsulates a common pattern: * consume arbitrary key/value data from "outside world," where strings are best choice * work with these data using meaningful keys, where symbols are best choice Such cases are command-line options, database result sets, HTTP header values, etc. Anywhere Ruby works with external data (strings) using some known, consistent semantics (symbols). (The long name is legacy, a class name meant to discourage public use. But it caught on so it's been reused extensively anyway. Any appropriate name is fine.) > * if it's not for built-in, it can be implemented by external > library, such as a gem, or active_support. That's true. It's been implemented several places, usually duplicated code with subtle differences, and usually in pure Ruby. This code is often in the hot path so it's best to be highly performant. Living as a Ruby builtin gives very high speed and unifies the various implementations of this data type. Best, jeremy |
|
|
[ruby-core:26508] Re: HashWithIndifferentAccess to coreOn Tue, Nov 3, 2009 at 9:42 AM, Yukihiro Matsumoto <matz@...> wrote:
> Hi, > > In message "Re: [ruby-core:26499] Re: HashWithIndifferentAccess to core" > on Wed, 4 Nov 2009 01:27:32 +0900, Urabe Shyouhei <shyouhei@...> writes: > > |> * the class name is too long, and not clear, at least for me. > | > |You mean HashWithIndifferentAccess? That longer name is for an equivalent code > |from ActiveSupport. The author of this request names his class "StrHash". > > Yes, but I don't prefer StrHash either, which is shorter but still > does not describe the role of the class. > > |> * it might be useful for some cases, but not so common to make it > |> built-in, I think. > | > |But that "some" case is actually HUGE. Almost all Rails application uses this > |(sort of) as if it were a built-in ruby feature. It would be great when this > |was a true built-in. > > With making huge hole for confusion? Putting 1.8 compatibility aside, > I believe all 1.9 code should use symbols for such cases, so the needs > for HashWithIndifferentAccess (or whatever name) will become less. Ah, your preference is for Ruby 1.9 code to convert external key/value data from String => String to Symbol => String? This does address the core concern of addressing external k/v data with string keys using meaningful symbols. But it's slower: all keys must be converted to symbols up-front rather than lazily, when accessed. jeremy |
|
|
[ruby-core:26513] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26508] Re: HashWithIndifferentAccess to core" on Wed, 4 Nov 2009 05:33:06 +0900, Jeremy Kemper <jeremy@...> writes: |> With making huge hole for confusion? Putting 1.8 compatibility aside, |> I believe all 1.9 code should use symbols for such cases, so the needs |> for HashWithIndifferentAccess (or whatever name) will become less. | |Ah, your preference is for Ruby 1.9 code to convert external key/value |data from String => String to Symbol => String? This does address the |core concern of addressing external k/v data with string keys using |meaningful symbols. But it's slower: all keys must be converted to |symbols up-front rather than lazily, when accessed. Don't take me too serious (although I know it's difficult, since I am the responsible). I have never felt I need HashWithIndifferentAccess in my programming life, so my opinion is biased. You've explained somewhat in [ruby-core:26507], but I still need more comprehension. In general, blurring type differences (string and symbols in this case), is a code smell. matz. |
|
|
[ruby-core:26514] Re: HashWithIndifferentAccess to coreJust a thought: What about implementing this with an option on Hash:new,
rather than as a separate class? That at least would solve the problem with the class name. Regards, Martin. On 2009/11/04 5:27, Jeremy Kemper wrote: > On Tue, Nov 3, 2009 at 6:48 AM, Yukihiro Matsumoto<matz@...> wrote: >> Hi, >> >> In message "Re: [ruby-core:26492] HashWithIndifferentAccess to core" >> on Tue, 3 Nov 2009 20:12:08 +0900, Urabe Shyouhei<shyouhei@...> writes: >> >> |I got a pull request for a HashWithIndifferentAccess implementation to merge >> |into core. I think it's almost OK to make it so (except few minor bugs that can >> |easily be handled). How about it? I'm also attaching a patch file for people who >> |are not used to git (mainly nobu). >> >> I am negative. >> >> * the class name is too long, and not clear, at least for me. >> * it might be useful for some cases, but not so common to make it >> built-in, I think. > > It encapsulates a common pattern: > * consume arbitrary key/value data from "outside world," where > strings are best choice > * work with these data using meaningful keys, where symbols are best choice > > Such cases are command-line options, database result sets, HTTP header > values, etc. Anywhere Ruby works with external data (strings) using > some known, consistent semantics (symbols). > > (The long name is legacy, a class name meant to discourage public use. > But it caught on so it's been reused extensively anyway. Any > appropriate name is fine.) > >> * if it's not for built-in, it can be implemented by external >> library, such as a gem, or active_support. > > That's true. It's been implemented several places, usually duplicated > code with subtle differences, and usually in pure Ruby. This code is > often in the hot path so it's best to be highly performant. Living as > a Ruby builtin gives very high speed and unifies the various > implementations of this data type. > > Best, > jeremy > > -- #-# Martin J. Dürst, Professor, Aoyama Gakuin University #-# http://www.sw.it.aoyama.ac.jp mailto:duerst@... |
|
|
[ruby-core:26522] Re: HashWithIndifferentAccess to coreHi,
2009/11/4 "Martin J. Durst" <duerst@...>: > Just a thought: What about implementing this with an option on Hash:new, > rather than as a separate class? That at least would solve the problem with > the class name. This sounds good if HWIA is really imported. How about "Hash#compare_as_string_key" following in "Hash#compare_by_indentity"? h = {} h.compare_as_string_key h[:a] = :foo h["b"] = :bar p h["a"] #=> :foo p h[:b] #=> :bar p h #=> {:a=>:foo, "b"=>:bar} h[1] = 1 #=> neither string nor symbol key in strhash (RuntimeError) # (should it be assumed as "1" by using to_s?) Here is a patch (I didn't refer the original patch, sorry): diff --git a/hash.c b/hash.c index ef8b9a8..e26312c 100644 --- a/hash.c +++ b/hash.c @@ -99,6 +99,44 @@ rb_any_hash(VALUE a) return (st_index_t)RSHIFT(hnum, 1); } +static int +rb_strsym_cmp(VALUE a, VALUE b) +{ + if (a == b) return 0; + switch (TYPE(a)) { + case T_SYMBOL: + a = rb_id2str(SYM2ID(a)); + case T_STRING: + switch (TYPE(b)) { + case T_SYMBOL: + b = rb_id2str(SYM2ID(b)); + case T_STRING: + return rb_str_hash_cmp(a, b); + } + } + rb_raise(rb_eRuntimeError, "not string nor symbol key in strhash"); +} + +static st_index_t +rb_strsym_hash(VALUE a) +{ + VALUE hval; + st_index_t hnum; + + switch (TYPE(a)) { + case T_SYMBOL: + a = rb_id2str(SYM2ID(a)); + case T_STRING: + hnum = rb_str_hash(a); + break; + + default: + rb_raise(rb_eRuntimeError, "neither string nor symbol key in strhash"); + } + hnum <<= 1; + return (st_index_t)RSHIFT(hnum, 1); +} + static const struct st_hash_type objhash = { rb_any_cmp, rb_any_hash, @@ -109,6 +147,11 @@ static const struct st_hash_type identhash = { st_numhash, }; +static const struct st_hash_type strsymhash = { + rb_strsym_cmp, + rb_strsym_hash, +}; + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -1852,6 +1895,40 @@ rb_hash_compare_by_id_p(VALUE hash) return Qfalse; } +/* + * call-seq: + * hsh.compare_as_string_key => hsh + * + * TBD + */ + +static VALUE +rb_hash_compare_as_string_key(VALUE hash) +{ + rb_hash_modify(hash); + RHASH(hash)->ntbl->type = &strsymhash; + rb_hash_rehash(hash); + return hash; +} + +/* + * call-seq: + * hsh.compare_as_string_key? => true or false + * + * TBD + */ + +static VALUE +rb_hash_compare_as_string_key_p(VALUE hash) +{ + if (!RHASH(hash)->ntbl) + return Qfalse; + if (RHASH(hash)->ntbl->type == &strsymhash) { + return Qtrue; + } + return Qfalse; +} + static int path_tainted = -1; static char **origenviron; @@ -2715,6 +2792,8 @@ Init_Hash(void) rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + rb_define_method(rb_cHash,"compare_as_string_key", rb_hash_compare_as_string_key, 0); + rb_define_method(rb_cHash,"compare_as_string_key?", rb_hash_compare_as_string_key_p, 0); origenviron = environ; envtbl = rb_obj_alloc(rb_cObject); -- Yusuke ENDOH <mame@...> |
|
|
[ruby-core:26555] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26522] Re: HashWithIndifferentAccess to core" on Wed, 4 Nov 2009 12:50:12 +0900, Yusuke ENDOH <mame@...> writes: |How about "Hash#compare_as_string_key" following in |"Hash#compare_by_indentity"? It's much better than HashWithIndifferentAccess. matz. |
|
|
[ruby-core:26584] Re: HashWithIndifferentAccess to core2009/11/6 Yukihiro Matsumoto <matz@...>:
> |How about "Hash#compare_as_string_key" following in > |"Hash#compare_by_indentity"? > > It's much better than HashWithIndifferentAccess. I agree. I'm strongly against StrHash itself because of the reasons which matz mentioned. String is not Symbol, Symbol is not String. It is useful in a particular case but Ruby is a general purpose language. When you need different-access, you can implement "your language" on top of Ruby like HashWithDifferentAccess. This is a good point of Ruby. compare_as_string_key is much more acceptable. But it still seems to be just for Rails. How about Hash::Type class as an API for rb_hash_type? It means, generalizing compare_by_identity and HashWithDifferentAccess to strategy object that represents how to compare the keys. Thanks, -- Yuki Sonoda (Yugui) |
|
|
[ruby-core:26589] Re: HashWithIndifferentAccess to coreHi,
In message "Re: [ruby-core:26584] Re: HashWithIndifferentAccess to core" on Sat, 7 Nov 2009 20:06:07 +0900, Yugui <yugui@...> writes: |How about Hash::Type class as an API for rb_hash_type? Depends on API, and performance. matz. |
|
|
[ruby-core:26593] Re: HashWithIndifferentAccess to coreHi,
I've been toying with allowing custom hashing from the Ruby side for the past 2 days and have a semi-functional version (patch attached against subversion trunk 25671 - http://gist.github.com/228773), however my lack of VM control frame knowledge pretty much blocks progress atm. Experimental API : Hash#index_with(Object) The general expected contract being the object implements 2 methods, hash(a) and compare(a,b) Here's a rough HashWithIndifferentAccess ( referenced as "hwia" moving forward) implementation : module Hwia def hash(obj) case obj when Symbol, String obj.to_s.hash else obj.hash end end def compare(a,b) if String === a && Symbol === b || String === b && Symbol === a a.to_s <=> b.to_s else a <=> b end end extend self end h = { "foo" => "bar" } h.index_with(Hwia) h[:foo] # "bar" Hash#custom_index? #=> true | false Hash#indexed_with #=> Object Given the arguments accepted by st_hash_type members being restricted to 1 and 2 arguments respectively, I opted for falling back to the current ruby control frame after consulting with Aman Gupta, who also uses this in perftools.rb to get to self : rb_thread_t *th = GET_THREAD(); VALUE hash = th->cfp->self; Which works OK initially, but results in 2) Error: test_index_with(TestHash): TypeError: wrong argument type TestHash (expected Hash) test/ruby/test_hash.rb:906:in `test_index_with' after passing through rb_hash_aref in : h.index_with(Hwia) assert_equal Hwia, h.indexed_with assert h.custom_index? h["foo"] # fails as th->cfp->self becomes TestHash in that scope. Any thoughts / feedback much appreciated.Apologies for the naming confusion in the original mailing list post as well. The current source tree is available at : http://github.com/methodmissing/ruby/tree/custom_hashing - Lourens methodmissing:ruby lourens$ git diff --patch-with-stat github/ trunk..HEAD gc.c | 1 + hash.c | 70 +++++++++++++++++++++++++++++++++++++++ +++++++- include/ruby/ruby.h | 2 + test/ruby/test_hash.rb | 36 ++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 2 deletions(-) diff --git a/gc.c b/gc.c index 7711101..55cdb85 100644 --- a/gc.c +++ b/gc.c @@ -1697,6 +1697,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr, int lev) case T_HASH: mark_hash(objspace, obj->as.hash.ntbl, lev); + gc_mark(objspace, obj->as.hash.index_with, lev); ptr = obj->as.hash.ifnone; goto again; diff --git a/hash.c b/hash.c index ef8b9a8..66280b6 100644 --- a/hash.c +++ b/hash.c @@ -14,6 +14,7 @@ #include "ruby/ruby.h" #include "ruby/st.h" #include "ruby/util.h" +#include "vm_core.h" #ifdef __APPLE__ #include <crt_externs.h> @@ -33,7 +34,7 @@ rb_hash_freeze(VALUE hash) VALUE rb_cHash; static VALUE envtbl; -static ID id_hash, id_yield, id_default; +static ID id_hash, id_yield, id_default, id_compare; static int rb_any_cmp(VALUE a, VALUE b) @@ -99,11 +100,37 @@ rb_any_hash(VALUE a) return (st_index_t)RSHIFT(hnum, 1); } +static int +rb_custom_cmp(VALUE a, VALUE b) +{ + rb_thread_t *th = GET_THREAD(); + VALUE hash = th->cfp->self; + Check_Type(hash, T_HASH); + return FIX2INT(rb_funcall(RHASH(hash)->index_with, id_compare, 2, a, b)); +} + +static st_index_t +rb_custom_hash(VALUE a) +{ + st_index_t hnum; + rb_thread_t *th = GET_THREAD(); + VALUE hash = th->cfp->self; + Check_Type(hash, T_HASH); + hnum = FIX2LONG(rb_funcall(RHASH(hash)->index_with, id_hash, 1, a)); + hnum <<= 1; + return (st_index_t)RSHIFT(hnum, 1); +} + static const struct st_hash_type objhash = { rb_any_cmp, rb_any_hash, }; +static const struct st_hash_type customhash = { + rb_custom_cmp, + rb_custom_hash, +}; + static const struct st_hash_type identhash = { st_numcmp, st_numhash, @@ -219,6 +246,7 @@ hash_alloc(VALUE klass) OBJSETUP(hash, klass, T_HASH); hash->ifnone = Qnil; + hash->index_with = Qnil; return (VALUE)hash; } @@ -241,6 +269,7 @@ rb_hash_dup(VALUE hash) FL_SET(ret, HASH_PROC_DEFAULT); } ret->ifnone = RHASH(hash)->ifnone; + ret->index_with = RHASH(hash)->index_with; return (VALUE)ret; } @@ -492,7 +521,6 @@ VALUE rb_hash_aref(VALUE hash, VALUE key) { VALUE val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { return rb_funcall(hash, id_default, 1, key); } @@ -1080,6 +1108,7 @@ rb_hash_replace(VALUE hash, VALUE hash2) } rb_hash_foreach(hash2, replace_i, hash); RHASH(hash)->ifnone = RHASH(hash2)->ifnone; + RHASH(hash)->index_with = RHASH(hash2)->index_with; if (FL_TEST(hash2, HASH_PROC_DEFAULT)) { FL_SET(hash, HASH_PROC_DEFAULT); } @@ -1852,6 +1881,37 @@ rb_hash_compare_by_id_p(VALUE hash) return Qfalse; } +static VALUE +rb_hash_index_with(VALUE hash, VALUE idx) +{ + if (rb_respond_to(idx, id_hash) && rb_respond_to(idx, id_compare)){ + rb_hash_modify(hash); + RHASH(hash)->index_with = idx; + RHASH(hash)->ntbl->type = &customhash; + rb_hash_rehash(hash); + return hash; + }else{ + rb_raise(rb_eRuntimeError, "a custom hashing scheme should implement #hash(a) and #compare(a,b)"); + } +} + +static VALUE +rb_hash_custom_index_p(VALUE hash) +{ + if (!RHASH(hash)->ntbl) + return Qfalse; + if (RHASH(hash)->ntbl->type == &customhash) { + return Qtrue; + } + return Qfalse; +} + +static VALUE +rb_hash_indexed_with(VALUE hash) +{ + return RHASH(hash)->index_with; +} + static int path_tainted = -1; static char **origenviron; @@ -2647,6 +2707,7 @@ Init_Hash(void) id_hash = rb_intern("hash"); id_yield = rb_intern("yield"); id_default = rb_intern("default"); + id_compare = rb_intern("compare"); rb_cHash = rb_define_class("Hash", rb_cObject); @@ -2716,6 +2777,11 @@ Init_Hash(void) rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + rb_define_method(rb_cHash,"index_with", rb_hash_index_with, 1); + rb_define_method(rb_cHash,"indexed_with", rb_hash_indexed_with, 0); + + rb_define_method(rb_cHash,"custom_index?", rb_hash_custom_index_p, 0); +#define RHASH_IDX_WITH(h) (RHASH(h)->index_with) #define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl- >num_entries : 0) #define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0) diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index c860b25..4e3f7f9 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -871,4 +871,40 @@ class TestHash < Test::Unit::TestCase def o.hash; 2<<100; end assert_equal({x=>1}.hash, {x=>1}.hash) end + + module Hwia + def hash(obj) + case obj + when Symbol, String + obj.to_s.hash + else + obj.hash + end + end + + def compare(a,b) + if String === a && Symbol === b || String === b && Symbol === a + a.to_s <=> b.to_s + else + a <=> b + end + end + extend self + end + + def test_index_with + a = "foo" + assert(!{}.custom_index?) + h = { a => "bar" } + assert_equal nil, h.indexed_with + assert_raises(RuntimeError) do + h.index_with(Object.new) + end + h.index_with(Hwia) + assert_equal Hwia, h.indexed_with + assert h.custom_index? + h["foo"] + #assert_equal "bar", h["foo"] + #assert_equal "bar", h[:foo] + end end On 2009/11/07, at 13:10, Yukihiro Matsumoto wrote: > Hi, > > In message "Re: [ruby-core:26584] Re: HashWithIndifferentAccess to > core" > on Sat, 7 Nov 2009 20:06:07 +0900, Yugui <yugui@...> writes: > > |How about Hash::Type class as an API for rb_hash_type? > > Depends on API, and performance. > > matz. > |
|
|
[ruby-core:26595] Re: HashWithIndifferentAccess to coreI'm not sure that it really makes sense to add any of this to core.
So far the argument goes something like this. Several important ruby based web frameworks have some form of HashWithIndifferentAccess class, Rails, Merb and Sinatra have been mentioned. Did I miss any? Since it seems to be a common use case, perhaps it should be in Ruby core. And implementations have been given which either: store keys in the hash which are the same type as that given as the key parameter to WhateverTheHeckWeCallThisThing#[]= convert the key to a symbol if it's a string convert the key to a string if it's a symbol. Then the arguments start over what to name it. So I've just now looked at Merb, Rails and Sinatra to see what's up. The ONLY difference between Merb and Rails here, as far as I can tell is the name of WhateverTheHeckWeCallThisThing. Rails (actually ActiveSupport) calls it HashWithIndifferent access, Merb (actually the extlib gem) calls it Mash. Other than the name the code appears to be identical, including the initial comment: # This class has dubious semantics and we only have it so that # people can write params[:key] instead of params['key'] # and they get the same value for both keys. And what is the implementation? Basically it 1) overrides the initialize method to convert its keys to strings after being initialized if the parameter to new is a hash. 2) overrides Hash#default(key=nil) to efffectively return self[key.to_s] if the key arg is a symbol and key.to_s is included in the hash. 3) overrides Hash#[]=(key,value) to effectively call super[convert_key(key)] = convert_value(value) where convert_key changes symbols to strings and leaves any other key alone and convert_value converts an ordinary hash value to a HashWithIndifferentAccess/Mash and leaves anything else alone. This is because one of the main uses of this thing is for nested parameters in a http request. 4) overrides any other method which takes a key argument to call convert_key on it before using it. Sinatra, is much more minimal, as one might expect. It doesn't have such a class, rather it defines a Sinatra::Base#indifferent_hash method def indifferent_hash Hash.new {|hash,key| hash[key.to_s] if Symbol === key } end Which simply makes indifferent_hash[:a] return the same value as indifferent_hash['a'] if :a isn't a key in the hash. Sinatra apparently relies on the code which uses these hashes not to use both a symbol and it's string form together for []= Personally I think that as implemented in both Rails/Merb and Sinatra, this is much ado about nothing, It wouldn't really buy much to pull this code down to core whatever you name it, and if something with different semantics, such as some of the proposals on this thread was adopted, it's likely that the frameworks, would ignore it, unless it were given a name which clashed. As for the decision made by all three of these frameworks to consistenly use strings rather than symbols for this, I think it makes sense. When I first realized that HashWIthIndifferentAccess did it this way it didn't make sense to me, since I perceived the motivation to be performance and one a symbol was interned looking up symbol keys should be faster since Symbol#hash is O(1), and String#hash is O(string.length). But doing it the other way, and using Symbols rather than Strings for the actual, stored, keys in the hash has a time and space cost, because every time a String is used as a key in a method, it must be converted to a Symbol, which requires the equivalent of looking it up to see if it has already been interned, and making a copy non-gcable. And taking a laissez-faire approach and storing either strings or symbols and trying to make them come out the same when the Hash is accessed, would seem to require additional overhead for each lookup, as well as introducing ambiguities of specification if both :key and 'key' were used to store in the hash, as I pointed out earlier in this thread. To summarize, I'd vote for leaving things in core as they are, and letting frameworks implement such extensions the way they see fit. -- Rick DeNatale Blog: http://talklikeaduck.denhaven2.com/ Twitter: http://twitter.com/RickDeNatale WWR: http://www.workingwithrails.com/person/9021-rick-denatale LinkedIn: http://www.linkedin.com/in/rickdenatale |
|
|
[ruby-core:26599] Re: HashWithIndifferentAccess to coreOn Sat, Nov 7, 2009 at 10:21 AM, Rick DeNatale <rick.denatale@...> wrote:
> So far the argument goes something like this. > Since it seems to be a common use case, perhaps it should be in Ruby core. This is a motivating factor. Generally (repeating myself from above) it encapsulates a common pattern: * consume arbitrary key/value data from "outside world," where strings are best choice * work with these data using meaningful keys, where symbols are best choice Such cases are command-line options, database result sets, HTTP header values, etc. Anywhere Ruby works with external data (strings) using some known, consistent semantics (symbols). This can be done in pure Ruby and as a separate gem, but living in Ruby itself gives very high speed and unifies the various behaviors of this data type. The particular choice of implementation and name are less important. jeremy |
|
|
[ruby-core:26600] Re: HashWithIndifferentAccess to coreOn Nov 7, 2009, at 1:57 PM, Jeremy Kemper wrote:
> On Sat, Nov 7, 2009 at 10:21 AM, Rick DeNatale <rick.denatale@... > > wrote: >> So far the argument goes something like this. >> Since it seems to be a common use case, perhaps it should be in >> Ruby core. > > This is a motivating factor. Generally (repeating myself from above) > it encapsulates a common pattern: > * consume arbitrary key/value data from "outside world," where > strings are best choice > * work with these data using meaningful keys, where symbols are best > choice > > Such cases are command-line options, database result sets, HTTP header > values, etc. Anywhere Ruby works with external data (strings) using > some known, consistent semantics (symbols). I agree with Jeremy that this happens a lot. It's not just Rails. In fact, we tend to criticize any library that doesn't allow us to use String and Symbol interchangeably as keys. There was a presentation on rufus-tokyo at RubyKaigi this year. Someone mentioned that it didn't do this in the Q and A. The author modified his library shortly after. I can name at least three projects I've used similar code in, all written just this year. I think it's a pretty common occurrence. James Edward Gray II |
| < Prev | 1 - 2 | Next > |
| Free embeddable forum powered by Nabble | Forum Help |