|
View:
New views
20 Messages
—
Rating Filter:
Alert me
|
| < Prev | 1 - 2 - 3 - 4 | Next > |
|
|
hash index improving v3There's minor change against the previous one( http://archives.postgresql.org/pgsql-hackers/2008-07/msg01183.php ).
* merge branch master(Aug 16) into the patch * clean code and make some comment Performance result is here http://wiki.postgresql.org/wiki/Gsoc08-hashindex It seems hash index is a little better on index creation and selection. But maybe it's in the range of noise, I'm not sure. I'd like to try it with a bigger dataset (e.g. table with 10GB) but there is not enough space in my computer. Anyone interest can make a test on a bigger data set. -- Best Regards, Xiao Meng DKERC, Harbin Institute of Technology, China Gtalk: mx.cogito@... MSN: cnEnder@... http://xiaomeng.yo2.cn [hash-v3.patch] *** a/src/backend/access/hash/hash.c --- b/src/backend/access/hash/hash.c *************** *** 129,135 **** hashbuildCallback(Relation index, IndexTuple itup; /* form an index tuple and point it at the heap tuple */ ! itup = index_form_tuple(RelationGetDescr(index), values, isnull); itup->t_tid = htup->t_self; /* Hash indexes don't index nulls, see notes in hashinsert */ --- 129,135 ---- IndexTuple itup; /* form an index tuple and point it at the heap tuple */ ! itup = _hash_form_tuple(index, values,isnull); itup->t_tid = htup->t_self; /* Hash indexes don't index nulls, see notes in hashinsert */ *************** *** 153,160 **** hashbuildCallback(Relation index, /* * hashinsert() -- insert an index tuple into a hash table. * ! * Hash on the index tuple's key, find the appropriate location ! * for the new tuple, and put it there. */ Datum hashinsert(PG_FUNCTION_ARGS) --- 153,160 ---- /* * hashinsert() -- insert an index tuple into a hash table. * ! * Hash on the heap tuple's key, form an index tuple with hash code. ! * Find the appropriate location for the new tuple, and put it there. */ Datum hashinsert(PG_FUNCTION_ARGS) *************** *** 171,177 **** hashinsert(PG_FUNCTION_ARGS) IndexTuple itup; /* generate an index tuple */ ! itup = index_form_tuple(RelationGetDescr(rel), values, isnull); itup->t_tid = *ht_ctid; /* --- 171,177 ---- IndexTuple itup; /* generate an index tuple */ ! itup = _hash_form_tuple(rel, values, isnull); itup->t_tid = *ht_ctid; /* *************** *** 211,218 **** hashgettuple(PG_FUNCTION_ARGS) OffsetNumber offnum; bool res; ! /* Hash indexes are never lossy (at the moment anyway) */ ! scan->xs_recheck = false; /* * We hold pin but not lock on current buffer while outside the hash AM. --- 211,218 ---- OffsetNumber offnum; bool res; ! /* Hash indexes maybe lossy since we store hash code only */ ! scan->xs_recheck = true; /* * We hold pin but not lock on current buffer while outside the hash AM. *** a/src/backend/access/hash/hashinsert.c --- b/src/backend/access/hash/hashinsert.c *************** *** 44,60 **** _hash_doinsert(Relation rel, IndexTuple itup) uint32 hashkey; Bucket bucket; Datum datum; - bool isnull; /* ! * Compute the hash key for the item. We do this first so as not to need ! * to hold any locks while running the hash function. */ if (rel->rd_rel->relnatts != 1) elog(ERROR, "hash indexes support only one index key"); ! datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull); ! Assert(!isnull); ! hashkey = _hash_datum2hashkey(rel, datum); /* compute item size too */ itemsz = IndexTupleDSize(*itup); --- 44,58 ---- uint32 hashkey; Bucket bucket; Datum datum; /* ! * Get the hash key for the item. */ if (rel->rd_rel->relnatts != 1) elog(ERROR, "hash indexes support only one index key"); ! ! datum = _hash_get_datum(itup); ! hashkey = DatumGetUInt32(datum); /* compute item size too */ itemsz = IndexTupleDSize(*itup); *************** *** 197,207 **** _hash_pgaddtup(Relation rel, { OffsetNumber itup_off; Page page; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); ! itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", --- 195,210 ---- { OffsetNumber itup_off; Page page; + Datum datum; + uint32 hashkey; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); ! datum = _hash_get_datum(itup); ! hashkey = DatumGetUInt32(datum); ! itup_off = _hash_binsearch(page, hashkey); ! if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", *** a/src/backend/access/hash/hashpage.c --- b/src/backend/access/hash/hashpage.c *************** *** 348,358 **** _hash_metapinit(Relation rel, double num_tuples) * Determine the target fill factor (in tuples per bucket) for this index. * The idea is to make the fill factor correspond to pages about as full * as the user-settable fillfactor parameter says. We can compute it ! * exactly if the index datatype is fixed-width, but for var-width there's ! * some guessing involved. */ ! data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid, ! RelationGetDescr(rel)->attrs[0]->atttypmod); item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) + sizeof(ItemIdData); /* include the line pointer */ ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width; --- 348,356 ---- * Determine the target fill factor (in tuples per bucket) for this index. * The idea is to make the fill factor correspond to pages about as full * as the user-settable fillfactor parameter says. We can compute it ! * exactly since the index datatype (i.e. hash code of uint32 ) is fixed-width. */ ! data_width = 4; item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) + sizeof(ItemIdData); /* include the line pointer */ ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width; *************** *** 785,791 **** _hash_splitbucket(Relation rel, OffsetNumber omaxoffnum; Page opage; Page npage; ! TupleDesc itupdesc = RelationGetDescr(rel); /* * It should be okay to simultaneously write-lock pages from each bucket, --- 783,789 ---- OffsetNumber omaxoffnum; Page opage; Page npage; ! /* * It should be okay to simultaneously write-lock pages from each bucket, *************** *** 848,861 **** _hash_splitbucket(Relation rel, /* * Re-hash the tuple to determine which bucket it now belongs in. * ! * It is annoying to call the hash function while holding locks, but ! * releasing and relocking the page for each tuple is unappealing too. */ itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum)); ! datum = index_getattr(itup, 1, itupdesc, &null); ! Assert(!null); ! ! bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum), maxbucket, highmask, lowmask); if (bucket == nbucket) --- 846,856 ---- /* * Re-hash the tuple to determine which bucket it now belongs in. * ! * We needn't call hash function since we store hash code in the index tuple */ itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum)); ! datum = _hash_get_datum(itup); ! bucket = _hash_hashkey2bucket(DatumGetUInt32(datum), maxbucket, highmask, lowmask); if (bucket == nbucket) *** a/src/backend/access/hash/hashsearch.c --- b/src/backend/access/hash/hashsearch.c *************** *** 178,183 **** _hash_first(IndexScanDesc scan, ScanDirection dir) --- 178,184 ---- hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument, cur->sk_subtype); + so->hashso_sk_hash = hashkey; /* * Acquire shared split lock so we can compute the target bucket safely * (see README). *************** *** 289,370 **** _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ ! do ! { ! switch (dir) { ! case ForwardScanDirection: ! if (offnum != InvalidOffsetNumber) ! offnum = OffsetNumberNext(offnum); /* move forward */ ! else ! offnum = FirstOffsetNumber; /* new page */ ! ! while (offnum > maxoff) ! { ! /* ! * either this page is empty (maxoff == ! * InvalidOffsetNumber) or we ran off the end. ! */ ! _hash_readnext(rel, &buf, &page, &opaque); ! if (BufferIsValid(buf)) ! { ! maxoff = PageGetMaxOffsetNumber(page); ! offnum = FirstOffsetNumber; ! } ! else ! { ! /* end of bucket */ ! maxoff = offnum = InvalidOffsetNumber; ! break; /* exit while */ ! } ! } ! break; ! ! case BackwardScanDirection: ! if (offnum != InvalidOffsetNumber) ! offnum = OffsetNumberPrev(offnum); /* move back */ ! else ! offnum = maxoff; /* new page */ ! ! while (offnum < FirstOffsetNumber) ! { ! /* ! * either this page is empty (offnum == ! * InvalidOffsetNumber) or we ran off the end. ! */ ! _hash_readprev(rel, &buf, &page, &opaque); ! if (BufferIsValid(buf)) ! maxoff = offnum = PageGetMaxOffsetNumber(page); ! else ! { ! /* end of bucket */ ! maxoff = offnum = InvalidOffsetNumber; ! break; /* exit while */ ! } ! } ! break; ! ! default: ! /* NoMovementScanDirection */ ! /* this should not be reached */ ! break; } ! /* we ran off the end of the world without finding a match */ ! if (offnum == InvalidOffsetNumber) { ! *bufP = so->hashso_curbuf = InvalidBuffer; ! ItemPointerSetInvalid(current); ! return false; } ! ! /* get ready to check this tuple */ ! itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); ! } while (!_hash_checkqual(scan, itup)); ! ! /* if we made it to here, we've found a valid tuple */ ! blkno = BufferGetBlockNumber(buf); ! *bufP = so->hashso_curbuf = buf; ! ItemPointerSet(current, blkno, offnum); ! return true; } --- 290,340 ---- * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ ! for (;;) ! { ! if (offnum == InvalidOffsetNumber) ! { ! /* ! * This is the first time we're scanning this particular ! * page of the bucket, so jump to the right spot via ! * binary search. ! */ ! offnum = _hash_binsearch(page, so->hashso_sk_hash); ! } ! else { ! /* Advance to the next tuple */ ! offnum = OffsetNumberNext(offnum); } ! if (offnum <= maxoff) itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); ! if (offnum <= maxoff && _hash_checkqual(scan, itup)) { ! /* Found a matching tuple */ ! *bufP = so->hashso_curbuf = buf; ! ItemPointerSet(current, BufferGetBlockNumber(buf), offnum); ! return true; } ! else ! { ! /* No more matches on this page, so go on to next page */ ! if (ScanDirectionIsForward(dir)) ! _hash_readnext(rel, &buf, &page, &opaque); ! else ! _hash_readprev(rel, &buf, &page, &opaque); ! ! if (BufferIsValid(buf)) ! { ! maxoff = PageGetMaxOffsetNumber(page); ! offnum = InvalidOffsetNumber; ! } ! else ! { ! /* Ran out of pages, so we're done */ ! *bufP = so->hashso_curbuf = InvalidBuffer; ! ItemPointerSetInvalid(current); ! return false; ! } ! } ! } } *** a/src/backend/access/hash/hashutil.c --- b/src/backend/access/hash/hashutil.c *************** *** 20,53 **** #include "executor/execdebug.h" #include "storage/bufmgr.h" #include "utils/lsyscache.h" /* * _hash_checkqual -- does the index tuple satisfy the scan conditions? */ bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { - TupleDesc tupdesc = RelationGetDescr(scan->indexRelation); ScanKey key = scan->keyData; int scanKeySize = scan->numberOfKeys; IncrIndexProcessed(); while (scanKeySize > 0) { - Datum datum; - bool isNull; Datum test; ! ! datum = index_getattr(itup, ! key->sk_attno, ! tupdesc, ! &isNull); /* assume sk_func is strict */ - if (isNull) - return false; if (key->sk_flags & SK_ISNULL) return false; --- 20,59 ---- #include "executor/execdebug.h" #include "storage/bufmgr.h" #include "utils/lsyscache.h" + #include "utils/memutils.h" + #include "catalog/pg_type.h" /* + * hardcoded tuple descriptors. see include/access/hash.h + */ + static FormData_pg_attribute Desc_hash[1] = {Schema_Hash}; + /* * _hash_checkqual -- does the index tuple satisfy the scan conditions? */ bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup) { ScanKey key = scan->keyData; int scanKeySize = scan->numberOfKeys; + Datum datum; + HashScanOpaque so = scan->opaque; IncrIndexProcessed(); + + datum = _hash_get_datum(itup); + if( so->hashso_sk_hash != DatumGetUInt32(datum) ) + return false; + key++; + scanKeySize--; while (scanKeySize > 0) { Datum test; ! ! datum = _hash_get_datum(itup); /* assume sk_func is strict */ if (key->sk_flags & SK_ISNULL) return false; *************** *** 221,223 **** hashoptions(PG_FUNCTION_ARGS) --- 227,326 ---- PG_RETURN_BYTEA_P(result); PG_RETURN_NULL(); } + + /* + * _get_hash_desc - get the hash index tuple descriptor + * + * The hash index tuple descriptor is a hard-coded tuple descriptor with only int32 attribute. + */ + TupleDesc _get_hash_desc() + { + static TupleDesc hashdesc = NULL; + + /* Already done? */ + if (hashdesc == NULL){ + /* + * It's the same with BuildHardcodedDescriptor() in relcache.c to build + * a hard-coded tuple descriptor. + */ + MemoryContext oldcxt; + + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + + hashdesc = CreateTemplateTupleDesc(1, false); + hashdesc->tdtypeid = INT4OID; + hashdesc->tdtypmod = -1; + memcpy(hashdesc->attrs[0], &Desc_hash[0], ATTRIBUTE_TUPLE_SIZE); + hashdesc->attrs[0]->attcacheoff = 0; + + MemoryContextSwitchTo(oldcxt); + } + + return hashdesc; + + } + + /* + * _hash_get_datum - get the hash index tuple's first column datum (i.e. hash code) + */ + Datum _hash_get_datum(IndexTuple itup) + { + return fetch_att( (char *) (itup) + IndexInfoFindDataOffset((itup)->t_info) , + true, + 4); + + } + /* + * _hash_form_tuple - form a tuple with hash code only + */ + IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull) + { + TupleDesc hashdesc; + IndexTuple itup; + uint32 hashkey; + + hashdesc = _get_hash_desc(); + hashkey = _hash_datum2hashkey(rel, values[0]); + values[0] = UInt32GetDatum(hashkey); + itup = index_form_tuple(hashdesc, values, isnull); + return itup; + } + + /* + * _hash_binsearch - Return the offset number in the page where the specified hash value + * should be located. + * + * The return value might exceed the page's max offset + * if the hash value is greater than any hash in the page. + */ + OffsetNumber + _hash_binsearch(Page page, uint32 hash_value) + { + OffsetNumber upper; + OffsetNumber lower; + + upper = PageGetMaxOffsetNumber(page) + 1; + lower = FirstOffsetNumber; + + while (upper > lower) + { + IndexTuple pos; + OffsetNumber off; + uint32 hashkey; + Datum datum; + bool isNull; + + off = (upper + lower) / 2; + Assert(OffsetNumberIsValid(off)); + + pos = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); + datum = _hash_get_datum(pos); + hashkey = DatumGetUInt32(datum); + if (hashkey < hash_value) + lower = off + 1; + else + upper = off; + } + + return upper; + } *** a/src/backend/utils/sort/tuplesort.c --- b/src/backend/utils/sort/tuplesort.c *************** *** 456,465 **** static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, --- 456,468 ---- static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); + static void copytup_index_hash(Tuplesortstate *state, SortTuple *stup, void *tup); static void writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup); static void readtup_index(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len); + static void readtup_index_hash(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); static void reversedirection_index_btree(Tuplesortstate *state); static void reversedirection_index_hash(Tuplesortstate *state); static int comparetup_datum(const SortTuple *a, const SortTuple *b, *************** *** 682,690 **** tuplesort_begin_index_hash(Relation indexRel, state->nKeys = 1; /* Only one sort column, the hash code */ state->comparetup = comparetup_index_hash; ! state->copytup = copytup_index; state->writetup = writetup_index; ! state->readtup = readtup_index; state->reversedirection = reversedirection_index_hash; state->indexRel = indexRel; --- 685,693 ---- state->nKeys = 1; /* Only one sort column, the hash code */ state->comparetup = comparetup_index_hash; ! state->copytup = copytup_index_hash; state->writetup = writetup_index; ! state->readtup = readtup_index_hash; state->reversedirection = reversedirection_index_hash; state->indexRel = indexRel; *************** *** 2822,2830 **** comparetup_index_hash(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) { /* ! * It's slightly annoying to redo the hash function each time, although ! * most hash functions ought to be cheap. Is it worth having a variant ! * tuple storage format so we can store the hash code? */ uint32 hash1; uint32 hash2; --- 2825,2831 ---- Tuplesortstate *state) { /* ! * we needn't redo hash function each time since we've stored it. */ uint32 hash1; uint32 hash2; *************** *** 2836,2845 **** comparetup_index_hash(const SortTuple *a, const SortTuple *b, /* Compute hash codes and mask off bits we don't want to sort by */ Assert(!a->isnull1); ! hash1 = DatumGetUInt32(FunctionCall1(state->hash_proc, a->datum1)) & state->hash_mask; Assert(!b->isnull1); ! hash2 = DatumGetUInt32(FunctionCall1(state->hash_proc, b->datum1)) & state->hash_mask; if (hash1 > hash2) --- 2837,2846 ---- /* Compute hash codes and mask off bits we don't want to sort by */ Assert(!a->isnull1); ! hash1 = DatumGetUInt32(a->datum1) & state->hash_mask; Assert(!b->isnull1); ! hash2 = DatumGetUInt32(b->datum1) & state->hash_mask; if (hash1 > hash2) *************** *** 2894,2899 **** copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) --- 2895,2916 ---- } static void + copytup_index_hash(Tuplesortstate *state, SortTuple *stup, void *tup) + { + IndexTuple tuple = (IndexTuple) tup; + unsigned int tuplen = IndexTupleSize(tuple); + IndexTuple newtuple; + + /* copy the tuple into sort storage */ + newtuple = (IndexTuple) palloc(tuplen); + memcpy(newtuple, tuple, tuplen); + USEMEM(state, GetMemoryChunkSpace(newtuple)); + stup->tuple = (void *) newtuple; + /* set up first-column key value(i.e. hash code) */ + stup->datum1 = _hash_get_datum(newtuple); + stup->isnull1 = false; + } + static void writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) { IndexTuple tuple = (IndexTuple) stup->tuple; *************** *** 2936,2941 **** readtup_index(Tuplesortstate *state, SortTuple *stup, --- 2953,2979 ---- } static void + readtup_index_hash(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) + { + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) palloc(tuplen); + + USEMEM(state, GetMemoryChunkSpace(tuple)); + if (LogicalTapeRead(state->tapeset, tapenum, (void *) tuple, + tuplen) != tuplen) + elog(ERROR, "unexpected end of data"); + if (state->randomAccess) /* need trailing length word? */ + if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen, + sizeof(tuplen)) != sizeof(tuplen)) + elog(ERROR, "unexpected end of data"); + stup->tuple = (void *) tuple; + /* set up first-column key value(i.e. hash code) */ + stup->datum1 = _hash_get_datum(tuple); + stup->isnull1 = false; + } + + static void reversedirection_index_btree(Tuplesortstate *state) { ScanKey scanKey = state->indexScanKey; *** a/src/include/access/hash.h --- b/src/include/access/hash.h *************** *** 100,105 **** typedef struct HashScanOpaqueData --- 100,107 ---- /* Current and marked position of the scan */ ItemPointerData hashso_curpos; ItemPointerData hashso_mrkpos; + /* Hash value of the scan key */ + uint32 hashso_sk_hash; } HashScanOpaqueData; typedef HashScanOpaqueData *HashScanOpaque; *************** *** 227,233 **** typedef HashMetaPageData *HashMetaPage; */ #define HASHPROC 1 ! /* public routines */ extern Datum hashbuild(PG_FUNCTION_ARGS); --- 229,239 ---- */ #define HASHPROC 1 ! /* ! * hard-coded hash desc ! */ ! #define Schema_Hash \ ! { 0, {"hashcode"}, 23, -1, 4, 1, 0, -1, -1, true, 'p', 'i', false, false, false, true, 0 } /* public routines */ extern Datum hashbuild(PG_FUNCTION_ARGS); *************** *** 330,335 **** extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, --- 336,345 ---- uint32 highmask, uint32 lowmask); extern uint32 _hash_log2(uint32 num); extern void _hash_checkpage(Relation rel, Buffer buf, int flags); + extern TupleDesc _get_hash_desc(); + extern Datum _hash_get_datum(IndexTuple itup); + extern IndexTuple _hash_form_tuple(Relation rel, Datum* values, bool* isnull); + OffsetNumber _hash_binsearch(Page page, uint32 hash_value); /* hash.c */ extern void hash_redo(XLogRecPtr lsn, XLogRecord *record); -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3With the help of David Fetter, you can get the copy by
git clone http://git.postgresql.org/git/~davidfetter/hash/.git It's in the branch gsoc-hash. Thank you, David. -- Best Regards, Xiao Meng DKERC, Harbin Institute of Technology, China Gtalk: mx.cogito@... MSN: cnEnder@... http://xiaomeng.yo2.cn |
|
|
Re: hash index improving v3On Mon, 2008-08-18 at 09:46 +0800, Xiao Meng wrote: > There's minor change against the previous > one( http://archives.postgresql.org/pgsql-hackers/2008-07/msg01183.php ). > * merge branch master(Aug 16) into the patch > * clean code and make some comment > Performance result is here > http://wiki.postgresql.org/wiki/Gsoc08-hashindex > > It seems hash index is a little better on index creation and > selection. > But maybe it's in the range of noise, I'm not sure. > I'd like to try it with a bigger dataset (e.g. table with 10GB) but > there is not enough space in my computer. > Anyone interest can make a test on a bigger data set. You don't give the text of the query used to do these performance tests, so I can't validate your test results. Right now it seems strange that the index is larger than a btree, yet the performance tests show that 3 times as much I/O was used accessing the btree. -- Simon Riggs www.2ndQuadrant.com PostgreSQL Training, Services and Support -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Wed, Sep 3, 2008 at 10:06 PM, Simon Riggs <simon@...> wrote:
>> It seems hash index is a little better on index creation and >> selection. >> But maybe it's in the range of noise, I'm not sure. >> I'd like to try it with a bigger dataset (e.g. table with 10GB) but >> there is not enough space in my computer. >> Anyone interest can make a test on a bigger data set. I tried it earlier on a 500M row table and found a few bugs. In particular, it doesn't seem like recheck is happening and the performance/sizing is a bit *interesting*. I'll post stats tomorrow when I'm in the office. -- Jonah H. Harris, Senior DBA myYearbook.com -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3Simon Riggs <simon@...> writes:
> Right now it seems strange that the index is larger than a btree, yet > the performance tests show that 3 times as much I/O was used accessing > the btree. Well, in an ideal world a hash index probe is O(1) while a btree probe is O(log N), so that result is exactly what hash proponents would hope for. Whether it's real or not is another question, but it could be. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3I performed code review and see my comments.
pgsql/src/backend/access/hash/hashpage.c <http://reviewdemo.postgresql.org/r/26/#comment31> use sizeof() or something better the 4. pgsql/src/backend/access/hash/hashpage.c <http://reviewdemo.postgresql.org/r/26/#comment32> New empty line. pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment27> It would be better remove #define from hash.h and setup it there directly. pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment28> Why not return directly uint32? pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment22> Retype to correct return type. pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment29> Whats about null values? pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment30> I'm not sure if values modification is safe. Please, recheck. pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment23> Return value is not much clear. I prefer to return InvalidOffset when no record is found. However it seems that you use result also for PageAddItem to put item on correct ordered position. I think better explanation should help to understand how it works. pgsql/src/backend/access/hash/hashutil.c <http://reviewdemo.postgresql.org/r/26/#comment26> It could return FirstOffset number in case when nothing interesting is on the page. pgsql/src/include/access/hash.h <http://reviewdemo.postgresql.org/r/26/#comment34> Why not define new datatype for example HashKey instead of uint32? pgsql/src/include/access/hash.h <http://reviewdemo.postgresql.org/r/26/#comment33> It is not good place. See my other comment. -------------- You also forgot to bump hash index version in meta page. Zdenek -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3Zdenek Kotala <Zdenek.Kotala@...> writes:
> I performed code review and see my comments. Thanks for the comments. I've incorporated all of these into an updated patch that I'm preparing, except for > Why not define new datatype for example HashKey instead of uint32? This seems like a good idea, but I think we should do it as a separate, cosmetic-cleanup patch. It'll touch a lot of parts of access/hash/ that the current patch doesn't need to change, and thus complicate reviewing. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3Zdenek Kotala <Zdenek.Kotala@...> writes:
> pgsql/src/backend/access/hash/hashutil.c > <http://reviewdemo.postgresql.org/r/26/#comment27> > It would be better remove #define from hash.h and setup it there > directly. Actually, I don't like this aspect of the patch one bit: it means that the system catalogs are lying about what is stored in the index, which seems likely to break something somewhere, either now or down the road. I think the correct way to handle this is to make the pg_attribute entries (and hence the index's relation descriptor) accurately match what is stored in the index. For testing purposes I propose this crude hack in catalog/index.c's ConstructTupleDescriptor(): *** src/backend/catalog/index.c.orig Mon Aug 25 18:42:32 2008 --- src/backend/catalog/index.c Thu Sep 4 16:20:12 2008 *************** *** 133,138 **** --- 133,139 ---- Form_pg_attribute to = indexTupDesc->attrs[i]; HeapTuple tuple; Form_pg_type typeTup; + Form_pg_opclass opclassTup; Oid keyType; if (atnum != 0) *************** *** 240,246 **** if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for opclass %u", classObjectId[i]); ! keyType = ((Form_pg_opclass) GETSTRUCT(tuple))->opckeytype; ReleaseSysCache(tuple); if (OidIsValid(keyType) && keyType != to->atttypid) --- 241,252 ---- if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for opclass %u", classObjectId[i]); ! opclassTup = (Form_pg_opclass) GETSTRUCT(tuple); ! /* HACK: make hash always use int4 as storage (really it's uint32) */ ! if (opclassTup->opcmethod == HASH_AM_OID) ! keyType = INT4OID; ! else ! keyType = opclassTup->opckeytype; ReleaseSysCache(tuple); if (OidIsValid(keyType) && keyType != to->atttypid) Assuming the patch gets accepted, we should devise some cleaner way of letting index AMs adjust their indexes' reldescs; maybe declare a new entry point column in pg_am that lets the AM modify the tupledesc constructed by this function before it gets used to create the index. But that is irrelevant to performance testing, so I'm not going to do it right now. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3Here is an updated patch incorporating Zdenek's review, my own
observation that we should make the index tupledesc tell the truth, and some other fixes/improvements such as making backwards scans work as expected. The main thing lacking before this could be committed, from a code standpoint, is a cleaner solution to the problem of adjusting the index tupledesc (see the ugly hack in catalog/index.c). However, that complaint is irrelevant for functionality or performance testing, so I'm throwing this back out there in hopes someone will do some... I thought a little bit about how to extend this to store both hashcode and original index key, and realized that the desire to have a truthful index tupledesc makes that a *whole* lot harder. The planner, and really even the pg_index catalog representation, assume that the visible columns of an index are one-for-one with the index keys. We can slide through with the attached patch because this is still true --- effectively we're just using a "storage type" different from the indexed column's type for hash indexes, as already works for GIST and GIN. But having two visible columns would bollix up quite a lot of stuff. So I think if we actually want to do that, we'd need to revert to the concept of cheating on the tupledesc. Aside from the various uglinesses that I was able to remove from the original patch by not having that, I'm still quite concerned that we'd find something else wrong with doing that, further down the road. So my thinking right now is that we should just test this patch as-is. If it doesn't show really horrid performance when there are lots of hash key collisions, we should forget the store-both-things idea and just go with this. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Thu, Sep 4, 2008 at 5:11 PM, Tom Lane <tgl@...> wrote:
> So my thinking right now is that we should just test this patch as-is. > If it doesn't show really horrid performance when there are lots of > hash key collisions, we should forget the store-both-things idea and > just go with this. Ok let me know if this is to naive of an approach or not hitting the right cases you want tested. create table hash_a (same text, uniq text); insert into hash_a (same, uniq) select 'same', n from generate_series(0, 5000) as n; create table hash_b (uniq text); insert into hash_b (uniq) select n from generate_series(5000, 10000) as n; pgbench -c 1 -t 100 -n -f of the following hash_same.sql: set enable_seqscan to off; set enable_mergejoin to off; select 1 from hash_a as a inner join hash_a as aa on aa.same = a.same; hash_uniq.sql: set enable_seqscan to off; set enable_mergejoin to off; select 1 from hash_a as a inner join hash_b as b on b.uniq = a.uniq; -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3"Alex Hunsaker" <badalex@...> writes:
> Ok let me know if this is to naive of an approach or not hitting the > right cases you want tested. You have the unique-versus-not dimension, but I'm also wondering about narrow vs wide index keys (say about 8 bytes vs 50-100 or so). In the former case we're not saving any index space by storing only the hash code, so these could be expected to have different performance behaviors. As for specifics of the suggested scripts: * might be better to do select count(*) not select 1, so that client communication is minimized * check that the queries actually use the indexes (not sure that the proposed switch settings ensure this, not to mention you didn't create the indexes) * make sure the pgbench transaction counts are large enough to ensure significant runtime * the specific table sizes suggested are surely not large enough regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3I wrote:
> You have the unique-versus-not dimension, On second thought, actually not. What we want to look at is the penalty for false matches due to *distinct* key values that happen to have the same hash codes. Your test case for all-the-same is using all the same key values, which means it'll hit the heap a lot, but none of those will be wasted trips. So what we need for testing is a few different key values that hash to the same code. Not sure about an easy way to find such. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Thu, Sep 4, 2008 at 7:13 PM, Tom Lane <tgl@...> wrote:
> "Alex Hunsaker" <badalex@...> writes: >> Ok let me know if this is to naive of an approach or not hitting the >> right cases you want tested. > > You have the unique-versus-not dimension, but I'm also wondering about > narrow vs wide index keys (say about 8 bytes vs 50-100 or so). In the > former case we're not saving any index space by storing only the hash > code, so these could be expected to have different performance > behaviors. Arg yes... I just read the last part of your mail in this thread. I think it was the one on -hackers that talked about narrow vs wide... so I figured I would just try to do what the thread where you posted the patch talked about namley the below: >So my thinking right now is that we should just test this patch as-is. >If it doesn't show really horrid performance when there are lots of >hash key collisions, we should forget the store-both-things idea and >just go with this. So I thought, lets try to generate lots of hash collisions... obviosly though using the same key wont do that... Not sure what I was thinking > As for specifics of the suggested scripts: > > * might be better to do select count(*) not select 1, so that client > communication is minimized Yar. > * check that the queries actually use the indexes (not sure that the > proposed switch settings ensure this, not to mention you didn't create > the indexes) Well I was assuming I could just test the speed of a hash join... > * make sure the pgbench transaction counts are large enough to ensure > significant runtime > * the specific table sizes suggested are surely not large enough Ok -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Thu, Sep 4, 2008 at 7:45 PM, Tom Lane <tgl@...> wrote:
> So what we need for testing is a few different key values that hash to > the same code. Not sure about an easy way to find such. Hrm, well I have not really looked at the hash algorithm but I assume we could just reduce the number of buckets? -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3"Alex Hunsaker" <badalex@...> writes:
> On Thu, Sep 4, 2008 at 7:45 PM, Tom Lane <tgl@...> wrote: >> So what we need for testing is a few different key values that hash to >> the same code. Not sure about an easy way to find such. > Hrm, well I have not really looked at the hash algorithm but I assume > we could just reduce the number of buckets? No, we need fully equal hash keys, else the code won't visit the heap. I guess one thing we could do for testing purposes is lobotomize one of the datatype-specific hash functions. For instance, make int8_hash return the input mod 2^32, ignoring the upper bytes. Then it'd be easy to compute different int8s that hash to the same thing. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3"Alex Hunsaker" <badalex@...> writes:
> On Thu, Sep 4, 2008 at 7:13 PM, Tom Lane <tgl@...> wrote: >> * check that the queries actually use the indexes (not sure that the >> proposed switch settings ensure this, not to mention you didn't create >> the indexes) > Well I was assuming I could just test the speed of a hash join... Uh, no, hash joins have nearly zip to do with hash indexes. They rely on the same per-datatype support functions but that's the end of the commonality. regards, tom lane -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Thu, Sep 4, 2008 at 8:17 PM, Tom Lane <tgl@...> wrote:
> I guess one thing we could do for testing purposes is lobotomize one of > the datatype-specific hash functions. For instance, make int8_hash > return the input mod 2^32, ignoring the upper bytes. Then it'd be easy > to compute different int8s that hash to the same thing. Heh Ok im slowly getting there... So we lobotomize hashint8 and then time how long it takes to make an index on a table... something like: create table test_hash(num int8); (obviously on a 64 bit machine) int main(void) { unsigned long y = 0; unsigned cnt = 0; printf("insert into test_hash (num) values "); //while(cnt != LONG_MAX/UINT_MAX) while(cnt < 10000000) { y += UINT_MAX; printf("(%ld), ", y); cnt++; } printf("(0);\n"); } ./a.out | psql pgbench -c 1 -t1000 -n -f test.sql test.sql: create index test_hash_num_idx on test_hash using hash (num); drop index test_hash_num_idx; For both pre and post patch just to make sure post patch is not worse than pre patch??? If im still way off and its not to much trouble want to give me a test case to run =) ? Or maybe because hash collisions should be fairly rare its not something to really worry about? -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3On Thu, Sep 4, 2008 at 9:48 PM, Alex Hunsaker <badalex@...> wrote:
Ok here are the results: (data generated from the c program before) select count(1) from test_hash; count ----------- 100000011 create index test_hash_num_idx on test_hash using hash (num); CVS: Time: 698065.180 ms patch: Time: 565982.099 ms ./pgbench -c 1 -t 100000 -n -f bench.sql bench.sql select count(1) from test_hash where num = 110034304728896610; CVS: tps = 7232.375875 (excluding connections establishing) patch: tps = 7913.700150 (excluding connections establishing) EXPLAIN ANALYZE select count(1) from test_hash where num = 110034304728896610; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------ Aggregate (cost=29.24..29.25 rows=1 width=0) (actual time=0.066..0.067 rows=1 loops=1) -> Index Scan using test_hash_num_idx on test_hash (cost=0.00..29.24 rows=1 width=0) (actual time=0.051..0.054 rows=1 loops=1) Index Cond: (num = 110034304728896610::bigint) Total runtime: 0.153 ms Oddly the index sizes were the same (4096 MB) is that to be expected? Here is the change I made to hashint8 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -61,12 +61,14 @@ hashint8(PG_FUNCTION_ARGS) */ #ifndef INT64_IS_BUSTED int64 val = PG_GETARG_INT64(0); - uint32 lohalf = (uint32) val; +/* uint32 lohalf = (uint32) val; uint32 hihalf = (uint32) (val >> 32); lohalf ^= (val >= 0) ? hihalf : ~hihalf; return hash_uint32(lohalf); +*/ + return val % 4294967296; #else /* here if we can't count on "x >> 32" to work sanely */ return hash_uint32((int32) PG_GETARG_INT64(0)); -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
Re: hash index improving v3Alex Hunsaker napsal(a):
> On Thu, Sep 4, 2008 at 9:48 PM, Alex Hunsaker <badalex@...> wrote: > > Ok here are the results: > > (data generated from the c program before) > select count(1) from test_hash; > count > ----------- > 100000011 > > create index test_hash_num_idx on test_hash using hash (num); > CVS: Time: 698065.180 ms > patch: Time: 565982.099 ms > > ./pgbench -c 1 -t 100000 -n -f bench.sql > bench.sql > select count(1) from test_hash where num = 110034304728896610; > > CVS: tps = 7232.375875 (excluding connections establishing) > patch: tps = 7913.700150 (excluding connections establishing) > > EXPLAIN ANALYZE select count(1) from test_hash where num = 110034304728896610; > QUERY > PLAN > ------------------------------------------------------------------------------------------------------------------------------------ > Aggregate (cost=29.24..29.25 rows=1 width=0) (actual > time=0.066..0.067 rows=1 loops=1) > -> Index Scan using test_hash_num_idx on test_hash > (cost=0.00..29.24 rows=1 width=0) (actual time=0.051..0.054 rows=1 > loops=1) > Index Cond: (num = 110034304728896610::bigint) > Total runtime: 0.153 ms > > > Oddly the index sizes were the same (4096 MB) is that to be expected? I think yes, because haskey is uint32. You save space only if you use hash for example on varchar attribute. Zdenek -- Sent via pgsql-patches mailing list (pgsql-patches@...) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-patches |
|
|
|
| < Prev | 1 - 2 - 3 - 4 | Next > |
| Free embeddable forum powered by Nabble | Forum Help |