C++STL（第十五篇：hashtable）

更新时间:2023-05-03 04:23:15 阅读：评论：0

C++STL（第⼗五篇：hashtable）

1、hashtable

hashtable 的⽬的是为了提供任何操作都是常数级别。SGI STL 中， hash table 使⽤了开链法实现的。⼤致的意思如下图所⽰：

hash table 内的元素为桶⼦（bucket），每个桶⼦⾥⾯有很多节点。其实有点像我们前⾯整理的，双端队列主控是个指向指针的指针，⽽hash table是⼀个vector；双端队列缓冲区是⼀块连续内存，像是array，⽽hash table 则是⼀个类似的单向链表。⼀下便是⼀个node 节点的结构：

template<class Value>

struct __hashtable_node

{

__hashtable_node* next;

Value val;

}

2、hashtable 迭代器

hashtable 迭代器必须永远维系着与整个 “buckets vector” 的关系，并记录⽬前所指的节点。前进时，如果正巧是list 的尾端，则跳到下⼀个节点。因为使⽤了类似单向链表的结构，所以迭代器类型是 forward_iterator_tag（前向迭代器）。迭代器的定义如下：

template<class Value, class Key, class HashFcn, class ExtractKey,....>

struct __hashtable_iterator

{

typedef forward_iterator_tag iterator_category;

typedef Value value_type;

typedef ptrdiff_t difference_type； //迭代器萃取那⼀套

...

node* cur; //迭代器⽬前所指节点

hashtable* ht; //保持对容器的连接关系（可能需要从当前 bucket 跳到下⼀个 bucket）

//其它操作就不写了，这⾥只写⼀下 operator++的操作

iterator operator++()

{

const node* old = cur;

cur = cur->next;

if( !cur ) //如果到了当前 bucket 的尾端

{

size_type bucket = ht->bkt_num(old->val); //根据元素值，定位出下⼀个bucket

while( !cur && ++bucket < ht->buckets.size() )

cur = ht->buckets[bucket];

}

return *this;

}

3、hashtable 的数据结构

hashtable 的模板参数⽐较多，包括：

Value：节点的实值型别

Key：节点的键值型别

HashFcn：hash function 的函数型别

ExtractKey：从节点中取出键值的⽅法（函数或仿函数）

EqualKey：判断键值相同与否的⽅法

Alloc：空间配置器

template<class Value, class Key, class HashFcn, class ExtractKey, class EqualKey, calss Alloc>

class hashtable

{

public:

typedef HashFcn hasher;

typedef EqualKey key_equal;

typedef size_t size_type;

private:

typedef __hashtable_node<Value> node;

har hash;

key_equal equals;

ExtractKey get_key;

vector<node*,Alloc> buckets; //中控表格

size_type num_element怎样挽回天蝎男 s; //所有链表元素的个数

};

hashtable 中控表格的⼤⼩⼀般设计为⼀个质数，⾄于为什么⼤家需要⾃⾏百度，这⾥我讲不明⽩。SGI STL 中先把 28 个质数准备好，以备随时访问，同时提供⼀个函数，⽤来查询在这 28 个质数之中，“最接近某数并⼤于某数” 的质数：

static const int __stl_num_primes = 28;

static const unsigned long __stl_prime_list[__stl_num_primes] =

{

53, 97, 193, 389, 769,

1543, 3079, 6151, 12289, 24593,

49157, 98317, 1幼鸽怎么喂养 96613, 393241, 786433,

1572869, 3145739, 6291469, 12582917, 25165843,

50331653, 100663319, 201326611, 402653189, 805306457,

1610612741, 3221225473ul, 4294967291ul

};

//以下是找出，最接近并⼤于或等于n的那个质数

inline unsigned long __stl_next_prime(unsigned long n)

{

const unsigned long* first = __stl_prime_list;

const unsigned long* last = __stl_prime_list + __stl_num_primes;

//lower_bound 是⼀个泛型算法

const unsigned long* pos = lower_bound(first, last, n);

}

4、hashtable操作

hashtable 的插⼊跟 RB-tree 的插⼊类似，有两种插⼊⽅法 inrt_unique 和 inrt_equal ，意思也是⼀样的，inrt_unique 不允许有重复值，⽽ inrt_equal 允许有重复值。

因为都会⽤到是否需要重建表格的判断，我们先来整理这⼀部分：

template<class V, class K, class HF, class Ex, class Eq, class A>

void hashtable<V, K, HF, Ex, Eq, A>::resize(size_type num_elements_hint)

{

//判断 “表格重建与否” 是拿元素个数和 bucket vector 的⼤⼩来⽐，如果前者⼤于后者，就重建表格

//所以每个 bucket list 的最⼤容量和 bucket vector 的⼤⼩相同

const size_type old_n = buckets.size();

if( num_elements_hint > old_n )

{

const size_type n = next_si淋过雨的空气 ze(num_elements_hint); //next_size 底层调⽤ __stl_next_prime()

if( n > old_n)

{

vector<node*,A> tmp(n, (node*) 0); //设⽴新的 buckets

//以下是处理每⼀个旧的 bucket

for( size_type bucket = 0; bucket < old_n; ++bucket )

{

node* first = buckets[bucket]; //指向节点所对应之串⾏的起始节点

while( first ) //串⾏还没结束

{

//找出当前节点应该放在新buckets 的哪⼀个位置

size_type new_bucket = bkt_num(first->val, n);

//以下就是对新旧表格的处理，同时还要维护好 first 指针

buckets[bucket] = first->next;

first->next = tmp[new_bucket];

tmp[new_bucket] = first;

first = buckets[bucket];

}

buckets.swap( tmp ); //vector::swap 函数，新旧两个buckets对调，对调之后释放tmp内存

}

现在来看⼀下 inrt_unique 函数，需要注意的是插⼊时，新节点直接插⼊到链表的头节点，代码如下：

pair<iterator, bool> inrt_unique(const value_type& obj)

{

resize(num_elements + 1);

return inrt_unique_noresize(obj);

}

pair<iterator, bool> inrt_unique_noresize(const value_type& obj)

{

const size_type n = bkt_num(obj); //决定 obj 应位于 buckets 的那⼀个链表中

node* first = buckets[n];

//遍历当前链表，如果发现有相同的键值，就不插⼊，⽴刻返回

for( node* cur = fir白蛋白高 st; cur; cur = cur->next)

{

if( equals(get_key(cur->val), get_key(obj)) )

return pair<iterator, bool>(iterator(cur, this), fal);

}

//离开以上循环（或根本未进⼊循环）时，first指向 bucket 所指链表的头节点

node* tmp = new_node(obj); //产⽣新节点

tmp->next = first;

buckets[n] = tmp; //令新节点为链表的第⼀个节点

++num_elements; //节点个数累加1

return pair<iterator, bool>( iterator(tmp,this), true);

}

允许重复插⼊的 inrt_equal，需要注意的是插⼊时，重复节点插⼊到相同节点的后⾯，新节点还是插⼊到链表的头节点，代码如下：

iterator inrt_equal(const value_type& obj)

{

resize( num_elements + 1 ); //判断是否需要重建表格，如需要就扩充

return inrt_equal_noresize(obj);

}

iterator inrt_equalnoresize(const value_type& obj)

{

const size_type n = bkt_num(obj); //决定 obj 应位于 buckets 的那⼀个链表中 node* first = buckets[n];

//遍历当前链表，如果发现有相同的键值，就马上插⼊，⽴刻返回

for( node* cur = first; cur; cur = cur->next)

{

if( equals(get_key(cur->val), get_key(obj)) )

{

node* tmp = new_node(obj);

tmp->next = cur->next; //新节点插⼊当前节点位置之后

cur->next = tmp;

++num_elements;

return iterator(tmp, this);

}

//运⾏到这⾥，表⽰没有发现重复的键值

node* tmp = new_node(obj); //产⽣新节点

tmp->next = first;

buckets[n] = tmp; //令新节点为链表的第⼀个节点

++num_elements; //节点个数累加1

return iterator(tmp, this);

}

整体的删除操作 clear，代码如下：

void clear()

{

for( size_type i = 0; i<buckets.size(); ++i)

{

node* cur = buckets[i];

//将 bucket list 中的每⼀个节点删除掉

while( cur != 0 )

{

node* next = cur->next;

delete_node(cur);

cur = next;

}

buckets[i] = 0; //令bucket内容为null指针

}

num_elements = 0; //令总节点个数为 0

/注意：buckets vector 并未释放掉空间，扔保留原来⼤⼩

}

复制操作 copy_from，代码如下：

void copy_from(const hashtable& ht)

{

//先清除⼰⽅的 buckets vector

buckets.clear();

buckets.r秘决 erve(ht.buckets.size());

//从⼰⽅的 buckets vector 尾端开始，插⼊ n 个元素，其值为 null 指针

buckets.d(), ht.buckets.size(), (node*)0);

//真正的执⾏复制操作

for(size_type i = 0; i < ht.buckets.size(); ++i)

{

if( const node* cur = ht.bucktes[i] ）

{

node* copy = new_node(cur->val);

buckets[i] = copy;

for(node* next = cu中国大黄片 r->next; next; cur = next, next = cur->next)

{

copy->next = new_node(next->val);

copy = copy->next;

}

hashtable 使⽤时，⼀开始是53个元素节点，因为咱上⾯28个质数中，最⼩的是53。也就是说 buckets vector 保留的是 53 个bucket，每个 bucket 是⼀个指针，初始值为 null。如果，我们循序加⼊ 6 个元素：59,63,108,2,53,55，于是 hashtable 变成如下图的样⼦：

59 除 53 余数为 6，所以放到第六个bucket中，同理，63放第⼗个bucket中，⽽108,2,55都放在第⼆个 bucket 中。

如果，我们在插⼊48个元素（0~47），是总数达到54个元素，则超过了 buckets vector 的⼤⼩，符合表格重建的条件，hashtable扩充到第⼆个质数 97。然后排列如下所⽰：

本文发布于:2023-05-03 04:23:15，感谢您对本站的认可！

本文链接：https://www.wtabcd.cn/fanwen/fan/82/519926.html

上一篇：Unity中的Animator动画详解

下一篇：小升初重点词汇(素材)-六年级下册英语