unordered_map与unordered_set封装

🕗 发布于 2024-12-09 13:52 哈希算法算法散列表 数据结构

源码及框架分析

SGI-STL30版本源代码中没有 unordered_map 和 unordered_set，SGI-STL30版本是 C++11 之前的 STL 版本，这两个容器是 C++11 之后才更新的。但是 SGI-STL30 实现了哈希表，只容器的名字是 hash_map 和 hash_set，他是作为非标准的容器出现的，非标准是指非 C++ 标准规定必须实现的，源代码在 hash_map/hash_set/stl_hash_map/stl_hash_set/stl_hashtable.h 中、

unordered_map和unordered_set的作用和优点

unordered_map和unordered_set在存储元素的时候是没有顺序的，它只会根据key的哈希值来存放数据，在根据key查找的时候是非常高效的，它们查找的时间复杂度约等于O(n)。

模拟实现unordered_map和unordered_set

实现出不同类型的数据也能使用散列函数

template<class K>
struct HashFunc
{
size_t operator()(const K& key)
{
return (size_t)key;
}
};

template<>
struct HashFunc<string>
{
size_t operator()(const string& s)
{
// BKDR
size_t hash = 0;
for (auto ch : s)
{
hash += ch;
hash *= 131;
}

return hash;
}
};

实现出复用哈希表的框架，并支持insert

namespace hyx
{
template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
public:

struct key
{
const K& operator()(const pair<K, V>& key)
{
return key.first;
}
};

pair<Iterator, bool> Insert(const pair<const K,V> key)
{
return _ht.insert(key);
}

private:
hhyx::hashtable<K, pair<const K, V>, key, Hash> _ht;
};
}

#include"hashtable.h"
namespace hyx
{
template<class K, class V=K, class Hash=HashFunc<K>>
class unordered_set
{
public:
struct key
{
const K& operator()(const K& key)
{
return key;
}
};

pair<Iterator, bool> Insert(const V key)
{
return _ht.insert(key);
}

private:
hhyx::hashtable<K, V, key, Hash> _ht;
};
}

struct key是在hashtable中能够返回数据正确的key，其中实现了operator()作为仿函数传参。

namespace hhyx
{
template<class V>
struct hashnode
{
V _kv;
hashnode<V>* next;

hashnode(const V kv)
:_kv(kv)
, next(nullptr)
{}
};

inline unsigned long __stl_next_prime(unsigned long n)//用来更新数组容量的大小
{
// Note: assumes long is at least 32 bits.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] = {
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};
const unsigned long* first = __stl_prime_list;
const unsigned long* last = __stl_prime_list + __stl_num_primes;
const unsigned long* pos = lower_bound(first, last, n);
return pos == last ? *(last - 1) : *pos;
}
    template<class K, class V, class KeyOfT, class Hash>
   class hashtable
   {  
   public:
   typedef hashnode<V> node;

hashtable()
:_root(__stl_next_prime(0))
, _n(0)
{}
    pair<iterator,bool> insert(const V kv)
   {
   //如果该结点已经存在则不需要进行插入
   KeyOfT key;
   Hash hashfunc;
   iterator find = Find(kv);//查找是根据key来进行查找而不是pair
   if (find != end())
   {
    return { find,false };
   }
   if (_n == _root.size())//负载因子=1需要扩容
   {
   vector<node*> newhash(__stl_next_prime(_root.size() + 1));
   for (size_t i = 0; i < _root.size(); i++)
   {
   node* cur = _root[i];
   while (cur)//扩容的时候不创建新节点，而是将旧结点的指针移走，提高了效率
   {
   int hash0 = hashfunc(key(cur->_kv)) % newhash.size();
   cur->next = newhash[hash0];
   newhash[hash0] = cur;
   cur = cur->next;
   }
   _root[i] = nullptr;//移除完毕之后将旧表的结点设成空
    }
   _root.swap(newhash);
   }

   int hash0 = hashfunc(key(kv)) % _root.size();
   node* cur = new node(kv);
   cur->next = _root[hash0];
   _root[hash0] = cur;
   _n++;
   return { iterator(cur,this),true };
   }
private:
vector<node*> _root;
size_t _n;
};
}

这里使用vector<node*>来作为散列表，_n作为存储的数据个数

支持iterator的实现

• iterator 实现的大框架跟 list 的 iterator 思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。

• 这里的难点是 operator++ 的实现。iterator 中有一个指向结点的指针，如果当前桶下面还有结点，则结点的指针指向下个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题，参考上面的源码，我们可以看到 iterator 中除了有结点的指针，还有哈希表对象的指针，这样当前桶走完了，要计算下一个桶就相对容易多了，用 key 值计算出当前桶位置，依次往后找下一个不为空的桶即可。

• begin() 返回第一个桶中第一个节点指针构造的迭代器，这里的 end() 返回迭代器可以用空表示。 • unordered_set 的 iterator 不支持修改，我们把 unordered_set 的第二个模板参数改成 const K 即可，HashTable<K, const K, SetKeyOfT, Hash> _ht;

• unordered_map 的 iterator 不支持修改 key 但是可以修改 value，我们把 unordered_map 的第二个模板参数 pair 的第一个参数改成 const K 即可，HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;

•在实现iterator前需要将hashtable进行前置声明

template<class K, class T, class KeyOfT, class Hash>
class hashtable;

template<class K, class V, class Ref, class Ptr, class KeyOfT, class Hash>
struct hashiterator
{
typedef hashnode<V> node;
typedef hashtable<K, V, KeyOfT, Hash> ht;
typedef hashiterator<K, V, Ref, Ptr, KeyOfT, Hash> Self;

node* _node;
const ht* _ht;

hashiterator(node* nod, const ht* ht)
:_node(nod)
, _ht(ht)
{}

Ref operator*()
{
return _node->_kv;
}

Ptr operator->()
{
return &_node->_kv;
}

bool operator!=(const Self& s)
{
return _node != s._node;
}

Self operator++()
{
if (_node->next)//如果当前桶还未走完，那就继续走
{
_node = _node->next;
}
else//当前桶已经走完
{
node* prev = _node;
node* cur = _node->next;
KeyOfT kf;
size_t hash0 = kf(_node->_kv) % _ht->_root.size();
hash0++;
while (hash0 < _ht->_root.size())
{
_node = _ht->_root[hash0];
if (_node)
break;
else
hash0++;
}

if (hash0 == _ht->_root.size())
{
_node = nullptr;
}

return *this;
}
}
};

源码

#include"hashtable.h"
namespace hyx
{
template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
public:

struct key
{
const K& operator()(const pair<K, V>& key)
{
return key.first;
}
};

typedef typename hhyx::hashtable<K, pair<const K, V>, key, Hash>::iterator Iterator;

Iterator Begin()
{
return _ht.begin();
}

Iterator End()
{
return _ht.end();
}

pair<Iterator, bool> Insert(const pair<const K,V> key)
{
return _ht.insert(key);
}

private:
hhyx::hashtable<K, pair<const K, V>, key, Hash> _ht;
};
}

#include"hashtable.h"
namespace hyx
{
template<class K, class V=K, class Hash=HashFunc<K>>
class unordered_set
{
public:
struct key
{
const K& operator()(const K& key)
{
return key;
}
};

typedef typename hhyx::hashtable<K, V, key, Hash>::iterator Iterator;

Iterator Begin()
{
return _ht.begin();
}

Iterator End()
{
return _ht.end();
}

pair<Iterator, bool> Insert(const V key)
{
return _ht.insert(key);
}

bool Erase(K key)
{
return _ht.earse(key);
}
private:
hhyx::hashtable<K, V, key, Hash> _ht;
};
}

#include<string>
using namespace std;

template<class K>
struct HashFunc
{
size_t operator()(const K& key)
{
return (size_t)key;
}
};

template<>
struct HashFunc<string>
{
size_t operator()(const string& s)
{
// BKDR
size_t hash = 0;
for (auto ch : s)
{
hash += ch;
hash *= 131;
}

return hash;
}
};

namespace hhyx
{
template<class V>
struct hashnode
{
V _kv;
hashnode<V>* next;

hashnode(const V kv)
:_kv(kv)
, next(nullptr)
{}
};

inline unsigned long __stl_next_prime(unsigned long n)//用来更新数组容量的大小
{
// Note: assumes long is at least 32 bits.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] = {
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};
const unsigned long* first = __stl_prime_list;
const unsigned long* last = __stl_prime_list + __stl_num_primes;
const unsigned long* pos = lower_bound(first, last, n);
return pos == last ? *(last - 1) : *pos;
}

template<class K, class T, class KeyOfT, class Hash>
class hashtable;

template<class K, class V, class Ref, class Ptr, class KeyOfT, class Hash>
struct hashiterator
{
typedef hashnode<V> node;
typedef hashtable<K, V, KeyOfT, Hash> ht;
typedef hashiterator<K, V, Ref, Ptr, KeyOfT, Hash> Self;

node* _node;
const ht* _ht;

hashiterator(node* nod, const ht* ht)
:_node(nod)
, _ht(ht)
{}

Ref operator*()
{
return _node->_kv;
}

Ptr operator->()
{
return &_node->_kv;
}

bool operator!=(const Self& s)
{
return _node != s._node;
}

Self operator++()
{
if (_node->next)//如果当前桶还未走完，那就继续走
{
_node = _node->next;
}
else//当前桶已经走完
{
node* prev = _node;
node* cur = _node->next;
KeyOfT kf;
size_t hash0 = kf(_node->_kv) % _ht->_root.size();
hash0++;
while (hash0 < _ht->_root.size())
{
_node = _ht->_root[hash0];
if (_node)
break;
else
hash0++;
}

if (hash0 == _ht->_root.size())
{
_node = nullptr;
}

return *this;
}
}
};

template<class K, class V, class KeyOfT, class Hash>
class hashtable
{
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
friend struct hashiterator;

public:
typedef hashnode<V> node;
typedef hashiterator<K, V, const V&, const V*, KeyOfT, Hash> iterator;

hashtable()
:_root(__stl_next_prime(0))
, _n(0)
{}

iterator begin()
{
if (_n == 0)
return end();

node* cur;
for (int i = 0; i < _root.size(); i++)
{
cur = _root[i];
if (cur)
{
return iterator(cur, this);
}
}

return end();
}

iterator end()
{
return iterator(nullptr, this);
}

pair<iterator,bool> insert(const V kv)
{
//如果该结点已经存在则不需要进行插入
KeyOfT key;
Hash hashfunc;
iterator find = Find(kv);//查找是根据key来进行查找而不是pair
if (find != end())
{
return { find,false };
}
if (_n == _root.size())//负载因子=1需要扩容
{
vector<node*> newhash(__stl_next_prime(_root.size() + 1));
for (size_t i = 0; i < _root.size(); i++)
{
node* cur = _root[i];
while (cur)//扩容的时候不创建新节点，而是将旧结点的指针移走，提高了效率
{
int hash0 = hashfunc(key(cur->_kv)) % newhash.size();
cur->next = newhash[hash0];
newhash[hash0] = cur;
cur = cur->next;
}
_root[i] = nullptr;//移除完毕之后将旧表的结点设成空
}
_root.swap(newhash);
}

int hash0 = hashfunc(key(kv)) % _root.size();
node* cur = new node(kv);
cur->next = _root[hash0];
_root[hash0] = cur;
_n++;
return { iterator(cur,this),true };
}

iterator Find(const V& kv)
{
Hash hashfunc;
KeyOfT ko;
int hash0 = hashfunc(ko(kv)) % _root.size();
node* cur = _root[hash0];
while (cur)
{
if (cur->_kv == kv) return { cur,this };

cur = cur->next;
}
return end();
}

bool earse(const K kv)
{
KeyOfT ko;
int hash0 = kv % _root.size();
node* prev = nullptr;
node* cur = _root[hash0];
while (cur)
{
if (ko(cur->_kv) == kv)
{
if (prev == nullptr)//删除的是头结点
{
_root[hash0] = nullptr;
}
else//非头节点
{
prev->next = cur->next;
}

delete cur;
_n--;
return true;
}
else//还未找到
{
prev = cur;
cur = cur->next;
}
}
return false;
}


private:
vector<node*> _root;
size_t _n;
};
}

原文地址：https://blog.csdn.net/hyxdg/article/details/144333871

免责声明：本站文章内容转载自网络资源，如本站内容侵犯了原著者的合法权益，可联系本站删除。更多内容请关注自学内容网（zxcms.com）！

上一篇：39 vector深入理解 · 迭代器失效&深度浅拷贝
下一篇：大语言模型应用开发框架LangChain

迅为瑞芯微RK3562开发板/核心板应用于人脸跟踪、身体跟踪、视频监控、自动语音识别(ASR)、图像分类驾驶员辅助系统(ADAS)、车牌识别、物体识别等
iTOP-3562开发板/核心板采用瑞芯微RK3562处理器，内部集成了四核A53+Mali G52架构，主频2GHZ，内置1TOPSNPU算力，RK809动态调频。支持几乎全格式的H.264解码，支
阅读更多2024-12-31
低精度只适用于未充分训练的LLM？腾讯提出LLM量化的scaling laws
尽管有一些研究声称原生的低比特LLM可以媲美fp16/bf16精度下的表现，但这些研究普遍都是在未充分语言模型上得到的结果从而推出的结论，研究人员认为在充分训练的情况下进行比较的话，低比特LLM也将很
阅读更多2024-12-31
医疗数仓配置Flume
Flume需要将Kafka中各topic的数据传输到HDFS，因此选用KafkaSource以及HDFSSink。对于安全性要求高的数据（不允许丢失）选用FileChannel，允许部分丢失的数据如日
阅读更多2024-12-31
Go中CAS算法
CAS算法常被用作乐观锁的实现方式，用于解决并发问题。例如，在计数器、缓存等场景中，可以使用CAS算法来确保数据的一致性和正确性。CAS操作会检查内存位置V的当前值是否与预期的旧值A相等。这一操作是原
阅读更多2024-12-31
Node.js 记账本项目总结
在使用Node.js结合Express框架构建记账本的过程中，遇到了一些常见的问题，这些问题主要集中在路径处理、包管理工具的选择、开发环境优化以及HTTP响应处理等方面。
阅读更多2024-12-31
Java 处理base64文件上传
在系统内有一个类似于公告的模块，如果里面添加的文章不是选择富文本上传图片的方式，而是选择复制别的文章直接粘贴到系统内的富文本，里面的图片就不会url，而是图片的base64格式，这样会导致数据库存储压
阅读更多2024-12-31
Tailwind CSS 实战：现代登录注册页面开发
基础结构搭建表单组件开发社交登录集成响应式适配深色模式支持动画效果表单验证性能优化可访问性增强通过合理使用 Tailwind CSS 的原子类，我们不仅实现了美观的界面，还确保了良好的用户体验和可维护
阅读更多2024-12-31
node.js下载、安装、设置国内镜像源（永久）（Windows11）
系统：Windows 11。
阅读更多2024-12-31
Slate文档编辑器-Decorator装饰器渲染调度
在这里我们主要讨论了slate中的decoration装饰器的实现，以及在实际使用中可能会遇到的问题，主要是在跨节点的情况下，我们需要将range拆分为多个range，然后分别进行处理，并且还分析了源
阅读更多2024-12-31
python去水印
请注意，这种方法仅适用于简单的水印去除，对于复杂的水印或嵌入到图像纹理中的水印，可能无法达到理想的效果。在实际应用中，还可以考虑使用深度学习的方法，如生成对抗网络（GAN）来去除水印，但需要大量的数据
阅读更多2024-12-31