-
-
Notifications
You must be signed in to change notification settings - Fork 806
Expand file tree
/
Copy pathset_adt.py
More file actions
191 lines (148 loc) · 5.38 KB
/
set_adt.py
File metadata and controls
191 lines (148 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# -*- coding: utf-8 -*-
# 从数组和列表章复制的代码
class Array(object):
def __init__(self, size=32, init=None):
self._size = size
self._items = [init] * size
def __getitem__(self, index):
return self._items[index]
def __setitem__(self, index, value):
self._items[index] = value
def __len__(self):
return self._size
def clear(self, value=None):
for i in range(len(self._items)):
self._items[i] = value
def __iter__(self):
for item in self._items:
yield item
class Slot(object):
"""定义一个 hash 表 数组的槽
注意,一个槽有三种状态,看你能否想明白。相比链接法解决冲突,二次探查法删除一个 key 的操作稍微复杂。
1.从未使用 HashMap.UNUSED。此槽没有被使用和冲突过,查找时只要找到 UNUSED 就不用再继续探查了
2.使用过但是 remove 了,此时是 HashMap.EMPTY,该探查点后边的元素扔可能是有key
3.槽正在使用 Slot 节点
"""
def __init__(self, key, value):
self.key, self.value = key, value
class HashTable(object):
UNUSED = None # 没被使用过
EMPTY = Slot(None, None) # 使用却被删除过
def __init__(self):
self._table = Array(8, init=HashTable.UNUSED) # 保持 2*i 次方
self.length = 0
@property
def _load_factor(self):
# load_factor 超过 0.8 重新分配
return self.length / float(len(self._table))
def __len__(self):
return self.length
def _hash(self, key):
return abs(hash(key)) % len(self._table)
def _find_key(self, key):
index = self._hash(key)
_len = len(self._table)
while self._table[index] is not HashTable.UNUSED:
if self._table[index] is HashTable.EMPTY:
index = (index*5 + 1) % _len
continue
elif self._table[index].key == key:
return index
else:
index = (index*5 + 1) % _len
return None
def _find_slot_for_insert(self, key):
index = self._hash(key)
_len = len(self._table)
while not self._slot_can_insert(index):
index = (index*5 + 1) % _len
return index
def _slot_can_insert(self, index):
return (self._table[index] is HashTable.EMPTY or self._table[index] is HashTable.UNUSED)
def __contains__(self, key): # in operator
index = self._find_key(key)
return index is not None
def add(self, key, value):
if key in self:
index = self._find_key(key)
self._table[index].value = value
return False
else:
index = self._find_slot_for_insert(key)
self._table[index] = Slot(key, value)
self.length += 1
if self._load_factor >= 0.8:
self._rehash()
return True
def _rehash(self):
old_table = self._table
newsize = len(self._table) * 2
self._table = Array(newsize, HashTable.UNUSED)
self.length = 0
for slot in old_table:
if slot is not HashTable.UNUSED and slot is not HashTable.EMPTY:
index = self._find_slot_for_insert(slot.key)
self._table[index] = slot
self.length += 1
def get(self, key, default=None):
index = self._find_key(key)
if index is None:
return default
else:
return self._table[index].value
def remove(self, key):
index = self._find_key(key)
if index is None:
raise KeyError()
value = self._table[index].value
self.length -= 1
self._table[index] = HashTable.EMPTY
return value
def __iter__(self):
for slot in self._table:
if slot not in (HashTable.EMPTY, HashTable.UNUSED):
yield slot.key
#########################################
# 上边是从 哈希表章 拷贝过来的代码,我们会直接继承 HashTable 实现 集合 set
#########################################
class SetADT(HashTable):
def add(self, key):
# 集合其实就是一个 dict,只不过我们把它的 value 设置成 1
return super(SetADT, self).add(key, True)
def __and__(self, other_set):
"""交集 A&B"""
new_set = SetADT()
for element_a in self:
if element_a in other_set:
new_set.add(element_a)
return new_set
def __sub__(self, other_set):
"""差集 A-B"""
new_set = SetADT()
for element_a in self:
if element_a not in other_set:
new_set.add(element_a)
return new_set
def __or__(self, other_set):
"""并集 A|B"""
new_set = SetADT()
for element_a in self:
new_set.add(element_a)
for element_b in other_set:
new_set.add(element_b)
return new_set
def test_set_adt():
sa = SetADT()
sa.add(1)
sa.add(2)
sa.add(3)
assert 1 in sa # 测试 __contains__ 方法,实现了 add 和 __contains__,集合最基本的功能就实现啦
sb = SetADT()
sb.add(3)
sb.add(4)
sb.add(5)
assert sorted(list(sa & sb)) == [3]
assert sorted(list(sa - sb)) == [1, 2]
assert sorted(list(sa | sb)) == [1, 2, 3, 4, 5]
if __name__ == '__main__':
test_set_adt()