数据结构

本文总阅读量

利用python实现 链表,栈与队列,树,图的数据结构

0.前记

python内置了很多高级数据结构,list,dict,tuple,string,set等,在使用的时候十分舒心。现在自己想了解下数据结构,所以重新查阅资料,写下数据结构的实现方式
Python内置多种数据结构的时间复杂度说明:https://wiki.python.org/moin/TimeComplexity
网友总结的数据结构时间复杂度说明:http://blog.csdn.net/u010366748/article/details/51469937

1.一维数据结构

一维数据结构, 是最简单和常用的数据结构, 常见的有数组, 链表, 栈和队列.

1.1顺序表

数组是一个每一项都内存相连的数据结构, 查找子项的时间复杂度是O(1), 插入数据的时间复杂度为O(n). 而顺序表是经过更改的数组, 插入最后一项数据的时间复杂度为O(1).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from typing import Any


class LinearTable(object):
"""在Python中使用list模拟线性表..."""

def __init__(self, max_length: int = 10, extend_num: int = 8):
self.max_length: int = max_length

# 当前有效的数组最长值
# 比如数组实际长度为20, 当只用了10个, 那么num为10
self.num: int = 0
# 扩容的长度
self.extend_num: int = extend_num
self.data = [None] * self.max_length

def is_empty(self) -> bool:
return self.num == 0

def is_full(self) -> bool:
return self.num == self.max_length

@classmethod
def from_list(cls, raw_list: list):
instance: "LinearTable" = cls()
for i in raw_list:
instance.append(i)
return instance

def to_list(self) -> list:
return self.data[: self.num]

def _extend(self):
self.data.extend([None] * self.extend_num)

def __getitem__(self, index: int) -> Any:
if not isinstance(index, int):
raise TypeError
if 0 <= index < self.num:
return self.data[index]
else:
# 表为空或者索引超出范围都会引发索引错误
raise IndexError

def __setitem__(self, index: int, value: Any):
if not isinstance(index, int):
raise TypeError
# 只能访问列表里已有的元素,self.num=0时,一个都不能访问,self.num=1时,只能访问0
if 0 <= index < self.num:
self.data[index] = value
else:
raise IndexError

def clear(self):
self.__init__()

def __len__(self):
return self.num

# 加入元素的方法 append()和insert()
def append(self, value: Any):
if self.is_full():
# 如果满了则进行扩容
self._extend()
self.data[self.num] = value
self.num += 1

def insert(self, index: int, value: Any):
if not isinstance(index, int):
raise TypeError
if index < 0: # 暂时不考虑负数索引
raise IndexError
# 当key大于元素个数时,默认尾部插入
if index > self.num:
self.append(value)
else:
if self.is_full():
self._extend()
self.num += 1

# 移动key后的元素
for i in range(self.num, index, -1):
self.data[i] = self.data[i - 1]
# 赋值
self.data[index] = value

def remove(self, index: int = -1):
"""假删除, 只是把值往前挪, 缩小一个有效范围"""
if not isinstance(index, int):
raise TypeError
if self.num - 1 < 0:
raise IndexError("pop from empty list")
elif index == -1:
# 原来的数还在,但列表不识别他
self.num -= 1
else:
for i in range(index, self.num - 1):
self.data[i] = self.data[i + 1]
self.num -= 1

def index(self, value: Any, start: int = 0) -> int:
"""从第几个开始找, 找到则返回索引"""
for i in range(start, self.num):
if self.data[i] == value:
return i
# 没找到
raise ValueError("%d is not in the list" % value)

def reverse(self):
"""列表反转"""
i, j = 0, self.num - 1
while i < j:
self.data[i], self.data[j] = self.data[j], self.data[i]
i, j = i + 1, j - 1

1.2单链表

单链表与顺序表不同的是, 每个子项都是通过指针连接起来的, 在内存中是不连续的. 由于每个子项都需要通过上个子项查出来, 所以查找的时间复杂度是O(n),不过由于在内存中是不连续的, 所以对中间的数据进行增删时, 只需要更改指针, 而不用把受到影响的所有子项进行挪动.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from typing import Any, Optional


class Node(object):
"""创建数据和数据对应的指针"""
def __init__(self, val: Any, node: Optional["Node"] = None):
self.data: Any = val
self.next: Optional["Node"] = node


class LinkList(object):
def __init__(self):
self.head: Optional[Node] = None
self.length: int = 0

@classmethod
def from_list(cls, raw_list: list) -> "LinkList":
"""通过list写入数据"""
instance: "LinkList" = cls()
instance.head = Node(raw_list[0])
node: Node = instance.head

for i in raw_list[1:]:
new_node: Node = Node(i)
# 定义指针
node.next = new_node
# 定义下一个数
node = new_node
instance.length = len(raw_list)
return instance

def to_list(self, left: int = 0, right: int = -1) -> list:
"""获取列表的函数,如果没输入right时,则默认读取到最后面,如果没输入left和right则输出全部数据.left不能大于等于right"""
if self.is_empty():
raise ValueError('Linklist is empty.')
cursor: int = 0
node: Node = self.head
if right == -1:
right = self.length
if right > self.length or left >= right:
raise ValueError('right param error')

while node.next and cursor < left:
node = node.next
cursor += 1
new_list: list = []
while node and cursor <= right:
new_list.append(node.data)
cursor += 1
node = node.next
return new_list

def __len__(self):
return self.length

def is_empty(self) -> bool:
return self.length == 0

def clear(self):
self.head = None
self.length = 0

def append(self, item: Any):
# 在列表最后面添加一个数据
new_node: Node = Node(item)
if not self.head:
self.head = new_node
else:
node: Node = self.head
while node.next:
node = node.next
node.next = new_node

def __getitem__(self, index: int) -> Any:
self._check_index(index)

cursor: int = 0
node: Node = self.head

while node:
node = node.next
cursor += 1
if cursor == index:
return node.data

raise ValueError('target is not exist!')

def _check_index(self, index: int):
if self.is_empty() or index < 0 or index > self.length:
raise ValueError(f"index:{index} error")

def __setitem__(self, index: int, item: Any):
"""单独实现替换, 节省一遍查询"""
self._check_index(index)

node: Node = self.head
if index == 0:
new_node: Node = node.next
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
cursor: int = 0
while node:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node.next)
prev_node.next = new_node

def insert(self, index: int, item: Any):
"""向指定位置插入数据"""
self._check_index(index)

if index == 0:
new_node: Node = Node(item, self.head)
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
node: Node = self.head
cursor: int = 0
while node:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node)
prev_node.next = new_node
new_node.next = node

def delete(self, index: int):
"""删除指定位置数据"""
self._check_index(index)

node: Node = self.head
# 如果是0的话,起点改为第二个数据
if index == 0:
new_node: Node = node.next
self.head = new_node
else:
cursor: int = 1
while node:
prev_node = node
node = node.next
cursor += 1
# 把下一个数据的指针,改为下下个数据的指针
if index == cursor:
prev_node.next = node.next

def __contains__(self, item: Any) -> bool:
"""查找元素是否在里面"""
if self.is_empty():
raise ValueError('Linklist is empty.')

node: Node = self.head
while node:
if item == node.data:
return True
node = node.next
return False

1.3双向链表

双向链表与单链表一样, 只是每个节点多存放了一个前驱指针, 经过拓展可以很快的查出该节点的上一个节点是哪个

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from typing import Any, Optional


class Node(object):
"""创建数据和数据对应的指针"""
def __init__(self, val: Any, next_node: Optional["Node"] = None, prev_node: Optional["Node"] = None):
self.data: Any = val
self.next: Optional["Node"] = next_node
self.prev: Optional["None"] = prev_node


class LinkList(object):
def __init__(self):
self.head: Optional[Node] = None
self.length: int = 0

@classmethod
def from_list(cls, raw_list: list) -> "LinkList":
"""通过list写入数据"""
instance: "LinkList" = cls()
instance.head = Node(raw_list[0])
node: Node = instance.head

for i in raw_list[1:]:
new_node: Node = Node(i)
# 定义指针
node.next = new_node
new_node.prev = node
# 定义下一个数
node = new_node
instance.length = len(raw_list)
return instance

def to_list(self, left: int = 0, right: int = -1) -> list:
"""获取列表的函数,如果没输入right时,则默认读取到最后面,如果没输入left和right则输出全部数据.left不能大于等于right"""
if self.is_empty():
raise ValueError('Linklist is empty.')
cursor: int = 0
node: Node = self.head
if right == -1:
right = self.length
if right > self.length or left >= right:
raise ValueError('right param error')

while node.next and cursor < left:
node = node.next
cursor += 1
new_list: list = []
while node and cursor <= right:
new_list.append(node.data)
cursor += 1
node = node.next
return new_list

def __len__(self):
return self.length

def is_empty(self) -> bool:
return self.length == 0

def clear(self):
self.head = None
self.length = 0

def append(self, item: Any):
# 在列表最后面添加一个数据
new_node: Node = Node(item)
if not self.head:
self.head = new_node
else:
node: Node = self.head
while node.next:
node = node.next
node.next = new_node
new_node.prev = node

def __getitem__(self, index: int) -> Any:
self._check_index(index)

cursor: int = 0
node: Node = self.head

while node:
node = node.next
cursor += 1
if cursor == index:
return node.data

raise ValueError('target is not exist!')

def _check_index(self, index: int):
if self.is_empty() or index < 0 or index > self.length:
raise ValueError(f"index:{index} error")

def __setitem__(self, index: int, item: Any):
"""单独实现替换, 节省一遍查询"""
self._check_index(index)

node: Node = self.head
if index == 0:
new_node: Node = node.next
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
cursor: int = 0
while node:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node.next)
prev_node.next = new_node
new_node.prev = prev_node

def insert(self, index: int, item: Any):
"""向指定位置插入数据"""
self._check_index(index)

if index == 0:
new_node: Node = Node(item, self.head)
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
node: Node = self.head
cursor: int = 0
while node:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node)
prev_node.next = new_node
new_node.next = node
new_node.prev = prev_node

def delete(self, index: int):
"""删除指定位置数据"""
self._check_index(index)

node: Node = self.head
# 如果是0的话,起点改为第二个数据
if index == 0:
new_node: Node = node.next
self.head = new_node
else:
cursor: int = 1
while node:
prev_node = node
node = node.next
cursor += 1
# 把下一个数据的指针,改为下下个数据的指针
if index == cursor:
prev_node.next = node.next
node.prev = prev_node

def __contains__(self, item: Any) -> bool:
"""查找元素是否在里面"""
if self.is_empty():
raise ValueError('Linklist is empty.')

node: Node = self.head
while node:
if item == node.data:
return True
node = node.next
return False

1.4循环链表

循环链表是特殊的双向链, 特殊之处在于链表最后一个数据的指针要指向第一个数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
from typing import Any, Optional


class Node(object):
"""创建数据和数据对应的指针"""
def __init__(self, val: Any, next_node: Optional["Node"] = None, prev_node: Optional["Node"] = None):
self.data: Any = val
self.next: Optional["Node"] = next_node
self.prev: Optional["None"] = prev_node


class LinkList(object):
def __init__(self):
self.head: Optional[Node] = None
self.length: int = 0

@classmethod
def from_list(cls, raw_list: list) -> "LinkList":
"""通过list写入数据"""
instance: "LinkList" = cls()
instance.head = Node(raw_list[0])
node: Node = instance.head

for i in raw_list[1:]:
new_node: Node = Node(i)
# 定义指针
node.next = new_node
new_node.prev = node
# 定义下一个数
node = new_node
node.next = instance.head
instance.head.prev = node
instance.length = len(raw_list)
return instance

def to_list(self, left: int = 0, right: int = -1) -> list:
"""获取列表的函数,如果没输入right时,则默认读取到最后面,如果没输入left和right则输出全部数据.left不能大于等于right"""
if self.is_empty():
raise ValueError('Linklist is empty.')
cursor: int = 0
node: Node = self.head
if right == -1:
right = self.length
if right > self.length or left >= right:
raise ValueError('right param error')

while node.next and cursor < left:
node = node.next
cursor += 1
new_list: list = []
while node and cursor <= right:
new_list.append(node.data)
if node.next is self.head:
break
cursor += 1
node = node.next
return new_list

def __len__(self):
return self.length

def is_empty(self) -> bool:
return self.length == 0

def clear(self):
self.head = None
self.length = 0

def append(self, item: Any):
# 在列表最后面添加一个数据
new_node: Node = Node(item)
if not self.head:
self.head = new_node
else:
node: Node = self.head
while node.next and node.next is not self.head:
node = node.next

node.next = new_node
new_node.prev = node
new_node.next = self.head
self.head.prev = new_node

def __getitem__(self, index: int) -> Any:
self._check_index(index)

cursor: int = 0
node: Node = self.head

while node and node.next is not self.head:
node = node.next
cursor += 1
if cursor == index:
return node.data

raise ValueError('target is not exist!')

def _check_index(self, index: int):
if self.is_empty() or index < 0 or index > self.length:
raise ValueError(f"index:{index} error")

def __setitem__(self, index: int, item: Any):
"""单独实现替换, 节省一遍查询"""
self._check_index(index)

node: Node = self.head
if index == 0:
new_node: Node = node.next
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
cursor: int = 0
while node and node.next is not self.head:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node.next)
prev_node.next = new_node
new_node.prev = prev_node

def insert(self, index: int, item: Any):
"""向指定位置插入数据"""
self._check_index(index)

if index == 0:
new_node: Node = Node(item, self.head)
self.head = new_node
elif index == self.length - 1:
self.append(item)
else:
node: Node = self.head
cursor: int = 0
while node and node.next is not self.head:
prev_node = node
node = node.next
cursor += 1

if index == cursor:
new_node = Node(item, node)
prev_node.next = new_node
new_node.next = node
new_node.prev = prev_node

def delete(self, index: int):
"""删除指定位置数据"""
self._check_index(index)

node: Node = self.head
# 如果是0的话,起点改为第二个数据
if index == 0:
new_node: Node = node.next
new_node.prev = self.head.prev
self.head = new_node
else:
cursor: int = 1
while node and node.next is not self.head:
prev_node = node
node = node.next
cursor += 1
# 把下一个数据的指针,改为下下个数据的指针
if index == cursor:
prev_node.next = node.next
node.prev = prev_node

def __contains__(self, item: Any) -> bool:
"""查找元素是否在里面"""
if self.is_empty():
raise ValueError('Linklist is empty.')

node: Node = self.head
while node:
if item == node.data:
return True
node = node.next
return False

1.5队列

队列,是一种先进先出的数据结构, 实际的应用场景很多,像抢手机时,有个排队页面,这里就用到队列排队
这里使用list这个数据结构封装成队列.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from typing import Any, Optional


class Node(object):
"""创建数据和数据对应的指针"""
def __init__(self, val: Any, node: Optional["Node"] = None):
self.data: Any = val
self.next: Optional["Node"] = node


class Queue(object):
def __init__(self, max_length: int):
# 定义队列长度
self.size: int = 0
self.max_length: int = max_length

# 定义首尾node
self.head_node: Optional[Node] = None
self.tail_node: Optional[Node] = None

def put(self, value: Any):
"""入队操作"""
if self.is_full():
raise Exception("queue is full")
elif self.is_empty():
node: Node = Node(value)
self.head_node = node
self.tail_node = node
else:
node: Node = Node(value)
self.tail_node.next = node
self.tail_node = node
self.size += 1

def get(self) -> Any:
# 出队操作,切片取数据是O(1),如果要使用remove复杂度为O(k)
if self.is_empty():
raise Exception("queue is empty")
else:
self.size -= 1
value: Any = self.head_node.data
self.head_node = self.head_node.next
return value

def __len__(self) -> int:
if self.is_empty():
raise Exception("queue is empty")
return self.size

def is_full(self) -> bool:
return self.size == self.max_length

def is_empty(self) -> bool:
return self.size == 0

@classmethod
def from_list(cls, raw_list: list) -> "Queue":
instance: "Queue" = cls(len(raw_list))
for i in raw_list:
instance.put(i)
return instance

def to_list(self) -> list:
if self.is_empty():
raise Exception("queue is empty")
_list: list = []
node: Node = self.head_node
while node:
_list.append(node.data)
node = node.next
return _list

1.6双端队列

正常的队列是只有一进一出, 比如右进左出, 但是双端队列可以左进左出, 右进右出, 右进左出, 左进右出. 如果把队列的底层实现看做单链表, 那么双端队列的实现就是循环链表, 只需要在队列的Node添加prev前驱指针, 并把最后一个Node的next指针指向head node, head node的prev前驱指针指向tail node即可

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
from typing import Any, Optional


class Node(object):
"""创建数据和数据对应的指针"""
def __init__(self, val: Any, next_node: Optional["Node"] = None, prev_node: Optional["Node"] = None):
self.data: Any = val
self.next: Optional["Node"] = next_node
self.prev: Optional["None"] = prev_node


class Deque(object):
def __init__(self, max_length: int):
# 定义队列长度
self.size: int = 0
self.max_length: int = max_length

# 定义首尾node
self.head_node: Optional[Node] = None
self.tail_node: Optional[Node] = None

def put_left(self, value: Any):
if self.is_full():
raise Exception("queue is full")
elif self.is_empty():
node: Node = Node(value)
self.head_node = node
self.tail_node = node
else:
node: Node = Node(value, self.head_node, self.tail_node)
self.tail_node.next = node
self.head_node = node
self.size += 1

def put(self, value: Any):
"""入队操作"""
if self.is_full():
raise Exception("queue is full")
elif self.is_empty():
node: Node = Node(value)
self.head_node = node
self.tail_node = node
else:
node: Node = Node(value, self.head_node, self.tail_node)
self.tail_node.next = node
self.tail_node = node
self.size += 1

def get(self) -> Any:
# 出队操作,切片取数据是O(1),如果要使用remove复杂度为O(k)
if self.is_empty():
raise Exception("queue is empty")
else:
self.size -= 1
value: Any = self.head_node.data
self.head_node = self.head_node.next
return value

def pop(self) -> Any:
if self.is_empty():
raise Exception("queue is empty")
else:
self.size -= 1
value: Any = self.tail_node.data
self.tail_node = self.tail_node.prev
self.tail_node.next = self.head_node
self.head_node.prev = self.tail_node
return value

def __len__(self) -> int:
if self.is_empty():
raise Exception("queue is empty")
return self.size

def is_full(self) -> bool:
return self.size == self.max_length

def is_empty(self) -> bool:
return self.size == 0

@classmethod
def from_list(cls, raw_list: list) -> "Deque":
instance: "Deque" = cls(len(raw_list))
for i in raw_list:
instance.put(i)
return instance

def to_list(self) -> list:
if self.is_empty():
raise Exception("queue is empty")
_list: list = []
node: Node = self.head_node
while node:
_list.append(node.data)
if node.next is self.head_node:
break
node = node.next
return _list

1.7栈

栈可以看做是一个顺序表的订制版, 他只能对最后一个子项进行处理, 只能在最后添加子项或者弹出最后一个子项.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from typing import Any


class Stack(object):
"""在Python中使用list模拟线性表..."""

def __init__(self, max_length: int = 10, extend_num: int = 8):
self.max_length: int = max_length

# 当前有效的数组最长值
# 比如数组实际长度为20, 当只用了10个, 那么num为10
self.num: int = 0
# 扩容的长度
self.extend_num: int = extend_num
self.data = [None] * self.max_length

def is_empty(self) -> bool:
return self.num == 0

def is_full(self) -> bool:
return self.num == self.max_length

@classmethod
def from_list(cls, raw_list: list):
instance: "Stack" = cls()
for i in raw_list:
instance.append(i)
return instance

def to_list(self) -> list:
return self.data[: self.num]

def _extend(self):
self.data.extend([None] * self.extend_num)

def __len__(self):
return self.num

def append(self, value: Any):
if self.is_full():
# 如果满了则进行扩容
self._extend()
self.data[self.num] = value
self.num += 1

def pop(self) -> Any:
"""假删除, 只是把值往前挪, 缩小一个有效范围"""
if self.num - 1 < 0:
raise IndexError("pop from empty list")
else:
value: Any = self.data[self.num]
self.num -= 1
return value

2.二维数据结构–树

树是链表的一个变体, 但链表有一个节点有一个分叉时,这个链表就是树. 树有很简单的二叉树, 也有复杂的红黑树, 树的应用很多.
树有很多专属名词:

  • 路径 从树中一个结点到另一个结点之间的分支构成两个结点之间的路径.
  • 路径长度 路径上的分支数目称作路径长度。
  • 树的路径长度 树的路径长度就是从根节点到每一结点的路径长度之和。
  • 结点的带权路径长度 结点的带权路径长度就是从该结点到根节点之间的路径长度与结点上权的乘积。
  • 树的带权路径长度 树的带权路径长度就是树中所有叶子结点的带权路径长度之和,通常记做WPL。

2.1二叉树以及遍历

树以及遍历
图是一个二叉树以及遍历的说明, 可以看出各种遍历的规律:

  • 层级遍历 一层一层的遍历, 为了达到遍历左节点后能再遍历右节点, 需要利用队列的先进先出来实现.
  • 前序遍历 从跟节点开始, 如果有左节点就读左节点的数据, 没有则读右节点的数据
  • 中序遍历 优先读取最深子节点, 没有则读最深子节点的上一个节点, 再读上一个节点的右节点
  • 后序遍历 优先读取的最深子节点, 同时也保持优先读取左节点再读右节点的原则

我们根据图实现一个二叉树, 并实现各种遍历:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from queue import Queue
from typing import Any, Optional


class BinaryTreeNode(object):
def __init__(self, data: Any, left: Optional["BinaryTreeNode"] = None, right: Optional["BinaryTreeNode"] = None):
self.left: Optional["BinaryTreeNode"] = left
self.right: Optional["BinaryTreeNode"] = right
self.data: Any = data


class BinaryTreeHelper(object):
def __init__(self, root: BinaryTreeNode):
self.root: BinaryTreeNode = root

def pre_order(self, node: Optional[BinaryTreeNode] = None) -> list:
"""前序遍历"""
if node is None:
node = self.root
_list: list = []

if node is not None:
_list.append(node.data)
if node.left is not None:
_list.extend(self.pre_order(node.left))
if node.right is not None:
_list.extend(self.pre_order(node.right))
return _list

def in_order(self, node: Optional[BinaryTreeNode] = None) -> list:
"""中序遍历"""
if node is None:
node = self.root
_list: list = []
if node is not None:
if node.left is not None:
_list.extend(self.in_order(node.left))
_list.append(node.data)
if node.right is not None:
_list.extend(self.in_order(node.right))
return _list

def post_order(self, node: Optional[BinaryTreeNode] = None) -> list:
"""后序遍历"""
if node is None:
node = self.root
_list: list = []
if node is not None:
if node.left is not None:
_list.extend(self.post_order(node.left))
if node.right is not None:
_list.extend(self.post_order(node.right))
_list.append(node.data)
return _list

def level_order(self, node: Optional[BinaryTreeNode] = None) -> list:
"""层级遍历"""
if node is None:
node = self.root
_list: list = []

queue: "Queue" = Queue()
while node is not None:
_list.append(node.data)
if node.left is not None:
queue.put(node.left)
if node.right is not None:
queue.put(node.right)
if queue.empty():
node = None
else:
node = queue.get()
return _list


if __name__ == "__main__":
binary_tree: BinaryTreeNode = BinaryTreeNode(
"0",
BinaryTreeNode(
"1",
BinaryTreeNode(
"3",
BinaryTreeNode("7"),
BinaryTreeNode("8")
),
BinaryTreeNode(
"4",
BinaryTreeNode("9")
)
),
BinaryTreeNode(
"2",
BinaryTreeNode("5"),
BinaryTreeNode("6"),
)
)
binary_tree_helper: BinaryTreeHelper = BinaryTreeHelper(binary_tree)
print("层级遍历", binary_tree_helper.level_order())
print("前序遍历", binary_tree_helper.pre_order())
print("中序遍历", binary_tree_helper.in_order())
print("后序遍历", binary_tree_helper.post_order())

2.2哈弗曼树

哈弗曼树是树的应用之一
利用哈弗曼树做出了最早的压缩编码哈弗曼编码,根据每个字母的出现频率来定义他们如:
(‘C’, 2), (‘G’, 2), (‘E’, 3), (‘K’, 3), (‘B’, 4),(‘F’, 4), (‘I’, 4), (‘J’, 4), (‘D’, 5), (‘H’, 6),(‘N’, 6), (‘L’, 7), (‘M’, 9), (‘A’, 10),(‘O’,5)
按照频路形成由上到下,由左到右的二叉树,且左树的权为0,右树的权为1
利用哈弗曼树输出哈弗曼编码
代码来自于:https://www.2cto.com/kf/201503/385537.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
class Node:
def __init__(self,freq):
self.left = None
self.right = None
self.father = None
self.freq = freq
def isLeft(self):
return self.father.left == self
#create nodes创建叶子节点
def createNodes(freqs):
return [Node(freq) for freq in freqs]

#create Huffman-Tree创建Huffman树
def createHuffmanTree(nodes):
queue = nodes[:]
while len(queue) > 1:
queue.sort(key=lambda item:item.freq)
node_left = queue.pop(0)
node_right = queue.pop(0)
node_father = Node(node_left.freq + node_right.freq)
node_father.left = node_left
node_father.right = node_right
node_left.father = node_father
node_right.father = node_father
queue.append(node_father)
queue[0].father = None
return queue[0]
#Huffman编码
def huffmanEncoding(nodes,root):
codes = [''] * len(nodes)
for i in range(len(nodes)):
node_tmp = nodes[i]
while node_tmp != root:
if node_tmp.isLeft():
codes[i] = '0' + codes[i]
else:
codes[i] = '1' + codes[i]
node_tmp = node_tmp.father
return codes

if __name__ == '__main__':
#chars = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N']
#freqs = [10,4,2,5,3,4,2,6,4,4,3,7,9,6]
chars_freqs = [('C', 2), ('G', 2), ('E', 3), ('K', 3), ('B', 4),
('F', 4), ('I', 4), ('J', 4), ('D', 5), ('H', 6),
('N', 6), ('L', 7), ('M', 9), ('A', 10)]
nodes = createNodes([item[1] for item in chars_freqs])
root = createHuffmanTree(nodes)
codes = huffmanEncoding(nodes,root)
for item in zip(chars_freqs,codes):
print ('Character:%s freq:%-2d encoding: %s' % (item[0][0],item[0][1],item[1]))

3.图(Graph)

3.1图的遍历

来源于:http://www.cnblogs.com/yupeng/p/3414736.html

首先有一个概念:回溯
  回溯法(探索与回溯法)是一种选优搜索法,按选优条件向前搜索,以达到目标。但当探索到某一步时,发现原先选择并不优或达不到目标,就退回一步重新选择,这种走不通就退回再走的技术为回溯法,而满足回溯条件的某个状态的点称为“回溯点”。
深度优先算法:
(1)访问初始顶点v并标记顶点v已访问。
(2)查找顶点v的第一个邻接顶点w。
(3)若顶点v的邻接顶点w存在,则继续执行;否则回溯到v,再找v的另外一个未访问过的邻接点。
(4)若顶点w尚未被访问,则访问顶点w并标记顶点w为已访问。
(5)继续查找顶点w的下一个邻接顶点wi,如果v取值wi转到步骤(3)。直到连通图中所有顶点全部访问过为止。
广度优先算法:
(1)顶点v入队列。
(2)当队列非空时则继续执行,否则算法结束。
(3)出队列取得队头顶点v;访问顶点v并标记顶点v已被访问。
(4)查找顶点v的第一个邻接顶点col。
(5)若v的邻接顶点col未被访问过的,则col入队列。
(6)继续查找顶点v的另一个新的邻接顶点col,转到步骤(5)。直到顶点v的所有未被访问过的邻接点处理完。转到步骤(2)。

通俗点讲
假如有个房屋,房屋里有3间房,每间房有桌子,抽屉,床。而我们要的钥匙可能在桌子,抽屉,床上面。
而钥匙放在抽屉的概率>桌子>床
深度优先遍历就是先找一间房间,找完房间里的桌子,抽屉,床后再找另外一个房间
广度优先遍历则是先找每间房间里的抽屉,找完后再找每间房间的桌子,找完后再找每间房间的床
ps广度优先遍历类似于树的层级遍历
实现代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
class Graph(object):

def __init__(self,*args,**kwargs):
self.node_neighbors = {}

def add_nodes(self,nodelist):
#批量添加节点
for node in nodelist:
self.add_node(node)

def add_node(self,node):
#添加节点
if not node in self.nodes():
self.node_neighbors[node] = []

def add_edge(self,edge):
#定义节点的邻接点(双向的)
u,v = edge
if(v not in self.node_neighbors[u]) and ( u not in self.node_neighbors[v]):
self.node_neighbors[u].append(v)

if(u!=v):
self.node_neighbors[v].append(u)

def nodes(self):
#返回节点
return self.node_neighbors.keys()

def depth_first_search(self,root=None):
#深度遍历
visited = {}
order = [] #记录遍历好的节点
def dfs(node):
visited[node] = True #代表该节点标记为读取过了
order.append(node) #记录
for n in self.node_neighbors[node]:
if not n in visited:
dfs(n) #取出第一个节点,利用回调,遍历该节点的第一个邻节点


if root:
dfs(root)

#如果定义了开始节点,此步骤就是最后循环一遍,把未遍历的点再遍历一次
#如果没定义节点,此步骤就是记录要遍历的点
for node in self.nodes():
if not node in visited:
dfs(node)

print (order)

return order

def breadth_first_search(self,root=None):
#广度遍历
visited = {}
queue = [] #记录要遍历的节点
order = [] #记录遍历好的节点
def bfs():
while len(queue)> 0:
node = queue.pop(0) #取出顶点
visited[node] = True #代表该节点标记为读取过了
for n in self.node_neighbors[node]: #self.node_neighbors[node]为读取对应key的value也就是读取节点对应的邻节点
if (not n in visited) and (not n in queue):
#not n in self.visited判断这个节点是否读取过,not n in queue用来对节点对应的邻节点去重
queue.append(n) #添加要遍历的节点
order.append(n) #记录遍历好的节点

if root:
queue.append(root)
order.append(root)
bfs()

#如果定义了开始节点,此步骤就是最后循环一遍,把未遍历的点再遍历一次
#如果没定义节点,此步骤就是记录要遍历的点
for node in self.nodes():
if not node in visited:
queue.append(node)
order.append(node)
bfs()
print (order)

return order


if __name__ == '__main__':
g = Graph()
g.add_nodes([i+1 for i in range(8)])
g.add_edge((1, 2))
g.add_edge((1, 3))
g.add_edge((2, 4))
g.add_edge((2, 5))
g.add_edge((4, 8))
g.add_edge((5, 8))
g.add_edge((3, 6))
g.add_edge((3, 7))
g.add_edge((6, 7))
print ("nodes:", g.nodes())

order = g.breadth_first_search(1)
order = g.depth_first_search(1)

3.2图的路径

代码来于:http://www.cnblogs.com/yupeng/p/3414569.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def find_path(graph, start, end, path=[]):
path = path + [start]
if start == end:
return path
if not start in graph:
return None
for node in graph[start]:
if node not in path:
newpath = find_path(graph, node, end, path)
if newpath:
return newpath
return None

def find_all_paths(graph, start, end, path=[]):
path = path + [start]
if start == end:
return [path]
if not start in graph:
return []
paths = []
for node in graph[start]:
if node not in path:
newpaths = find_all_paths(graph, node, end, path)
for newpath in newpaths:
paths.append(newpath)
return paths

def find_shortest_path(graph, start, end, path=[]):
path = path + [start]
if start == end:
return path
if not start in graph:
return None
shortest = None
for node in graph[start]:
if node not in path:
newpath = find_shortest_path(graph, node, end, path)
if newpath:
if not shortest or len(newpath) < len(shortest):
shortest = newpath
return shortest

graph = {'A': ['B', 'C'],
'B': ['C', 'D'],
'C': ['D'],
'D': ['C'],
'E': ['F'],
'F': ['C']}

print(find_path(graph,'A','C'))
print(find_all_paths(graph,'A','C'))
print(find_shortest_path(graph,'A','C'))

其中遍历的最关键代码在于:

1
2
3
4
5
6
7
8
9
#1        path = path + [start]
#2 if start == end:
#3 return [path]
#4 if not start in graph:
#5 return []
#6 paths = []
#7 for node in graph[start]:
#8 if node not in path:
#9 newpaths = (对应自己的函数,再次遍历)(graph, node, end, path)

这段代码采用深度遍历
理解的顺序如下
步骤0. #4如果终点不在图里面,直接返回空路径表
步骤1. #1把start添加到路径列表(末尾添加)
步骤2. #7遍历start对应字典的value
步骤3. #8判断value是否在路径列表
步骤4. #9回调函数,但起点已经换回刚才的value
步骤5. #2判断这个value是不是等于自己要的结束点,如果是就是一条路径,否则继续遍历

对于find_path。如果找到路径就直接返回
对于find_all_paths。找到了路径就添加到另一个列表里面,直到遍历完整个图
对于find_shortest_path。找到路径就跟之前的路径对比,如果长度短的就保存,遍历结束后,返回最后一条保存的路径

查看评论