Huffman哈夫曼树(霍夫曼树,赫夫曼树)在通信领域最主要的应用是数据编码。假设现在有A、B、C、D、E五个字符,它们出现的概率或者权值不同,从A到E,权值依次降低,那么就可以用哈夫曼最优二叉树对其进行编码。左边为0,右边为1,通信网络电气链路的传输数据是0或者1。
from binarytree import Node, get_parent
def app():
data = [(1, 'E'), (3, 'D'), (7, 'C'), (9, 'B'), (11, 'A')]
huffman_tree = build_huffman_tree(data)
print('-----')
print('最终的Huffman树')
root = huffman_tree.pop()
print(root)
print('---')
print(data)
travle(root, data)
def travle(root, data):
for leave in root.leaves:
print('==')
path = find_path(root, leave, data)
d = find_data(data, leave.value)
print('节点', d, '路径 ->', path)
def find_data(data, v):
for d in data:
if d[0] == v:
return d
def build_huffman_tree(data):
nodes = []
for i in data:
nodes.append(Node(i[0]))
print('nodes', nodes)
print('开始构建Huffman树')
while True:
nodes = sorted(nodes, key=lambda x: x.value)
# print('nodes', nodes)
if len(nodes) == 1:
# print('仅剩一个节点,建树结束')
break
left = nodes.pop(0)
right = nodes.pop(0)
# print('选取节点', left.value, right.value)
root = Node(left.value + right.value)
root.left = left
root.right = right
nodes.append(root)
return nodes
def find_path(root, node, data):
root_value = root.value
path = [node]
temp = node.value
codes = []
while True:
p = get_parent(root, node)
path.append(p)
if p.left.value == node.value:
codes.append(0)
elif p.right.value == node.value:
codes.append(1)
if p.value == root_value:
break
else:
node = p
list.reverse(codes)
s = ''.join(str(item) for item in codes)
print(find_data(data, temp), '字符', find_data(data, temp)[1], '编码:', s)
list.reverse(path)
return path
if __name__ == '__main__':
app()
输出:
nodes [Node(1), Node(3), Node(7), Node(9), Node(11)]
开始构建Huffman树
-----
最终的Huffman树
___31__
/ \
__11 20
/ \ / \
4 7 9 11
/ \
1 3
---
[(1, 'E'), (3, 'D'), (7, 'C'), (9, 'B'), (11, 'A')]
==
(7, 'C') 字符 C 编码: 01
节点 (7, 'C') 路径 -> [Node(31), Node(11), Node(7)]
==
(9, 'B') 字符 B 编码: 10
节点 (9, 'B') 路径 -> [Node(31), Node(20), Node(9)]
==
(11, 'A') 字符 A 编码: 11
节点 (11, 'A') 路径 -> [Node(31), Node(20), Node(11)]
==
(1, 'E') 字符 E 编码: 000
节点 (1, 'E') 路径 -> [Node(31), Node(11), Node(4), Node(1)]
==
(3, 'D') 字符 D 编码: 001
节点 (3, 'D') 路径 -> [Node(31), Node(11), Node(4), Node(3)]