forked from algorithmzuo/algorithmbasic2020
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCode05_HuffmanTree.java
More file actions
214 lines (196 loc) · 6.11 KB
/
Code05_HuffmanTree.java
File metadata and controls
214 lines (196 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
package class46;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.PriorityQueue;
// 本文件不牵扯任何byte类型的转化
// 怎么转byte自己来,我只负责huffman算法本身的正确实现
// 字符串为空的时候,自己处理边界吧
// 实现的代码通过了大样本随机测试的对数器
// 可以从main函数的内容开始看起
public class Code05_HuffmanTree {
// 根据文章str, 生成词频统计表
public static HashMap<Character, Integer> countMap(String str) {
HashMap<Character, Integer> ans = new HashMap<>();
char[] s = str.toCharArray();
for (char cha : s) {
if (!ans.containsKey(cha)) {
ans.put(cha, 1);
} else {
ans.put(cha, ans.get(cha) + 1);
}
}
return ans;
}
public static class Node {
public int count;
public Node left;
public Node right;
public Node(int c) {
count = c;
}
}
public static class NodeComp implements Comparator<Node> {
@Override
public int compare(Node o1, Node o2) {
return o1.count - o2.count;
}
}
// 根据由文章生成词频表countMap,生成哈夫曼编码表
// key : 字符
// value: 该字符编码后的二进制形式
// 比如,频率表 A:60, B:45, C:13 D:69 E:14 F:5 G:3
// A 10
// B 01
// C 0011
// D 11
// E 000
// F 00101
// G 00100
public static HashMap<Character, String> huffmanForm(HashMap<Character, Integer> countMap) {
HashMap<Character, String> ans = new HashMap<>();
if (countMap.size() == 1) {
for (char key : countMap.keySet()) {
ans.put(key, "0");
}
return ans;
}
HashMap<Node, Character> nodes = new HashMap<>();
PriorityQueue<Node> heap = new PriorityQueue<>(new NodeComp());
for (Entry<Character, Integer> entry : countMap.entrySet()) {
Node cur = new Node(entry.getValue());
char cha = entry.getKey();
nodes.put(cur, cha);
heap.add(cur);
}
while (heap.size() != 1) {
Node a = heap.poll();
Node b = heap.poll();
Node h = new Node(a.count + b.count);
h.left = a;
h.right = b;
heap.add(h);
}
Node head = heap.poll();
fillForm(head, "", nodes, ans);
return ans;
}
public static void fillForm(Node head, String pre, HashMap<Node, Character> nodes, HashMap<Character, String> ans) {
if (nodes.containsKey(head)) {
ans.put(nodes.get(head), pre);
} else {
fillForm(head.left, pre + "0", nodes, ans);
fillForm(head.right, pre + "1", nodes, ans);
}
}
// 原始字符串str,根据哈夫曼编码表,转译成哈夫曼编码返回
public static String huffmanEncode(String str, HashMap<Character, String> huffmanForm) {
char[] s = str.toCharArray();
StringBuilder builder = new StringBuilder();
for (char cha : s) {
builder.append(huffmanForm.get(cha));
}
return builder.toString();
}
// 原始字符串的哈夫曼编码huffmanEncode,根据哈夫曼编码表,还原成原始字符串
public static String huffmanDecode(String huffmanEncode, HashMap<Character, String> huffmanForm) {
TrieNode root = createTrie(huffmanForm);
TrieNode cur = root;
char[] encode = huffmanEncode.toCharArray();
StringBuilder builder = new StringBuilder();
for (int i = 0; i < encode.length; i++) {
int index = encode[i] == '0' ? 0 : 1;
cur = cur.nexts[index];
if (cur.nexts[0] == null && cur.nexts[1] == null) {
builder.append(cur.value);
cur = root;
}
}
return builder.toString();
}
public static TrieNode createTrie(HashMap<Character, String> huffmanForm) {
TrieNode root = new TrieNode();
for (char key : huffmanForm.keySet()) {
char[] path = huffmanForm.get(key).toCharArray();
TrieNode cur = root;
for (int i = 0; i < path.length; i++) {
int index = path[i] == '0' ? 0 : 1;
if (cur.nexts[index] == null) {
cur.nexts[index] = new TrieNode();
}
cur = cur.nexts[index];
}
cur.value = key;
}
return root;
}
public static class TrieNode {
public char value;
public TrieNode[] nexts;
public TrieNode() {
value = 0;
nexts = new TrieNode[2];
}
}
// 为了测试
public static String randomNumberString(int len, int range) {
char[] str = new char[len];
for (int i = 0; i < len; i++) {
str[i] = (char) ((int) (Math.random() * range) + 'a');
}
return String.valueOf(str);
}
// 为了测试
public static void main(String[] args) {
// 根据词频表生成哈夫曼编码表
HashMap<Character, Integer> map = new HashMap<>();
map.put('A', 60);
map.put('B', 45);
map.put('C', 13);
map.put('D', 69);
map.put('E', 14);
map.put('F', 5);
map.put('G', 3);
HashMap<Character, String> huffmanForm = huffmanForm(map);
for (Entry<Character, String> entry : huffmanForm.entrySet()) {
System.out.println(entry.getKey() + " : " + entry.getValue());
}
System.out.println("====================");
// str是原始字符串
String str = "CBBBAABBACAABDDEFBA";
System.out.println(str);
// countMap是根据str建立的词频表
HashMap<Character, Integer> countMap = countMap(str);
// hf是根据countMap生成的哈夫曼编码表
HashMap<Character, String> hf = huffmanForm(countMap);
// huffmanEncode是原始字符串转译后的哈夫曼编码
String huffmanEncode = huffmanEncode(str, hf);
System.out.println(huffmanEncode);
// huffmanDecode是哈夫曼编码还原成的原始字符串
String huffmanDecode = huffmanDecode(huffmanEncode, hf);
System.out.println(huffmanDecode);
System.out.println("====================");
System.out.println("大样本随机测试开始");
// 字符串最大长度
int len = 500;
// 所含字符种类
int range = 26;
// 随机测试进行的次数
int testTime = 100000;
for (int i = 0; i < testTime; i++) {
int N = (int) (Math.random() * len) + 1;
String test = randomNumberString(N, range);
HashMap<Character, Integer> counts = countMap(test);
HashMap<Character, String> form = huffmanForm(counts);
String encode = huffmanEncode(test, form);
String decode = huffmanDecode(encode, form);
if (!test.equals(decode)) {
System.out.println(test);
System.out.println(encode);
System.out.println(decode);
System.out.println("出错了!");
}
}
System.out.println("大样本随机测试结束");
}
}