本文记录《数据结构习题解析与实验指导》的课后实验五——基于哈夫曼树的数据压缩算法。
1 实验内容
描述
输入一串字符串,根据给定的字符串中字符出现的频率建立相应哈夫曼树,构造哈夫曼编码表,在此基础上可以对待压缩文件进行压缩(即编码),同时可以对压缩后的二进制编码文件进行解压(即译码)。
输入
多组数据,每组数据一行,为一个字符串(只考虑26个小写字母即可)。当输入字符串为“0”时,输入结束。
输出
每组数据输出2n+3行(n为输入串中字符类别的个数)。第一行为统计出来的字符出现频率(只输出存在的字符,格式为:字符:频度),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第二行至第2n行为哈夫曼树的存储结构的终态(形如教材139页表5.2(b),一行当中的数据用空格分隔)。第2n+1行为每个字符的哈夫曼编码(只输出存在的字符,格式为:字符:编码),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第2n+2行为编码后的字符串,第2n+3行为解码后的字符串(与输入的字符串相同)。
样例输入1
aaaaaaabbbbbccdddd
aabccc
0
样例输出1
a:7 b:5 c:2 d:4
1 7 7 0 0
2 5 6 0 0
3 2 5 0 0
4 4 5 0 0
5 6 6 3 4
6 11 7 2 5
7 18 0 1 6
a:0 b:10 c:110 d:111
00000001010101010110110111111111111
aaaaaaabbbbbccdddd
a:2 b:1 c:3
1 2 4 0 0
2 1 4 0 0
3 3 5 0 0
4 3 5 2 1
5 6 0 3 4
a:11 b:10 c:0
111110000
aabccc
2 基本思路
这里可以参考课本P138–P141,给出的解法很详细。
3 数据结构代码实现
1.存储状态的数据结构:
2.树的创建
#include<stdio.h>
/*
 * One slot of the array-based Huffman tree.
 * Nodes refer to each other by array index; the code in this file uses
 * index 0 to mean "no link" (initNode clears the links to 0 and coding()
 * stops climbing when parent == 0).
 */
typedef struct Node
{
/* Weight of the node: a character frequency for leaves, the sum of the two children for merged nodes. */
int weight;
/* Index of the parent node, 0 if the node has none. */
int parent;
/* Index of the left child, 0 if none. */
int lChild;
/* Index of the right child, 0 if none. */
int rChild;
}node;
/**
 * Reset the link fields of every slot and install the search sentinel.
 *
 * @param data  array of tree slots
 * @param len   number of slots in data
 *
 * parent, lChild and rChild of every slot are set to 0. Weights are left
 * untouched except for slot 0, whose weight is set to 65535 so that it acts
 * as a "larger than anything" starting point for the two-minimum search.
 * Nothing is written when len is not positive, because slot 0 does not
 * exist in that case.
 */
void initNode(node data[], int len)
{
    if (len <= 0) {
        return;
    }
    for (int i = 0; i < len; ++i) {
        data[i].parent = 0;
        data[i].lChild = 0;
        data[i].rChild = 0;
    }
    /* Sentinel used by the search for the two smallest weights. */
    data[0].weight = 65535;
}
/* searchTwoNumber is defined further down in the file; declare it here so the
   call below is not an implicit function declaration. */
void searchTwoNumber(node data[], int end, int *s1, int *s2, int solvedData[], int point);

/**
 * Fill in the merged (internal) nodes of the Huffman tree.
 *
 * @param data  tree slots; slot 0 is the sentinel, the leaves start at slot 1
 * @param len   total number of slots; the caller in this file passes twice
 *              the number of leaves
 *
 * For every slot i in (len/2, len) the two lightest nodes that have not been
 * merged yet are picked, linked under i, and their weights are summed into i.
 */
void createTree(node data[], int len)
{
    /* With fewer than two leaves there is nothing to merge, and a
       zero-length scratch array must not be declared. */
    if (len < 4) {
        return;
    }

    int s1 = 0, s2 = 0;
    /* Every merge records two indices and there are len/2 - 1 merges, so up
       to len - 2 entries are written. A scratch array of len/2 entries is
       overrun as soon as there are three or more leaves. */
    int solvedData[len];
    int point = 0;

    for (int i = len / 2 + 1; i < len; ++i) {
        searchTwoNumber(data, i, &s1, &s2, solvedData, point);
        point += 2;
        data[s1].parent = i;
        data[s2].parent = i;
        data[i].weight = data[s1].weight + data[s2].weight;
        data[i].lChild = s1;
        data[i].rChild = s2;
    }
}
/**
 * Report whether index i is still unused.
 *
 * @param solvedData  indices that have already been merged
 * @param i           index to look for
 * @param point       number of valid entries in solvedData
 * @return 0 when i appears among the first point entries, 1 otherwise
 *         (note the inverted sense relative to the function name).
 */
int contains(int solvedData[], int i, int point)
{
    int pos = 0;
    while (pos < point && solvedData[pos] != i) {
        pos++;
    }
    /* The scan ran off the end only if i was never seen. */
    return pos >= point;
}
/**
 * Find the two lightest nodes in data[1 .. end-1] that have not been merged.
 *
 * @param data        tree slots; data[0].weight must exceed every real weight
 *                    (initNode sets it to 65535) because slot 0 is the
 *                    starting candidate for both results
 * @param end         one past the last slot to examine
 * @param s1          receives the index of the lightest node
 * @param s2          receives the index of the second lightest node
 * @param solvedData  indices already merged; the two results are appended at
 *                    positions point and point + 1
 * @param point       number of valid entries in solvedData on entry
 *
 * Comparisons are strict, so on equal weights the lower index wins.
 */
void searchTwoNumber(node data[], int end, int *s1, int *s2, int solvedData[], int point)
{
    int lightest = 0;
    int runnerUp = 0;
    int idx = 1;

    while (idx < end) {
        /* contains() returns non-zero when idx has NOT been used yet. */
        if (contains(solvedData, idx, point) != 0) {
            if (data[idx].weight < data[lightest].weight) {
                runnerUp = lightest;
                lightest = idx;
            } else if (data[idx].weight < data[runnerUp].weight) {
                runnerUp = idx;
            }
        }
        idx++;
    }

    *s1 = lightest;
    *s2 = runnerUp;
    solvedData[point] = lightest;
    solvedData[point + 1] = runnerUp;
}
/**
 * Count how often each lowercase letter occurs in a string.
 *
 * @param data  NUL-terminated input string
 * @param len   receives the number of letters whose count in p is non-zero
 * @param p     array of 26 counters ('a' at index 0); counts are added to
 *              whatever the array already holds, so the caller zeroes it
 *
 * Characters outside 'a'..'z' are skipped; indexing the table with them
 * would fall outside the 26 counters.
 */
void getCharData(char data[], int *len, int *p)
{
    for (int i = 0; data[i] != '\0'; ++i) {
        if (data[i] >= 'a' && data[i] <= 'z') {
            p[data[i] - 'a']++;
        }
    }

    int distinct = 0;
    for (int j = 0; j < 26; j++) {
        if (p[j] != 0) {
            distinct++;
        }
    }
    *len = distinct;
}
/**
 * Print the tree table, one slot per line, starting at slot 1.
 *
 * @param data  tree slots
 * @param len   one past the last slot to print
 *
 * Each line holds: index, weight, parent, left child, right child.
 */
void show(node data[], int len)
{
    int row = 1;
    while (row < len) {
        const node *cur = &data[row];
        printf("%d %d %d %d %d\n", row, cur->weight, cur->parent, cur->lChild, cur->rChild);
        row++;
    }
}
/**
 * Demo driver: counts the letters of a hard-coded string, prints the
 * frequency line, builds the Huffman tree and prints its table.
 */
int main()
{
    /* Input is hard-coded to the second sample ("aabccc"). To read from
       stdin instead, use a bounded read such as scanf("%29s", data). */
    char data[30] = {'a','a','b','c','c','c'};
    int asc[26] = {0};
    int len = 0;

    getCharData(data, &len, asc);
    /* No letters counted: a zero-length tree array must not be declared. */
    if (len == 0) {
        return 0;
    }

    /* Slot 0 is the sentinel; leaves occupy slots 1..len in letter order. */
    node test[len * 2];
    for (int i = 0, j = 1; i < 26; ++i) {
        if (asc[i] != 0) {
            test[j++].weight = asc[i];
            printf("%c:%d ", i + 'a', asc[i]);
        }
    }
    printf("\n");

    /* initNode leaves the leaf weights assigned above untouched. */
    initNode(test, len * 2);
    createTree(test, len * 2);
    show(test, len * 2);
    return 0;
}
首先是剥离字符串,获得不同字母的个数,并且将每个字母的个数存在asc数组中。
接着构造一个长度为2*len的数组用于存储节点,下标0不存放数据,从下标1开始存储;然后利用asc数组初始化各叶子节点的weight,并打印出题目要求输出的第一行。
接着初始化每个节点的parent、lChild、rChild(均置为0),并把下标为0的节点的weight赋为一个足够大的值(65535,相当于无穷大),用于下方的查找。
然后创建树,创建树的过程中有一个查找两个最小数据的过程,这时就用到了data[0].weight。每次查找到的两个节点的下标都会放到solvedData数组中,以便判断某个节点是否已经被选过。
3.利用构建好的树求解哈夫曼编码
/**
 * Print the Huffman code of one leaf as "<letter>:<bits> ".
 *
 * @param data  finished Huffman tree (parent index 0 marks the root)
 * @param id    index of the leaf to encode
 *
 * The path is walked from the leaf up to the root; a step taken from a left
 * child contributes '0' and a step from a right child contributes '1'. The
 * bits are written back to front so the code reads root-to-leaf without a
 * separate reversal pass.
 *
 * The buffer is sized from the measured depth: a fixed 8-byte buffer is
 * overrun as soon as a code reaches 8 bits.
 *
 * NOTE(review): the letter is printed as id + 96, which matches the real
 * character only when the letters present form a contiguous run starting
 * at 'a' — confirm against the caller before relying on the label.
 */
void coding(node data[], int id)
{
    int leaf = id;

    /* First pass: number of edges between the leaf and the root. */
    int depth = 0;
    for (int cur = id; data[cur].parent != 0; cur = data[cur].parent) {
        depth++;
    }

    char bits[depth + 1];
    int pos = depth;
    bits[pos] = '\0';

    /* Second pass: fill the buffer from the end towards the front. */
    for (int cur = id; data[cur].parent != 0; cur = data[cur].parent) {
        int up = data[cur].parent;
        if (data[up].lChild == cur) {
            bits[--pos] = '0';
        } else if (data[up].rChild == cur) {
            bits[--pos] = '1';
        }
    }

    printf("%c:%s ", leaf + 96, bits + pos);
}
从叶子节点开始往上找,直到根节点。左子树记0,右子树记1;因为是自底向上得到的,所以最后要把结果反转。
4 全部代码
#include<stdio.h>
/* Global table of strings, apparently meant to hold one code string per
   character; no function in this listing reads or writes it. */
char code[100][100];
/*
 * One slot of the array-based Huffman tree.
 * Nodes refer to each other by array index; the code in this file uses
 * index 0 to mean "no link" (initNode clears the links to 0 and coding()
 * stops climbing when parent == 0).
 */
typedef struct Node
{
/* Weight of the node: a character frequency for leaves, the sum of the two children for merged nodes. */
int weight;
/* Index of the parent node, 0 if the node has none. */
int parent;
/* Index of the left child, 0 if none. */
int lChild;
/* Index of the right child, 0 if none. */
int rChild;
}node;
/**
 * Reset the link fields of every slot and install the search sentinel.
 *
 * @param data  array of tree slots
 * @param len   number of slots in data
 *
 * parent, lChild and rChild of every slot are set to 0. Weights are left
 * untouched except for slot 0, whose weight is set to 65535 so that it acts
 * as a "larger than anything" starting point for the two-minimum search.
 * Nothing is written when len is not positive, because slot 0 does not
 * exist in that case.
 */
void initNode(node data[], int len)
{
    if (len <= 0) {
        return;
    }
    for (int i = 0; i < len; ++i) {
        data[i].parent = 0;
        data[i].lChild = 0;
        data[i].rChild = 0;
    }
    /* Sentinel used by the search for the two smallest weights. */
    data[0].weight = 65535;
}
/* searchTwoNumber is defined further down in the file; declare it here so the
   call below is not an implicit function declaration. */
void searchTwoNumber(node data[], int end, int *s1, int *s2, int solvedData[], int point);

/**
 * Fill in the merged (internal) nodes of the Huffman tree.
 *
 * @param data  tree slots; slot 0 is the sentinel, the leaves start at slot 1
 * @param len   total number of slots; the caller in this file passes twice
 *              the number of leaves
 *
 * For every slot i in (len/2, len) the two lightest nodes that have not been
 * merged yet are picked, linked under i, and their weights are summed into i.
 */
void createTree(node data[], int len)
{
    /* With fewer than two leaves there is nothing to merge, and a
       zero-length scratch array must not be declared. */
    if (len < 4) {
        return;
    }

    int s1 = 0, s2 = 0;
    /* Every merge records two indices and there are len/2 - 1 merges, so up
       to len - 2 entries are written. A scratch array of len/2 entries is
       overrun as soon as there are three or more leaves. */
    int solvedData[len];
    int point = 0;

    for (int i = len / 2 + 1; i < len; ++i) {
        searchTwoNumber(data, i, &s1, &s2, solvedData, point);
        point += 2;
        data[s1].parent = i;
        data[s2].parent = i;
        data[i].weight = data[s1].weight + data[s2].weight;
        data[i].lChild = s1;
        data[i].rChild = s2;
    }
}
/**
 * Report whether index i is still unused.
 *
 * @param solvedData  indices that have already been merged
 * @param i           index to look for
 * @param point       number of valid entries in solvedData
 * @return 0 when i appears among the first point entries, 1 otherwise
 *         (note the inverted sense relative to the function name).
 */
int contains(int solvedData[], int i, int point)
{
    int pos = 0;
    while (pos < point && solvedData[pos] != i) {
        pos++;
    }
    /* The scan ran off the end only if i was never seen. */
    return pos >= point;
}
/**
 * Find the two lightest nodes in data[1 .. end-1] that have not been merged.
 *
 * @param data        tree slots; data[0].weight must exceed every real weight
 *                    (initNode sets it to 65535) because slot 0 is the
 *                    starting candidate for both results
 * @param end         one past the last slot to examine
 * @param s1          receives the index of the lightest node
 * @param s2          receives the index of the second lightest node
 * @param solvedData  indices already merged; the two results are appended at
 *                    positions point and point + 1
 * @param point       number of valid entries in solvedData on entry
 *
 * Comparisons are strict, so on equal weights the lower index wins.
 */
void searchTwoNumber(node data[], int end, int *s1, int *s2, int solvedData[], int point)
{
    int lightest = 0;
    int runnerUp = 0;
    int idx = 1;

    while (idx < end) {
        /* contains() returns non-zero when idx has NOT been used yet. */
        if (contains(solvedData, idx, point) != 0) {
            if (data[idx].weight < data[lightest].weight) {
                runnerUp = lightest;
                lightest = idx;
            } else if (data[idx].weight < data[runnerUp].weight) {
                runnerUp = idx;
            }
        }
        idx++;
    }

    *s1 = lightest;
    *s2 = runnerUp;
    solvedData[point] = lightest;
    solvedData[point + 1] = runnerUp;
}
/**
 * Count how often each lowercase letter occurs in a string.
 *
 * @param data  NUL-terminated input string
 * @param len   receives the number of letters whose count in p is non-zero
 * @param p     array of 26 counters ('a' at index 0); counts are added to
 *              whatever the array already holds, so the caller zeroes it
 *
 * Characters outside 'a'..'z' are skipped; indexing the table with them
 * would fall outside the 26 counters.
 */
void getCharData(char data[], int *len, int *p)
{
    for (int i = 0; data[i] != '\0'; ++i) {
        if (data[i] >= 'a' && data[i] <= 'z') {
            p[data[i] - 'a']++;
        }
    }

    int distinct = 0;
    for (int j = 0; j < 26; j++) {
        if (p[j] != 0) {
            distinct++;
        }
    }
    *len = distinct;
}
/**
 * Print the tree table, one slot per line, starting at slot 1.
 *
 * @param data  tree slots
 * @param len   one past the last slot to print
 *
 * Each line holds: index, weight, parent, left child, right child.
 */
void show(node data[], int len)
{
    int row = 1;
    while (row < len) {
        const node *cur = &data[row];
        printf("%d %d %d %d %d\n", row, cur->weight, cur->parent, cur->lChild, cur->rChild);
        row++;
    }
}
/**
 * Print the Huffman code of one leaf as "<letter>:<bits> ".
 *
 * @param data  finished Huffman tree (parent index 0 marks the root)
 * @param id    index of the leaf to encode
 *
 * The path is walked from the leaf up to the root; a step taken from a left
 * child contributes '0' and a step from a right child contributes '1'. The
 * bits are written back to front so the code reads root-to-leaf without a
 * separate reversal pass.
 *
 * The buffer is sized from the measured depth: a fixed 8-byte buffer is
 * overrun as soon as a code reaches 8 bits.
 *
 * NOTE(review): the letter is printed as id + 96, which matches the real
 * character only when the letters present form a contiguous run starting
 * at 'a' — confirm against the caller before relying on the label.
 */
void coding(node data[], int id)
{
    int leaf = id;

    /* First pass: number of edges between the leaf and the root. */
    int depth = 0;
    for (int cur = id; data[cur].parent != 0; cur = data[cur].parent) {
        depth++;
    }

    char bits[depth + 1];
    int pos = depth;
    bits[pos] = '\0';

    /* Second pass: fill the buffer from the end towards the front. */
    for (int cur = id; data[cur].parent != 0; cur = data[cur].parent) {
        int up = data[cur].parent;
        if (data[up].lChild == cur) {
            bits[--pos] = '0';
        } else if (data[up].rChild == cur) {
            bits[--pos] = '1';
        }
    }

    printf("%c:%s ", leaf + 96, bits + pos);
}
/**
 * Demo driver: counts the letters of a hard-coded string, prints the
 * frequency line, builds the Huffman tree, prints its table, prints the
 * code of every leaf and finally echoes the input string.
 */
int main()
{
    /* Input is hard-coded to the second sample ("aabccc"). To read from
       stdin instead, use a bounded read such as scanf("%29s", data). */
    char data[30] = {'a','a','b','c','c','c'};
    int asc[26] = {0};
    int len = 0;

    getCharData(data, &len, asc);
    /* No letters counted: a zero-length tree array must not be declared. */
    if (len == 0) {
        return 0;
    }

    /* Slot 0 is the sentinel; leaves occupy slots 1..len in letter order. */
    node test[len * 2];
    for (int i = 0, j = 1; i < 26; ++i) {
        if (asc[i] != 0) {
            test[j++].weight = asc[i];
            printf("%c:%d ", i + 'a', asc[i]);
        }
    }
    printf("\n");

    /* initNode leaves the leaf weights assigned above untouched. */
    initNode(test, len * 2);
    createTree(test, len * 2);
    show(test, len * 2);

    /* Leaves are slots 1..len. */
    for (int i = 1; i < len + 1; ++i) {
        coding(test, i);
    }
    printf("\n%s", data);
    return 0;
}
注意: 由于对c字符数组的存储忘得有些厉害,所以这里的编码并没有存储起来,并且也是按字符顺序输出的。和题目要求不太一样,但是编码已知,只需存储起来,然后遍历字符串,然后进行输出即可。
如果看了有收获,可以点赞加关注😉,看计算机小白的成长之路。