数据结构 哈夫曼编码

一、需求分析

功能需求:

设计并实现一个写一个哈夫曼码的编/译码系统,系统功能包括:
(1)I:初始化(Initialization)。从终端读入字符集大小 n,以及 n 个字符和 n 个权值,建立哈夫曼树,并将它存于文件 hfmTree 中;
(2)E:编码(Encoding)。利用以建好的哈夫曼树(如不在内存,则从文件 hfmTree 中读入),对文件ToBeTran 中的正文进行编码,然后将结果存入文件CodeFile 中;
(3)D:译码(Decoding)。利用已建好的哈夫曼树将文件 CodeFile 中的代码进行译码,结果存入文件TextFile 中;
(4)P:印代码文件(Print)。将文件 CodeFile 以紧凑格式显示在终端上,每行 50 个代码。同时将此字符形式的编码文件写入文件 CodePrin 中;
(5)T:印哈夫曼树(Tree printing)。将已在内存中的哈夫曼树以直观的方式(树或凹入表形式)显示在终端上,同时将此字符形式的哈夫曼树写入文件TreePrint 中。

界面需求:

可以输入哈夫曼编码所需的字符及权值,打印出哈夫曼树结构和各个字符的哈夫曼编码。读取字符串可以进行编码和解码并输出,输出在TXT文件中展示

二、概要设计

1

模块层次结构设计

Initialhuffman()进行初始化
Encoding()进行编码
Decoding()进行解码
Codeprint()将CodeFile.txt的数据紧凑打印在终端上

接口设计

int MinVal(HuffmanTree tree,int i);//在前i个节点中选择parent为-1且weight最小的结点,返回其序号
void Select(HuffmanTree tree,int i,int *s1,int *s2);//设定s1序号更小
void Display(HuffmanTree tree);//打印哈夫曼树并将其输出至TreePrint.txt(直观)和hfmTree.txt(供读取)文件
int huffcode(HuffmanTree tree, huffmanCodex code);//编码并写入文件中
int incode(HuffmanTree tree, huffmanCodex code); //对每一个字符进行编码
int decode(HuffmanTree tree);//用树对CodeFile.txt进行解码并输出至TextFile.txt中
void CodePrint();//将CodeFile.txt中的数据改为50个一行并存入CodePrin.txt中

数据结构设计

struct HTNode{
	char ch;
	int weight,lchild,rchild,parent;
};//哈夫曼树结点结构

typedef struct{
        struct HTNode ht[1024];
        int htsize;
}Huffman,*HuffmanTree;//哈夫曼树结构

typedef struct {
	char bits[50];
	char ch;
}huffmanCode,*huffmanCodex;//哈夫曼编码结构

三、详细设计

Initialhuffman.cpp内容如下:

#include<stdio.h>
#include <stdlib.h>
#include<string.h>

struct HTNode{
	char ch;
	int weight,lchild,rchild,parent;
};//哈夫曼树结点结构

typedef struct{
        struct HTNode ht[1024];
        int htsize;
}Huffman,*HuffmanTree;//哈夫曼树结构

//在前i个节点中选择parent为-1且weight最小的结点,返回其序号 
int MinVal(HuffmanTree tree,int i){
	int j,k,min=-1;
	for(j=0;j<i;j++){
		if(tree->ht[j].parent==-1&&min==-1){
			min=tree->ht[j].weight; //将第一个双亲为空的点置为最小值 
			k=j;
			break;
		}
	}
	for(j=0;j<i;j++){
		if(tree->ht[j].parent==-1&&tree->ht[j].weight<min){
			min=tree->ht[j].weight;
			k=j;
		}
	}
	tree->ht[k].parent=-2;//置双亲非空 
	
	return k;
}

void Select(HuffmanTree tree,int i,int *s1,int *s2){//设定s1序号更小 
	int s;
	
	*s1 = MinVal(tree,i);
	*s2 = MinVal(tree,i);
	if(*s1>*s2){
		s=*s1;
		*s1=*s2;
		*s2=s;
	}
}

void Display(HuffmanTree tree){
	int i;
	//定义文件指针
	FILE *f = NULL,*fp = NULL;
	//打开文件
	f = fopen("TreePrint.txt","w");
	fp =fopen("hfmTree.txt","w");
	if(f==NULL||fp==NULL)
	{
	printf("文件读取失败!\n");
	
	}
	char buf[1024];
	//写文件
	strcpy(buf,"下标    字符     权值    左孩子    右孩子   双亲\n");
	fputs(buf,f);
	fprintf(fp,"%d\n",tree->htsize);
	printf("下标    字符     权值    左孩子    右孩子   双亲\n") ; 
	for(i=1;i<=tree->htsize;i++){//将已在内存中的哈夫曼树以直观的方式(树或凹入表形式)显示在终端上,同时将此字符形式的哈夫曼树写入文件TreePrint中

		fprintf(f,"%d",i); 
		fprintf(f,"%12c",tree->ht[i].ch);
		fprintf(f,"%12d",tree->ht[i].weight);
		fprintf(f,"%12d",tree->ht[i].lchild);
		fprintf(f,"%12d",tree->ht[i].rchild);
		fprintf(f,"%12d\n",tree->ht[i].parent);
		fprintf(fp,"%d ",i); 
		fprintf(fp,"%c " ,tree->ht[i].ch);
		fprintf(fp,"%d ",tree->ht[i].weight);
		fprintf(fp,"%d ",tree->ht[i].lchild);
		fprintf(fp,"%d ",tree->ht[i].rchild);
		fprintf(fp,"%d\n",tree->ht[i].parent);
		fprintf(f,"_______________________________________________\n");
		printf("%d",i); 
		printf("%9c",tree->ht[i].ch);
		printf("%9d",tree->ht[i].weight);
		printf("%9d",tree->ht[i].lchild);
		printf("%9d",tree->ht[i].rchild);
		printf("%9d\n",tree->ht[i].parent);
		printf("_______________________________________________\n");
	}	
	fclose(f);//关闭文件
} 


int main(){//初始化(Initialization)。从终端读入字符集大小n,以及n个字符和n个权值,建立哈夫曼树,并将它存于文件hfmTree中
	int n,i;
	printf("请输入字符集大小:");
    scanf("%d",&n);
    getchar();
    int total = 2 * n -1;
    HuffmanTree tree = (HuffmanTree)malloc(total * sizeof(Huffman));
    tree->htsize = total;
	for(i=1;i<=n;i++){//下标从1开始 
    	printf("请输入字符%d:",i);
    	scanf("%c",&tree->ht[i].ch);
    	getchar();
    	printf("请输入该字符的权重:");
		scanf("%d",&tree->ht[i].weight);
		getchar();
        tree->ht[i].lchild=-1;
        tree->ht[i].rchild=-1;
        tree->ht[i].parent=-1;
       
    }
    int s1,s2; 
    for(;i<=tree->htsize;++i){
    	Select(tree,i,&s1,&s2);
    	//将两个最小结点合并成新节点,并接在静态链表后 
    	tree->ht[s1].parent=tree->ht[s2].parent=i;
		tree->ht[i].lchild=s1;
		tree->ht[i].rchild=s2;
		tree->ht[i].parent=-1;
		tree->ht[i].weight=tree->ht[s1].weight+tree->ht[s2].weight;
		tree->ht[i].ch=NULL; //非树叶结点无字符 
	
	}
	Display(tree);

	return 0;
} 

Encoding.cpp内容如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 16
struct HTNode{
	char ch;
	int weight,lchild,rchild,parent;
};//哈夫曼树结点结构

typedef struct{
        struct HTNode *ht;
        int htsize;
}Huffman,*HuffmanTree;//哈夫曼树结构


typedef struct {
	char bits[50];
	char ch;
}huffmanCode,*huffmanCodex;//哈夫曼编码结构 


void Display(HuffmanTree tree){
	int i;
	printf("下标    字符     权值    左孩子    右孩子   双亲\n") ; 
	for(i=1;i<=tree->htsize;i++){
		printf("%d",i); 
		printf("%9c",tree->ht[i].ch);
		printf("%9d",tree->ht[i].weight);
		printf("%9d",tree->ht[i].lchild);
		printf("%9d",tree->ht[i].rchild);
		printf("%9d\n",tree->ht[i].parent);
		printf("_______________________________________________\n");
	}	
} 

//对每一个字符进行编码 
int incode(HuffmanTree tree, huffmanCodex code)
{
	int i, parent, child;
	huffmanCode temp;
	char cd[tree->htsize];
	int start;//记录尾结点 
	for (i = 1; i <= (tree->htsize+1)/2; i++) {
		child = i;
		temp.ch=tree->ht[i].ch;
		parent = tree->ht[child].parent;
		start = tree->htsize;
		while (parent != -1) {//从叶子到根逆向求编码 
			if (tree->ht[parent].lchild == child) {
				cd[--start] = '0';
			}
			else {
				cd[--start] = '1';
			}
			child = parent;
			parent = tree->ht[child].parent;
		}
		int j;
		//将序列倒序储存 
		for(j=start;j<tree->htsize;j++){
			temp.bits[j-start]=cd[j];
		}
		temp.bits[j-start]='\0'; //在末尾加上标识符 
		code[i] = temp;
	}

}


//编码并写入文件中 
int huffcode(HuffmanTree tree, huffmanCodex code)
{
	char c;
	int j, index;
	FILE *write;
	char buf[1024];  //缓冲区
	FILE *fp;            //文件指针
 	int len;             //行字符个数
 	if((fp = fopen("ToBeTran.txt","r")) == NULL)
 	{
 		printf("fail to read");
 		return 0;
 	}

 	while(fgets(buf,1024,fp) != NULL)
 	{
 		len = strlen(buf);
 		buf[len] = '\0';  //去掉换行符
 		printf("%s %d \n",buf,len);
 	}
 	fclose(fp);
	if ((write = fopen("CodeFile.txt", "w")) == NULL) {
		printf("文件读取失败\n");
		return 0;
	}
	int i;
	for(j=0;j<len;j++){
		//printf("测试点2\n");
		for(i=1;i<(tree->htsize+1)/2;i++){			
			if(buf[j]==code[i].ch){					//逐个比较并输出编码 
				fputs(code[i].bits,write);
				break;	
			}
		}
	}
	fclose(fp);
	fclose(write);
	return 0;
}


int main(){//利用以建好的哈夫曼树,对文件ToBeTran 中的正文进行编码,然后将结果存入文件CodeFile中

	char buf[1024];  //缓冲区
	FILE *fp;            //文件指针
 	int len;             //行字符个数
 	if((fp = fopen("ToBeTran.txt","r")) == NULL)
 	{
 		printf("fail to read");
 		return 0;
 	}

 	while(fgets(buf,1024,fp) != NULL)
 	{
 		len = strlen(buf);
 		buf[len] = '\0';  //去掉换行符
 		printf("ToBeTran中的内容为:%s 长度:%d \n",buf,len );
 	}
 	fclose(fp);
 	FILE *f=NULL;
    f=fopen("hfmTree.txt","r");
    if(!f)
    {
        printf("error!\n");
        return 0;
    }
    int n;
	fscanf(f,"%d\n",&n);
	HuffmanTree tree = (HuffmanTree)malloc(n * sizeof(Huffman));

	tree->htsize=n;
	int i,j; 
	//读取哈夫曼树 
	for(i=1;i<=tree->htsize;i++){
		fscanf(f,"%d ",&j);
		fscanf(f,"%c %d %d %d %d\n",&tree->ht[i].ch,&tree->ht[i].weight,&tree->ht[i].lchild,&tree->ht[i].rchild,&tree->ht[i].parent);
	
	}
	printf("读入的哈夫曼树为:\n"); 
	Display(tree);
    fclose(f);
    
    huffmanCode code[tree->htsize];
    //huffmanCode *code = (huffmanCodex)malloc(n * sizeof(huffmanCode));;
   // huffmanCode temp;
	//strcpy(temp.bits,"asd");
	//code[1]=temp;
	incode(tree,code);
	printf("各个字符的编码为:\n");
    for(i=1;i<=8;i++){
    	printf("%c\n",code[i].ch);
    	puts(code[i].bits);
	}
	
	huffcode(tree,code);
	
   

  return 0;
}

Decoding.cpp内容如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 16
struct HTNode{
	char ch;
	int weight,lchild,rchild,parent;
};//哈夫曼树结点结构

typedef struct{
        struct HTNode *ht;
        int htsize;
}Huffman,*HuffmanTree;//哈夫曼树结构


typedef struct {
	char bits[50];
	char ch;
}huffmanCode,*huffmanCodex;//哈夫曼编码结构 

void Display(HuffmanTree tree){
	int i;
	printf("下标    字符     权值    左孩子    右孩子   双亲\n") ; 
	for(i=1;i<=tree->htsize;i++){
		printf("%d",i); 
		printf("%9c",tree->ht[i].ch);
		printf("%9d",tree->ht[i].weight);
		printf("%9d",tree->ht[i].lchild);
		printf("%9d",tree->ht[i].rchild);
		printf("%9d\n",tree->ht[i].parent);
		printf("_______________________________________________\n");
	}	
} 

int decode(HuffmanTree tree)
{
	int i,len;
	char b;
	FILE *fp=NULL; 
	FILE *write=NULL;
	char buf[1024];  //缓冲区
	if((fp = fopen("CodeFile.txt","r")) == NULL){
 		printf("fail to read");
 		return 0;
 	}

 	while(fgets(buf,1024,fp) != NULL)
 	{
 		len = strlen(buf);
 		buf[len] = '\0';  //去掉换行符
 		printf("读入CodeFile为:%s 长度为:%d \n",buf,len);
 	}
 	fclose(fp);
 	int j;
	if ((write = fopen("TextFile.txt", "w")) == NULL) {
		printf("文件读取失败\n");
		return 0;
	}

	
	i = tree->htsize;
	printf("CodeFile的译码为:");
	for(j=0;j<len;j++){
		b=buf[j];
		if (b == '0'){//等于0时往左子树找 
			i = tree->ht[i].lchild;
		}
			
		else if (b == '1'){//等于1时往右子树找 
			i = tree->ht[i].rchild;
		}

		if (tree->ht[i].lchild == -1)//找到叶子节点后输出 
		{
			fprintf(write, "%c", tree->ht[i].ch);
			printf("%c", tree->ht[i].ch);
			i = tree->htsize;//返回根节点 
		}
	} 
	

	if (tree->ht[i].lchild != -1 && i != tree->htsize) {
		printf("电文有错\n");
	}
	fclose(write);
	return 0;
}


int main(){
	FILE *f=NULL;
    f=fopen("hfmTree.txt","r");
    if(!f)
    {
        printf("error!\n");
        return 0;
    }
    int n;
	fscanf(f,"%d\n",&n);
	HuffmanTree tree = (HuffmanTree)malloc(n * sizeof(Huffman));

	tree->htsize=n;
	int i,j; 
	//读取哈夫曼树 
	for(i=1;i<=tree->htsize;i++){
		fscanf(f,"%d ",&j);
		fscanf(f,"%c %d %d %d %d\n",&tree->ht[i].ch,&tree->ht[i].weight,&tree->ht[i].lchild,&tree->ht[i].rchild,&tree->ht[i].parent);
	
	}
    fclose(f);
    
    huffmanCode code[tree->htsize];
	decode(tree);
   

  return 0;
}

CodePrint.cpp内容如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define N 16
struct HTNode{
	char ch;
	int weight,lchild,rchild,parent;
};//哈夫曼树结点结构

typedef struct{
        struct HTNode *ht;
        int htsize;
}Huffman,*HuffmanTree;//哈夫曼树结构


typedef struct {
	char bits[50];
	char ch;
}huffmanCode,*huffmanCodex;//哈夫曼编码结构 


int main(){//将文件 CodeFile 以紧凑格式显示在终端上,每行 50个代码。同时将此字符形式的编码文件写入文件 CodePrin中

	int i,len;
	FILE *fp=NULL; 
	FILE *print=NULL; 
	char buf[1024];  //缓冲区
	if((fp = fopen("CodeFile.txt","r")) == NULL){
 		printf("fail to read");
 		return 0;
 	}
 		if((print = fopen("CodePrin.txt","w")) == NULL){
 		printf("fail to read");
 		return 0;
 	}


 	while(fgets(buf,1024,fp) != NULL)
 	{
 		len = strlen(buf);
 		buf[len] = '\0';  //去掉换行符
 		printf("读入CodeFile为:%s 长度为:%d \n",buf,len);
 	}
 	fclose(fp);
 	int j;
 	for(j=0;j<len;j++){
 		fprintf(print,"%c",buf[j]);
 		if((j+1)%50==0&&j!=0){//到五十个了换一次行 
 			fprintf(print,"\n");
		 }
 		
	 }
	 fclose(print);
	 return 0;
}

四、调试分析

1.建立哈夫曼树的第i个结点需要在前i-1个结点中挑选出两个无双亲且根结点权值最小的子树。
2.对每个字符求编码,是从树叶结点出发自下而上搜索至根结点的过程。
3.译码过程是自上而下搜索的过程,搜索到树叶结点时则确定了二进制电文对应的一个字符。

五、用户手册

本程序经过Dev-C++编译,运行环境为Windows 10.
1、本程序的执行文件为:Initialhuffman.exe,Encoding.exe, Decoding.exe, CodePrint.exe
2、打开Initialhuffman.exe,输入字符集大小之后回车,然后分别输入各个字符及权值。
将会生成哈夫曼树并存入TreePrint.txt中
3.在ToBeTran.txt中输入想要编码的字符串,然后打开Encoding.exe进行编码,可以在CodeFile.txt中查看编码结果
4.打开Decoding.exe进行译码,可以在TextFile.txt中查看译码结果
5.使用CodePrint.exe可以将CodeFile.txt中的内容变成50个字符一行。

六、测试结果

  1. 打开Initialhuffman.exe,输入5个字符和权值测试
    5
    q 1
    w 2
    e 4
    r 5
    t 12
    2
    3
    2.在ToBeTran.txt中输入qwetrewqrrrwwwqqrqrt,运行Encoding.exe
    4
    5
    3.运行Decoding.exe
    6
    4.运行CodePrint.exe进行换行7

七、附录

源程序文件名清单:Initialhuffman.cpp,Encoding.cpp,Decoding.cpp,CodePrint.cpp,
CodePrint.exe, Initialhuffman.exe,Encoding.exe,Decoding.exe, CodeFile.txt, CodePrin.txt, hfmTree.txt, TextFile.txt, ToBeTran.txt, TreePrint.txt


版权声明:本文为m0_48443990原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。