这题目看起来挺大的,其实细细分析起来,主要是这么几个。首先,如何根据各个字符出现的频率,求出WPL;其次,如何比较学生的输入是否是正确的最优编码。
对第一个问题,其实可以发现,在利用WPL最优算法构造Huffman Tree的时候,除了叶节点外的所有节点的频率之和就是WPL(证明略)。因此,只要写个最小堆,把所有频率塞进去之后,拿出两个最小值来,求和,再把这个和塞回堆中,然后重复,直到堆中只剩一个元素。只要把这些和都加起来,就是WPL了。
第二个问题,有这么几个方面需要考察。首先,是否输入了其他的字符;其次,得到的编码长度是否是WPL;最后,是否有某个字符的编码是另一个字符编码的前缀。对于第一个和第二个,将最开始输入的字符以及频率保存下来,之后每次将学生的答案输入时,都对学生输入的字符进行遍历,找到对应的频率(没有就表示输入其他字符),然后将频率与对应的编码长度相乘,累加求和,得到最终编码长度,再和WPL相比即可。找前缀,需要将学生输入的元素两两比较(两个for循环),在较长的编码中寻找较短的编码(strstr(a,b)函数,在字符串a中寻找b,若有,返回指向b在a中第一次出现位置的指针,没有则返回NULL),若返回的指针仍指向较长字符串的开头,则较短的编码是前缀,否则不是。这样,整个问题就被解决了。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of distinct characters N, read in main(); also used there to size the heap and record arrays and by find_flu() as the record count. */
int Maxsize=0;
/* One character together with its frequency; also used as the heap element. */
typedef struct _node{
/* frequency of the character (the heap is ordered on this field) */
int f;
/* the character itself; cnt_wpl() stores '*' here for merged nodes */
char c;
}Node;
/* One line of a student submission: a character and the code assigned to it. */
typedef struct _code{
/* the character being encoded */
char c;
/* NUL-terminated code string; storage is provided by the caller */
char *code;
}Code;
/* Array-backed min-heap of Node ordered by frequency. */
typedef struct _heap{
/* element array; elements live at indices 1..nowsize, main() stores a sentinel at index 0 */
Node *p;
/* number of elements currently in the heap */
int nowsize;
}Heap;
/* Pushes record into the min-heap (sift-up from the bottom). */
void insert_heap(Heap *heap,Node record);
/* Pops and returns the node with the smallest frequency. */
Node dele_heap(Heap *heap);
/* Consumes the heap and returns the optimal weighted path length. */
int cnt_wpl(Heap *heap);
/* Returns the frequency recorded for character c, or -1 if c is not recorded. */
int find_flu(Node *record,char c);
/* Returns non-zero when the shorter of a and b is a prefix of the longer one. */
int check(char a[],char b[]);
int main(){
scanf("%d",&Maxsize);
Heap *heap=(Heap*)malloc(sizeof(Heap)); //创建堆,为了哈夫曼树
heap->nowsize=0;
heap->p=(Node*)malloc((Maxsize+1)*sizeof(Node));
heap->p[0].f=-10000;
int wpl;
Node *record=(Node*)malloc((Maxsize)*sizeof(Node));//存储字符以及出现的频率
int i,j;
for(i=0;i<Maxsize;i++){
getchar();
scanf("%c",&record[i].c);
scanf("%d",&record[i].f);
insert_heap(heap,record[i]);
}
wpl=cnt_wpl(heap);//计算哈夫曼编码的最优WPL
// printf("%d\n",wpl);
// for(i=0;i<Maxsize;i++){
// printf("%c %d\n",record[i].c,record[i].f);
// }
Code stcode[Maxsize]; //记录学生输入的字符以及对应的编码。
int flu=0;
int flag=0;
int stwpl=0;
int number=0;
int k;
scanf("%d",&number);
for(k=0;k<number;k++){
stwpl=0;
for(i=0;i<Maxsize;i++){
getchar();
stcode[i].code=(char*)malloc(Maxsize*sizeof(char));
scanf("%c %s",&stcode[i].c,stcode[i].code);
flu=find_flu(record,stcode[i].c);
printf("flu= %d\n",flu);
if(flu==-1){
flag=-1; //表示输入了原本不存在的字符
}
else
{
stwpl=flu*strlen(stcode[i].code)+stwpl; //计算学生输入编码的WPL
}
}
int flag2=0;
for(i=0;i<Maxsize;i++){
for(j=i+1;j<Maxsize;j++){
flag2=check(stcode[i].code,stcode[j].code);
if(flag2){
break;
}
}
if(flag2){
break;
}
}
if(flag==-1){
printf("DIF CHAR No\n");
}
else
{
if(stwpl!=wpl){
printf("DIF LENGNo\n");
}else{
if(flag2){
printf("REPET No\n");
}
else
{
printf("Yes\n");
}
}
}
}
return 0;
}
/*
 * Prefix test between two codes, in either direction.
 *
 * Returns 1 when the shorter of a and b is a prefix of the longer one
 * (equal strings count as a prefix of each other), 0 otherwise.
 */
int check(char a[],char b[]){
    size_t len_a=strlen(a);
    size_t len_b=strlen(b);
    size_t shorter=len_a<len_b?len_a:len_b;

    /* The shorter string is a prefix exactly when both agree on its full length. */
    return strncmp(a,b,shorter)==0;
}
/*
 * Looks up the frequency recorded for character c.
 *
 * Scans the first Maxsize entries of record and returns the frequency of
 * the first entry whose character equals c, or -1 when there is none.
 */
int find_flu(Node *record,char c){
    int idx;

    for(idx=0;idx<Maxsize;idx++){
        if(record[idx].c==c){
            return record[idx].f;
        }
    }
    return -1;
}
/*
 * Computes the optimal weighted path length from a heap of leaf frequencies.
 *
 * Repeatedly pops the two smallest nodes, pushes back a node holding their
 * sum, and accumulates every such sum. The number of merges is fixed up
 * front at (initial element count - 1). The heap is consumed in the process.
 */
int cnt_wpl(Heap *heap){
    int total=0;
    int merges=heap->nowsize-1;

    for(;merges>0;merges--){
        Node left=dele_heap(heap);
        Node right=dele_heap(heap);
        Node merged;

        merged.c='*'; /* placeholder character for an internal node */
        merged.f=left.f+right.f;
        total+=merged.f;
        insert_heap(heap,merged);
    }
    return total;
}
/*
 * Removes and returns the node with the smallest frequency.
 *
 * On an empty heap a diagnostic is written to stderr (stdout carries the
 * program's answers) and a node with f == 0 and c == '\0' is returned, so
 * the function always returns a value.
 */
Node dele_heap(Heap *heap){
    if(heap->nowsize==0){
        Node empty;
        empty.f=0;
        empty.c='\0';
        fprintf(stderr,"Heap is empty");
        return empty;
    }

    Node top=heap->p[1];
    Node last=heap->p[heap->nowsize];
    int parent=1;
    int child;

    heap->nowsize-=1;
    /* Sift down: move the hole from the root towards the leaves until `last` fits. */
    while(2*parent<=heap->nowsize){
        child=2*parent;
        /* Pick the smaller child; the right child exists only if child != nowsize. */
        if(child!=heap->nowsize && heap->p[child].f>heap->p[child+1].f){
            child++;
        }
        if(last.f<heap->p[child].f){
            break;
        }
        heap->p[parent]=heap->p[child];
        parent=child;
    }
    heap->p[parent]=last;
    return top;
}
/*
 * Pushes record into the min-heap.
 *
 * The new element starts in the first free slot (index nowsize + 1) and is
 * sifted up while its parent has a larger frequency. The explicit i > 1
 * bound stops the walk at the root even when the index-0 sentinel is not
 * smaller than record.f. The caller must ensure the array has room for one
 * more element.
 */
void insert_heap(Heap *heap,Node record){
    int i=heap->nowsize+1;

    while(i>1 && heap->p[i/2].f>record.f){
        heap->p[i]=heap->p[i/2];
        i/=2;
    }
    heap->p[i]=record;
    heap->nowsize+=1;
}
/* 05-树9 Huffman Codes (30分) */
In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No