哈夫曼编码算法 (Huffman coding Algorithm)

Test:

 

/// <summary>
       ///A test for DeCode
       ///</summary>
       [TestMethod()]
       public void DeCodeTest()
       {
           string txt = File.ReadAllText("sampletxt.txt");

           HaffManCode target = new HaffManCode(); // TODO: Initialize to an appropriate value
           string src = "eeeaabbbccccdddddd";

           for (int i = 0; i < 3; i++)
           {
               src = src + src;
           }

           byte[] actual;
           actual = target.Encode(src);

           string decodedStr = target.Decode(actual);
           Assert.IsTrue(decodedStr == src);

       }

       /// <summary>
       ///A test for DeCode
       ///</summary>
       [TestMethod()]
       public void DeCodeTest1()
       {
        
           HaffManCode target = new HaffManCode(); // TODO: Initialize to an appropriate value
           string src = File.ReadAllText("sampletxt.txt");

           byte[] srcBytes = File.ReadAllBytes("sampletxt.txt");

          // src = "abcdefgeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaaaaaaaaabbbcd";

           for (int i = 0; i < 0; i++)
           {
               src = src + src;
           }
         
           byte[] actual;
           actual = target.Encode(src);

           string decodedStr = target.Decode(actual);
           Assert.IsTrue(decodedStr == src);

           double rate =  actual.Length/(src.Length * 1.0);

       }

 

 

code:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
using System.Runtime.Serialization.Formatters.Binary;
using System.Runtime.Serialization;
using System.IO;

namespace Algorithm
{
    [Serializable]
    public struct OccurCount : IComparable
    {
        public char Value;
        public int Count;

        public int CompareTo(object obj)
        {
            OccurCount to = (OccurCount)obj;

            if (this.Count > to.Count)
            {
                return 1;
            }
            else if (this.Count < to.Count)
            {
                return -1;
            }
            else
            {
                return 0;
            }

        }
    }

    public class HaffManCode
    {
        public const int ShiftLength = 64;
        public int CodeTableLength
        {
            get;
            private set;
        }

        private Dictionary<char, string> codeTable;

        public byte[] Encode(string src)
        {
            List<OccurCountTreeNode> occurCounts = StatisticOccur(src);

            OccurCountTreeNode root = BuildWeightTree(occurCounts);
            codeTable = BuildCodeTable(root);

            StringBuilder output = new StringBuilder();

            for (int i = 0; i < src.Length; i++)
            {
                string code = codeTable[src[i]];
                output.Append(code);
            }

            string codeString = output.ToString();
            //
            byte[] ret = BuildBytes(codeString, root);

            return ret;
        }

        private byte[] BuildBytes(string chars, OccurCountTreeNode root)
        {
            //index range start:0 , length :ShiftLength
            //codetable buff  start:ShiftLength , length:CodeTableLength
            //codes buff start:ShiftLength + CodeTableLength , length:codesBytesLen
            byte[] codeTableBuff = SerializeHuffmanTree(root);
            CodeTableLength = codeTableBuff.Count();

            int codesBuffCount = (chars.Length + 7) / 8;
            byte[] buff = new byte[ShiftLength + codeTableBuff.Length + codesBuffCount];

            // put codetable bytes to buff
            Array.Copy(codeTableBuff, 0, buff, ShiftLength, codeTableBuff.Length);

            //put string codes to bytes,and update buff
            int remain = chars.Length;
            int cutIndex = 0;
            while (remain > 0)
            {
                int cutLen = remain >= 8 ? 8 : remain;
                byte bit = 0x00;
                int sliceIndex = 0;
                while (sliceIndex < cutLen)
                {
                    bit |= (byte)(((UInt16)chars[chars.Length - remain + sliceIndex] - 48) << (8 - sliceIndex - 1));

                    sliceIndex++;
                }

                buff[cutIndex + ShiftLength + codeTableBuff.Length] = bit;
                cutIndex++;
                remain -= cutLen;
            }
           

            int codesBytesLen = (chars.Length + 7) / 8;

            UpdateBuffIndex(buff, 0, codeTableBuff.Length);
            UpdateBuffIndex(buff, 4, chars.Length);
            UpdateBuffIndex(buff, 8, codesBytesLen);

            return buff;

        }

        private void UpdateBuffIndex(byte[] buff, int start, int value)
        {
            buff[start] = (byte)((value >> 24) & 0xff);
            buff[start + 1] = (byte)((value >> 16) & 0xff);
            buff[start + 2] = (byte)((value >> 8) & 0xff);
            buff[start + 3] = (byte)((value >> 0) & 0xff);
        }

        private byte[] SerializeHuffmanTree(OccurCountTreeNode root)
        {
            IFormatter formatter = new BinaryFormatter();
            using (MemoryStream stream = new MemoryStream())
            {
                formatter.Serialize(stream, root);
                byte[] codeTableBuff = stream.ToArray();               
                return codeTableBuff;
            }
        }

        private Dictionary<char, string> BuildCodeTable(OccurCountTreeNode root)
        {
            Dictionary<char, string> dic = new Dictionary<char, string>();
            OccurCountTreeNode temp = root;

            StringBuilder path = new StringBuilder();

            Queue<OccurCountTreeNode> que = new Queue<OccurCountTreeNode>();
            que.Enqueue(root);

            while (que.Count > 0)
            {
                OccurCountTreeNode node = que.Dequeue();

                if (node.LeftNode == null
                    && node.RightNode == null
                    )
                {
                    dic[node.Value.Value] = node.Path;
                }
                else
                {
                    if (node.LeftNode != null)
                    {
                        node.LeftNode.Path = node.Path + "0";
                        que.Enqueue(node.LeftNode);
                    }

                    if (node.RightNode != null)
                    {
                        node.RightNode.Path = node.Path + "1";
                        que.Enqueue(node.RightNode);
                    }
                }
            }

            return dic;
        }

        private OccurCountTreeNode BuildWeightTree(List<OccurCountTreeNode> occurCounts)
        {
            OccurCountTreeNode root = null;
            while (occurCounts.Count > 1)
            {
                OccurCountTreeNode left = occurCounts[0];
                OccurCountTreeNode right = occurCounts[1];
                OccurCountTreeNode parent = new OccurCountTreeNode();

                parent.LeftNode = left;
                parent.RightNode = right;
                parent.Value = new OccurCount();

                parent.Value.Count = left.Value.Count + right.Value.Count;

                root = parent;

                occurCounts.RemoveAt(0);
                occurCounts.RemoveAt(0);

                //insert sort
                //todo binsearch insert
                //fibonachi insert
                if (occurCounts.Count > 0)
                {
                    int minIndex = 0;
                    while (minIndex < occurCounts.Count
                        && parent.Value.Count > occurCounts[minIndex].Value.Count)
                    {
                        minIndex++;
                    }

                    occurCounts.Insert(minIndex, parent);
                }
            }

            return root;
        }

        private List<OccurCountTreeNode> StatisticOccur(string src)
        {
            List<OccurCountTreeNode> result = new List<OccurCountTreeNode>();

            Dictionary<char, int> dic = new Dictionary<char, int>();
            for (int i = 0; i < src.Length; i++)
            {
                if (dic.ContainsKey(src[i]))
                {
                    dic[src[i]]++;
                }
                else
                {
                    dic[src[i]] = 1;
                }
            }

            foreach (char c in dic.Keys)
            {
                OccurCountTreeNode node = new OccurCountTreeNode();

                OccurCount item = new OccurCount();
                item.Value = c;
                item.Count = dic[c];

                node.Value = item;
                result.Add(node);
            }

            result.Sort();
            return result;
        }

        public string Decode(byte[] buff)
        {
            string ret = string.Empty;

            //De-serialize codetable
            int codeTableBytesLength = (buff[0] << 24 | buff[1] << 16 | buff[2] << 8 | buff[3]);
            byte[] buffDe = new byte[codeTableBytesLength];
            Array.Copy(buff, ShiftLength, buffDe, 0, codeTableBytesLength);
            IFormatter formatterDe = new BinaryFormatter();
            OccurCountTreeNode huffmanTreeDe = null;
            using (MemoryStream msDe = new MemoryStream(buffDe))
            {
                 huffmanTreeDe = (OccurCountTreeNode)formatterDe.Deserialize(msDe);
            }

            int codeCharsLeng = (buff[4] << 24 | buff[5] << 16 | buff[6] << 8 | buff[7]);

            int codeBytesLen = (buff[8] << 24 | buff[9] << 16 | buff[10] << 8 | buff[11]);

            byte[] codesBuff = new byte[codeBytesLen];


            Array.Copy(buff, ShiftLength + codeTableBytesLength, codesBuff, 0, codeBytesLen);

            ret = DecodeString(codesBuff, huffmanTreeDe, codeCharsLeng);

            return ret;

        }

        private string DecodeString(byte[] codesBuff, OccurCountTreeNode huffmanTree, int codeCharsLeng)
        {
            List<UInt16> bins = GetBinFromBuff(codesBuff, codeCharsLeng);
            //string binsStr = string.Join("", bins.ToArray());
            StringBuilder sb = new StringBuilder();
            OccurCountTreeNode temp = huffmanTree;

            int i = 0;
            while (i < bins.Count
                && temp != null)
            {
                UInt16 val = bins[i];
                if (val == 0)
                {
                    temp = temp.LeftNode;
                }
                else if (val == 1)
                {
                    temp = temp.RightNode;
                }

                if (temp.RightNode == null
                    && temp.LeftNode == null
                    )
                {
                    sb.Append(temp.Value.Value);
                    temp = huffmanTree;
                }

                i++;
            }

            return sb.ToString();
        }

        private List<UInt16> GetBinFromBuff(byte[] codesBuff, int codeCharsLen)
        {
            List<UInt16> bins = new List<UInt16>();
            int i = 0;
            while (i < codesBuff.Length)
            {
                //UInt16 bit0 = (UInt16)((codesBuff[i] >> 7) & 0x01);
                //UInt16 bit1 = (UInt16)((codesBuff[i] >> 6) & 0x01);
                //UInt16 bit2 = (UInt16)((codesBuff[i] >> 5) & 0x01);
                //UInt16 bit3 = (UInt16)((codesBuff[i] >> 4) & 0x01);
                //UInt16 bit4 = (UInt16)((codesBuff[i] >> 3) & 0x01);
                //UInt16 bit5 = (UInt16)((codesBuff[i] >> 2) & 0x01);
                //UInt16 bit6 = (UInt16)((codesBuff[i] >> 1) & 0x01);
                //UInt16 bit7 = (UInt16)((codesBuff[i] >> 0) & 0x01);
                //bins.Add(bit0);
                //bins.Add(bit1);
                //bins.Add(bit2);
                //bins.Add(bit3);
                //bins.Add(bit4);
                //bins.Add(bit5);
                //bins.Add(bit6);
                //bins.Add(bit7);

                for (int j = 0; j < 8; j++)
                {
                    UInt16 bit0 = (UInt16)((codesBuff[i] >> 7 - j) & 0x01);
                    bins.Add(bit0);
                }

                i++;
            }

            int tailcount = codeCharsLen % 8 == 0 ? 0 : 8 - (codeCharsLen % 8);

            if (tailcount > 0)
            {
                bins.RemoveRange(codesBuff.Length * 8 - tailcount, tailcount);
            }

            return bins;

        }
    }


    [Serializable]
    public class OccurCountTreeNode : IComparable
    {
        public  OccurCountTreeNode LeftNode
        {
            get;
            set;
        }

        public  OccurCountTreeNode RightNode
        {
            get;
            set;
        }

        public string Path
        {
            get;
            set;
        }

        public OccurCount Value;

        public int CompareTo(object obj)
        {
            OccurCountTreeNode to = (OccurCountTreeNode)obj;

            if (this.Value.Count > to.Value.Count)
            {
                return 1;
            }
            else if (this.Value.Count < to.Value.Count)
            {
                return -1;
            }
            else
            {
                return 0;
            }

        }
    }
}

转载于:https://www.cnblogs.com/netfuns/archive/2011/07/21/2112560.html