Huffman符号化復号アルゴリズムのC++実装

16106 ワード

ヘッダファイル:
 
/*****************************************************************************
 *                               huffmancode.h
 *
 * Huffman coding algorighm implemented by C++ template.
 *
 * This class is designed for Huffman codeing and decoding algorighm. The
 * priority queue is used in building Huffman tree. For comparing pointers
 * of Huffman Tree Node by their pointing contents, I use the smart pointer
 * replacing nomal pointer. And for saving memory, the coded word is stored
 * bit-by-bit in an unsigned char array.
 *
 * Zhang Ming, 20010-07, Xi'an Jiaotong University.
 *****************************************************************************/


#ifndef HUFFMANCODE_H
#define HUFFMANCODE_H


#include <iostream>
#include <binaryheap.h>


using namespace std;


namespace itlab
{

    /**
     * Object node to be coded
     */
    template <typename Object, typename Weight>
    struct CodeObject
    {
        Object data;
        Weight cost;
    };
    // struct CodeObject


    /**
     * Huffman codeword node
     */
    const int CODESIZE = 2;
    template <typename Object>
    struct HCNode
    {
        Object data;
        unsigned char bits[CODESIZE];
        unsigned int length;
    };
    // struct HCNode


    /**
     * Huffman tree node
     */
    template <typename Object, typename Weight>
    struct HTNode
    {
        Object data;
        Weight cost;
        HTNode<Object,Weight> *left,
                              *right;

        HTNode() : data(Object()), cost(Weight(0)), left(NULL), right(NULL)
        { }
        HTNode( const Object &x, const Weight w,
                HTNode<Object,Weight> *lp=NULL, HTNode<Object,Weight> *rp=NULL )
        {   data = x; cost = w; left = lp; right = rp;    }

        operator Weight() const
        {   return cost;    }

    };
    // struct HTNode


    /**
     * Smart pointer for Huffman tree bode
     */
    template <typename Object, typename Weight>
    class HTNSmartPtr
    {

    public:

        HTNSmartPtr( HTNode<Object,Weight> *p = NULL )
        {   ptr = p;    }

        HTNode<Object,Weight> operator*()
        {   return *ptr;    }
        HTNode<Object,Weight>* operator->()
        {   return ptr;    }

        operator HTNode<Object,Weight> *()
        {   return ptr;    }
        bool operator<( HTNSmartPtr<Object,Weight> p )
        {   return ptr->cost < p->cost;    }
        bool operator>( HTNSmartPtr<Object,Weight> p )
        {   return ptr->cost > p->cost;    }

    private:

        HTNode<Object,Weight> *ptr;

    };
    // class HTNSmartPtr


    /**
     * Huffman tree
     */
    template <typename Object, typename Weight>
    class HuffmanTree
    {

    public:

        HuffmanTree();
        ~HuffmanTree();

//        HuffmanTree( const HuffmanTree &rhs );
//        HuffmanTree & operator=( const HuffmanTree &rhs );

        void code( CodeObject<Object,Weight> *codeArray, int length );
        bool decode( unsigned char bits[CODESIZE], unsigned int length, Object &decodeword );
        void printCode( unsigned char bits[CODESIZE], unsigned int length );
        void printCodeTable();

    private:

        HTNSmartPtr<Object,Weight> root;
        int arraySize;
        HCNode<Object> *codeTable;

        void createHuffmanTree( CodeObject<Object,Weight> *codeArray );
        void createCodeTable();
        void createCodeTableRecursive( HTNSmartPtr<Object,Weight> ht,
                                       unsigned char *code, int pos, int &index );

        void setBit( unsigned char *bits, unsigned int pos, unsigned int state );
        unsigned int getBit( unsigned char *codeword, unsigned int pos );

        void destroy( HTNode<Object,Weight> *&r );

    };
    // class HuffmanTree


    #include "huffmancode-impl.h"

}
// namespace itlab


#endif
// HUFFMANCODE_H

 
実装ファイル:
 
/*****************************************************************************
 *                             huffmancode-impl.h
 *
 * Implementation for Huffman code.
 *
 * Zhang Ming, 2010-07
 *****************************************************************************/


/**
 * constructors and destructor
 */
template <typename Object, typename Weight>
HuffmanTree<Object,Weight>::HuffmanTree() : root(NULL)
{
    arraySize = 0;
    codeTable = NULL;
}

template <typename Object, typename Weight>
HuffmanTree<Object,Weight>::~HuffmanTree()
{
    HTNode<Object,Weight> *r = root;
    destroy(r);
    root = NULL;
    delete []codeTable;
}


/**
 * Huffman codeing.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::code( CodeObject<Object,Weight> *codeArray, int length )
{
    arraySize = length;

    if( codeTable != NULL )
        delete []codeTable;
    codeTable = new HCNode<Object>[arraySize];

    createHuffmanTree( codeArray );
    createCodeTable();
}


/**
 * If decodeing successful return ture, else return false;
 */
template <typename Object, typename Weight>
bool HuffmanTree<Object,Weight>::decode( unsigned char codeword[CODESIZE], unsigned int length,
                                         Object &decodeword )
{
    unsigned int pos = 0;
    HTNSmartPtr<Object,Weight> p = root;
    for( pos=0; pos<length; ++pos )
        if( p != NULL )
        {
            if( getBit( codeword, pos ) == 0 )
                p = p->left;
            else
                p = p->right;
        }
        else
            break;

    if( pos == length && p != NULL && p->left == NULL && p->right == NULL  )
    {
        decodeword = p->data;
        return true;
    }

    else
        return false;
}


/**
 * Print the codedword.
 */
template <typename Object, typename Weight>
inline void HuffmanTree<Object,Weight>::printCode( unsigned char codewrod[CODESIZE], unsigned int length )
{
    for( unsigned int j=0; j<length; ++j )
        cout << getBit( codewrod, j );
}


/**
 * Print the coded table.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::printCodeTable()
{
	cout << "Object\tCode\tSize" << endl;
	for( int i=0; i<arraySize; ++i )
    {
        cout << codeTable[i].data  << "\t";
        printCode( codeTable[i].bits, codeTable[i].length );
        cout << "\t" << codeTable[i].length << endl;
    }
	cout << endl;
}


/**
 * Create the Huffman tree.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::createHuffmanTree( CodeObject<Object,Weight> *codeArray )
{
    BinaryHeap<HTNSmartPtr<Object,Weight> > heap( arraySize );
    HTNSmartPtr<Object,Weight>  tree = NULL,
                                subtreeL = NULL,
                                subtreeR = NULL;

    for( int i=0; i<arraySize; ++i )
    {
        tree = new HTNode<Object,Weight>( codeArray[i].data, codeArray[i].cost, NULL, NULL );
        heap.insert( tree );
    }

    while( heap.size() > 1 )
    {
        heap.deleteMin( subtreeL );
        heap.deleteMin( subtreeR );
        tree = new HTNode<Object,Weight>( Object(), subtreeL->cost+subtreeR->cost, subtreeL, subtreeR );
        heap.insert( tree );
    }
    heap.deleteMin( root );
}


/**
 * Create the coded character table.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::createCodeTable()
{
    for( int i=0; i<arraySize; ++i )
    {
        codeTable[i].data = Object();
        codeTable[i].length = 0;
    }

    unsigned char code[CODESIZE];
    int index = 0;
    createCodeTableRecursive( root, code, 0, index );
}


/**
 * Create the coded character table.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::createCodeTableRecursive( HTNSmartPtr<Object,Weight> ht,
                                                           unsigned char *code, int pos, int &index )
{
    if( ht->left )
    {
        setBit( code, pos, 0 );
        createCodeTableRecursive( ht->left, code, pos+1, index );
    }

    if( ht->right )
    {
        setBit( code, pos, 1 );
        createCodeTableRecursive( ht->right, code, pos+1, index );
    }

    if( ht->left==NULL && ht->right==NULL )
    {
        codeTable[index].data = ht->data;
        for( int i=0; i<CODESIZE; ++i )
            codeTable[index].bits[i] = code[i];
        codeTable[index].length = pos;
        index++;
    }
}


/**
 * Set the bit at position pos in the array bits to the value state.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::setBit( unsigned char *bits, unsigned int pos, unsigned int state )
{
	unsigned char mask = 0x80;
	for( unsigned int i=0; i<(pos % 8); ++i )
		mask = mask >> 1;

	if( state == 1 )
		bits[pos/8] = bits[pos/8] | mask;
	else if( state == 0 )
		bits[pos/8] = bits[pos/8] & (~mask);
    else
        cerr << endl << "The bit to be set should be '1' or '0'!" << endl;

	return;
}


/**
 * Get the state of the bit at position pos in the array bits.
 */
template <typename Object, typename Weight>
unsigned int HuffmanTree<Object,Weight>::getBit( unsigned char *bits, unsigned int pos )
{
	unsigned char mask = 0x80;
	for( unsigned int i=0; i<(pos%8); ++i )
		mask = mask >> 1;

	return ( ((mask & bits[(int)(pos/8)]) == mask) ? 1 : 0 );
}


/**
 * Destroy the tree.
 */
template <typename Object, typename Weight>
void HuffmanTree<Object,Weight>::destroy( HTNode<Object,Weight> *&r )
{
    if( r != NULL )
    {
        destroy( r->left );
        destroy( r->right );
        delete r;
    }
    r = NULL;
}

 
テストファイル:
 
/*****************************************************************************
 *                              huffmancode_test.cpp
 *
 * Huffman Code class testing.
 *
 * Zhang Ming, 2010-07
 *****************************************************************************/


#include <iostream>
#include <string>
#include "huffmancode.h"


using namespace std;
using namespace itlab;


const int CHARNUM = 128;
template <typename Object, typename Weight>
void creatCodeInfo( string &str, CodeObject<Object,Weight> *&codeArr, int &length );
template <typename Object, typename Weight>
void printCodeInfo( CodeObject<Object,Weight> *&codeArr, int length );


int main()
{
    int length = 0;
    CodeObject<char, int> *codeArr = NULL;
    string str = "AAAABBCD";
    creatCodeInfo( str, codeArr, length );
    printCodeInfo( codeArr, length );

    HuffmanTree<char, int> ht;
    ht.code( codeArr, length );
    ht.printCodeTable();

    char decodedResult;
    unsigned char codeword[CODESIZE];
    cout << "Coding\t" << "Decodedwrod" << endl;
    codeword[0] = 0xC0;
    if( ht.decode( codeword, 3, decodedResult ) )
    {
        ht.printCode( codeword, 3 );
        cout << "\t" << decodedResult << endl;
    }
    else
        cerr << "invalid codeword! " << endl;
    codeword[0] = 0xC0;
    if( ht.decode( codeword, 4, decodedResult ) )
    {
        ht.printCode( codeword, 3 );
        cout << "\t" << decodedResult << endl;
    }
    else
    {
        ht.printCode( codeword, 4 );
        cerr << "\t" << "invalid codeword! " << endl;
    }
    codeword[0] = 0x40;
    if( ht.decode( codeword, 3, decodedResult ) )
    {
        ht.printCode( codeword, 3 );
        cout << "\t" << decodedResult << endl;
    }
    else
    {
        ht.printCode( codeword, 3 );
        cerr << "\t" << "invalid codeword! " << endl;
    }

    cout << endl;
    return 0;
}


/**
 * Generate the array for counting character frequency.
 */
template <typename Object, typename Weight>
void creatCodeInfo( string &str, CodeObject<Object,Weight> *&codeArr, int &length )
{
    char charFreq[CHARNUM];
    int index[CHARNUM];
    for( int i=0; i<CHARNUM; ++i )
    {
        charFreq[i] = 0;
        index[i] = -1;
    }

    length = 0;
    for( unsigned int i=0; i<str.size(); ++i )
        charFreq[int(str[i])] += 1;

    for( int i=0; i<CHARNUM; ++i )
        if( charFreq[i] )
            index[length++] = i;


    codeArr = new CodeObject<Object,Weight>[length];
    for( int i=0; i<length; ++i )
    {
        codeArr[i].data = index[i];
        codeArr[i].cost = charFreq[index[i]];
    }
}


/**
 * Print the characters and there frequency.
 */
template <typename Object, typename Weight>
void printCodeInfo( CodeObject<Object,Weight> *&codeArr, int length )
{
    cout << "Object\tCode" << endl;
    for( int i=0; i<length; ++i )
        cout << codeArr[i].data << "\t" << codeArr[i].cost << endl;
    cout << endl;
}