基于哈夫曼编码完成的文件压缩及解压
生活随笔
收集整理的這篇文章主要介紹了
基于哈夫曼编码完成的文件压缩及解压
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
這幾天在較為認真的研究基于哈夫曼編碼的文件壓縮及解壓,費了點時間,在這分享一下:
這里用鏈式結構,非順序表結構;
文件壓縮:
1.獲取文件信息(這里采用TXT格式文本);
2.壓縮文件;
3.寫配置文件(便于解壓時用,無非就是存放原文件的索引之類的,比如說,文件中某個字符出現的個數,記錄下來)
4.解壓縮,使用壓縮后的文件和配置文件解壓文件;
5.用比對軟件,比對解壓后的文件和源文件是否相同;
下面慢慢解析:
先看一個文件信息類:
typedef long long LongType; struct FileInfo {unsigned char _ch; //字符LongType _count; //字符出現次數string _code; //字符對應的哈夫曼編碼 FileInfo(unsigned char ch = 0):_ch(ch),_count(0){}FileInfo operator+(const FileInfo& x){FileInfo tmp;tmp._count = this->_count + x._count;return tmp;}bool operator !=(const FileInfo& x) const{return this->_count != x._count;} };bool operator<(const FileInfo info1,const FileInfo info2) {return info1._count < info2._count; }此為一個文件信息的類結構,包含字符,字符對應出現的次數,以及這個字符對應的哈夫曼編碼(能看到這篇博客的星弟,對哈夫曼編碼不會陌生,這里不再強調)除了統(tǒng)計字符出現的次數及哈夫曼編碼,還完成了幾個運算符的重載
要獲取哈夫曼編碼,就得建立哈夫曼樹,建立哈夫曼樹用最小堆取操作,以下是最小堆建立過程
// 小堆 template<class T> struct Less {bool operator() (const T& l, const T& r){return l < r; // operator<}};template<class T> struct Greater {bool operator() (const T& l, const T& r){return l > r; // operator<} };template<class T, class Compare = Less<T>> class Heap { public:Heap(){}Heap(const T* a, size_t size){for (size_t i = 0; i < size; ++i){_arrays.push_back(a[i]);}// 建堆for(int i = (_arrays.size()-2)/2; i >= 0; --i){AdjustDown(i);}}void Push(const T& x){_arrays.push_back(x);AdjustUp(_arrays.size()-1);}void Pop(){assert(_arrays.size() > 0);swap(_arrays[0], _arrays[_arrays.size() - 1]);_arrays.pop_back();AdjustDown(0);}T& Top(){assert(_arrays.size() > 0);return _arrays[0];}bool Empty(){return _arrays.empty();}int Size(){return _arrays.size();}void AdjustDown(int root){int child = root*2 + 1;// Compare com;while (child < _arrays.size()){// 比較出左右孩子中小的那個if (child+1<_arrays.size() &&*_arrays[child+1] < _arrays[child])//if(child+1<_arrays.size() &&// com(_arrays[child+1],_arrays[child])){++child;}if(*_arrays[child] < _arrays[root])//if(com(_arrays[child],_arrays[root])){swap(_arrays[child], _arrays[root]);root = child;child = 2*root+1;}else{break;}}}void AdjustUp(int child){int parent = (child-1)/2;//while (parent >= 0)while (child > 0){if (*_arrays[child] < _arrays[parent]){swap(_arrays[parent], _arrays[child]);child = parent;parent = (child-1)/2;}else{break;}}}public:vector<T> _arrays; };最小堆里也完成了很多接口,包括push ?pop等然后就是幾個壓縮和解壓的函數接口
1.根據哈夫曼樹獲取哈夫曼變慢:
void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root){if (root == nullptr){return;}_GenerateHuffmanCode(root->_left);_GenerateHuffmanCode(root->_right);//當前節(jié)點為葉子節(jié)點為空 才生成哈夫曼編碼if (root->_left == nullptr && root->_right == nullptr){HuffmanTreeNode<FileInfo>* cur = root;HuffmanTreeNode<FileInfo>* parent = cur->_parent;string& code = _infos[cur->_weight._ch]._code;while (parent){if (parent->_left == cur){code += '1';}else if (parent->_right == cur){code += '0';}cur = parent;parent = cur->_parent;}reverse(code.begin(), code.end());}}4.文件壓縮 //文件壓縮bool Compress(const char* filename){//1.打開一個文件,統(tǒng)計文件字符出現的次數//2.生成對應的哈弗曼編碼//3.壓縮文件//4.寫配置文件,方便解壓縮assert(filename);FILE *fOut = fopen(filename, "rb");assert(fOut);//統(tǒng)計文件字符出現的次數unsigned char ch = fgetc(fOut);while (!feof(fOut)) //文件結束{_infos[ch]._count++;ch = fgetc(fOut);}HuffmanTree<FileInfo> ht;FileInfo invalid;ht.CreateTree(_infos, 256, invalid);//哈夫曼編碼_GenerateHuffmanCode(ht.GetRoot());string compressFile = filename;compressFile += ".huf";//壓縮后的文件名 后綴為《輸入文件名+.huf》FILE *finCompress = fopen(compressFile.c_str(), "wb"); //獲取string中的C字符串assert(finCompress);fseek(fOut, 0, SEEK_SET);//將文件指針移到開頭char cha = fgetc(fOut);unsigned char inch = 0;int index = 0; //一個字節(jié)的八位while (!feof(fOut)){string& code = _infos[(unsigned char)cha]._code;for (size_t i = 0; i < code.size(); ++i){inch <<= 1; //低位向高位進if (code[i] == '1'){inch |= 1;}if (++index == 8){fputc(inch, finCompress); //夠8位,裝進文件index = 0; //重新一輪開始inch = 0;}}cha = fgetc(fOut);}fclose(fOut);//如果index = 0 說明 上邊8位剛好存滿 不等 下一個自己又出來了if (index != 0) //處理最后一個字符不夠的問題{inch <<= (8 - index); //最高位必須裝上 后邊的浪費掉fputc(inch, finCompress);}fclose(finCompress);}
5.寫配置文件: string logFile = filename;logFile += ".log";FILE *Log = fopen(logFile.c_str(), "wb");assert(Log);string chInfo;char str[128] = {0}; //沒空間 不可以for (size_t i = 1; i < 256; ++i){if (_infos[i]._count > 0){chInfo += _infos[i]._ch;chInfo += ',';chInfo += _itoa(_infos[i]._count,str,10);chInfo += '\n';fputs(chInfo.c_str(), Log);chInfo.clear();}}fclose(Log);
6.最后的文件解壓: //重構文件void _RestoreFiles(HuffmanTreeNode<FileInfo> *root, const char* Fileneme,long long size){assert(root);//原壓縮文件string name = Fileneme;name += ".huf";FILE* Out = fopen(name.c_str(),"rb");assert(Out);string restorefilename = Fileneme;restorefilename += ".over";FILE *over = fopen(restorefilename.c_str(),"wb");assert(over);int pos = 8;long long poss = size;unsigned char chz = fgetc(Out);while (poss>0){HuffmanTreeNode<FileInfo>* cur = nullptr;cur = root;while (cur->_left != nullptr || cur->_right != nullptr){pos--;unsigned char temp = chz >> pos;int ch = 1 & temp;if (ch == 0){cur = cur->_right;}else if (ch == 1){cur = cur->_left;}if (pos == 0){chz = fgetc(Out);pos = 8;}}fputc(cur->_weight._ch, over);poss--;}fclose(Out);fclose(over);}void UnCompress(const char* Fileneme)//解壓縮{//1.打開日志文件//2.根據信息還原哈夫曼樹//3.還原信息;string UnCompressneme = Fileneme;UnCompressneme += ".log";FILE *fOutLogFile = fopen(UnCompressneme.c_str(), "rb");assert(fOutLogFile);string line;while (_ReadLine(fOutLogFile, line)){unsigned char ch = line[0];_infos[ch]._count = atoi(line.substr(2).c_str());line.clear();} HuffmanTree<FileInfo> f;FileInfo invalid;f.CreateTree(_infos, 256, invalid);//根據重建的哈夫曼樹 還原文件;long long size = f.GetRoot()->_weight._count;_RestoreFiles(f.GetRoot(), Fileneme,size);}到此,此項目基本完成;如遇問題,希望留言,隨時解答,如有見解,跪求賜教!
轉載于:https://www.cnblogs.com/li-ning/p/9490022.html
總結
以上是生活随笔為你收集整理的基于哈夫曼编码完成的文件压缩及解压的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: BZOJ 2150: 部落战争 最大流
- 下一篇: GIt 从入门到放弃