// Slim numerical data compression, version 1.0
00001 // -*- mode: c++; -*- 00002 00005 00006 // Copyright (C) 2008, 2009 Joseph Fowler 00007 // 00008 // This file is part of slim, a compression package for science data. 00009 // 00010 // Slim is free software: you can redistribute it and/or modify 00011 // it under the terms of the GNU General Public License as published by 00012 // the Free Software Foundation, either version 3 of the License, or 00013 // (at your option) any later version. 00014 // 00015 // Slim is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 // GNU General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU General Public License 00021 // along with slim. If not, see <http://www.gnu.org/licenses/>. 00022 00023 #ifndef SLIM_BITSTREAM_H 00024 #define SLIM_BITSTREAM_H 00025 00026 #include <stdint.h> 00027 #include <cstdio> 00028 00029 #include "bit_constants.h" 00030 00031 using namespace std; 00032 00033 class bitstream { 00034 public: 00035 bitstream(); 00036 bitstream(FILE *file, int buffersize=DEFAULT_IOBUFFER_SIZE); 00037 bitstream(const char *filename, 00038 int buffersize=DEFAULT_IOBUFFER_SIZE); 00039 bitstream(const bitstream &b); 00040 bitstream & operator=(const bitstream &b); 00041 virtual ~bitstream(); 00042 00043 virtual void close(); 00044 bool is_open() const; 00045 virtual void setupstream(); 00046 virtual void windup()=0; 00047 virtual int get_bytes_used(); 00048 int get_bitptr(); 00049 virtual void print() const = 0; 00050 00051 protected: 00052 static const int Bits_per_word = 8*sizeof(Word_t); 00053 00054 size_t bufsize; 00055 size_t buf_used; 00056 FILE *fp; 00057 Byte_t *buffer_base; 00058 Byte_t *beyondbuffer; 00059 union { 00060 Byte_t *Bptr; 00061 Word_t *Dptr; 00062 } buffptr; 00063 int bitptr; 00064 00065 00066 00067 public: 00068 enum {DEFAULT_IOBUFFER_SIZE=1024*1024}; 00069 
enum {MAX_BITSTREAM_BUFSIZE=16*1024*1024}; 00070 }; 00071 00072 00073 00074 class obitstream : public bitstream { 00075 private: 00076 // No private attributes. 00077 00078 public: 00079 obitstream(FILE *file, int buffersize=DEFAULT_IOBUFFER_SIZE); 00080 obitstream(const char *filename, 00081 int buffersize=DEFAULT_IOBUFFER_SIZE); 00082 ~obitstream(); 00083 00084 void writebits(uint32_t data, int nbits); 00085 void writestring(const char *str, bool write_trailing_null=false); 00086 template <typename T> void writeword(const T data); 00087 void write_unary(unsigned int value); 00088 virtual void print() const; 00089 virtual void close(); 00090 void windup(); 00091 void flush(bool flush_trailing_bits); 00092 }; 00093 00094 00095 00096 00097 class ibitstream : public bitstream { 00098 private: 00099 // No private attributes (unless debugging). 00100 #ifdef DEBUG_READBITS 00101 int c1,c2, c3, c4, c5, c6; 00102 void print_debug(); 00103 #endif 00104 00105 public: 00106 ibitstream(FILE *file, int buffersize=DEFAULT_IOBUFFER_SIZE); 00107 ibitstream(const char *filename, 00108 int buffersize=DEFAULT_IOBUFFER_SIZE); 00109 ~ibitstream(); 00110 00111 void setupstream(); 00112 void windup(); 00113 virtual void print() const; 00114 virtual int get_bytes_used(); 00115 Word_t readbits(int nbits); 00116 int32_t readbits_int(int nbits); 00117 Word_t read_unary(); 00118 int readstring(char *s, int count=-1); 00119 //int get_bits_used() { return bitptr + Bits_per_word*buf_used;} 00120 00121 private: 00122 void next_word(); 00123 int fill(); 00124 00125 Word_t partial_word; 00126 int partial_word_bitptr; 00127 }; 00128 00129 00130 00132 // Inline functions 00134 00141 static inline unsigned int bit_size(int32_t i) { 00142 if (i < 0) 00143 i = (-i)-1; // Convert negative int to non-negatives of same size. 
00144 00145 if (i>lowestNset32bits[15]) { 00146 if (i>lowestNset32bits[23]) { 00147 if (i>lowestNset32bits[27]) { 00148 if (i>lowestNset32bits[29]) { 00149 if (i>lowestNset32bits[30]) { 00150 return 32; 00151 } else { 00152 return 31; 00153 } 00154 } else { 00155 if (i>lowestNset32bits[28]) { 00156 return 30; 00157 } else { 00158 return 29; 00159 } 00160 } 00161 } else { 00162 if (i>lowestNset32bits[25]) { 00163 if (i>lowestNset32bits[26]) { 00164 return 28; 00165 } else { 00166 return 27; 00167 } 00168 } else { 00169 if (i>lowestNset32bits[24]) { 00170 return 26; 00171 } else { 00172 return 25; 00173 } 00174 } 00175 } 00176 } else { 00177 if (i>lowestNset32bits[19]) { 00178 if (i>lowestNset32bits[21]) { 00179 if (i>lowestNset32bits[22]) { 00180 return 24; 00181 } else { 00182 return 23; 00183 } 00184 } else { 00185 if (i>lowestNset32bits[20]) { 00186 return 22; 00187 } else { 00188 return 21; 00189 } 00190 } 00191 } else { 00192 if (i>lowestNset32bits[17]) { 00193 if (i>lowestNset32bits[18]) { 00194 return 20; 00195 } else { 00196 return 19; 00197 } 00198 } else { 00199 if (i>lowestNset32bits[16]) { 00200 return 18; 00201 } else { 00202 return 17; 00203 } 00204 } 00205 } 00206 } 00207 } else { 00208 if (i>lowestNset32bits[7]) { 00209 if (i>lowestNset32bits[11]) { 00210 if (i>lowestNset32bits[13]) { 00211 if (i>lowestNset32bits[14]) { 00212 return 16; 00213 } else { 00214 return 15; 00215 } 00216 } else { 00217 if (i>lowestNset32bits[12]) { 00218 return 14; 00219 } else { 00220 return 13; 00221 } 00222 } 00223 } else { 00224 if (i>lowestNset32bits[9]) { 00225 if (i>lowestNset32bits[10]) { 00226 return 12; 00227 } else { 00228 return 11; 00229 } 00230 } else { 00231 if (i>lowestNset32bits[8]) { 00232 return 10; 00233 } else { 00234 return 9; 00235 } 00236 } 00237 } 00238 } else { 00239 if (i>lowestNset32bits[3]) { 00240 if (i>lowestNset32bits[5]) { 00241 if (i>lowestNset32bits[6]) { 00242 return 8; 00243 } else { 00244 return 7; 00245 } 00246 } else { 00247 if 
(i>lowestNset32bits[4]) { 00248 return 6; 00249 } else { 00250 return 5; 00251 } 00252 } 00253 } else { 00254 if (i>lowestNset32bits[1]) { 00255 if (i>lowestNset32bits[2]) { 00256 return 4; 00257 } else { 00258 return 3; 00259 } 00260 } else { 00261 if (i>lowestNset32bits[0]) { 00262 return 2; 00263 } else { 00264 return 1; 00265 } 00266 } 00267 } 00268 } 00269 } 00270 } 00271 00272 00273 00278 static inline unsigned int bit_size(unsigned int u) { 00279 for (int bs=1; bs<=32; bs++) 00280 if (u == (u&lowestNset32bits[bs])) 00281 return bs; 00282 throw "Bit size (unsigned int) fails!"; 00283 } 00284 00285 00286 #endif // #ifdef SLIM_BITSTREAM_H