FPC源码分析 | 乌龙波霸七分甜

之前博客有记录BDI压缩算法以及FPC压缩算法的论文阅读记录，在github上面找到了对应的源码来源，基于对这两种压缩算法的理论理解，详细分析算法源码。

FPCCompress

//x最高位若为0，右移63位后的t为全0，x与t异或仍未x，减t也仍为x
//x最高位若为1，右移63位后的t为全1，x与t异或结果为x取反，然后（x取反-t）=-（t-x取反）=-x
//最高位为符号扩展位，负数绝对值等于它的相反数，正数绝对值等于本身
//long long为8个字节，64位
static unsigned long long my_llabs ( long long x )
{
   unsigned long long t = x >> 63;
   return (x ^ t) - t;
}
//int为4个字节，32位，绝对值算法同上
static unsigned my_abs ( int x )
{
   unsigned t = x >> 31;
   return (x ^ t) - t;
}

long long unsigned * convertBuffer2Array (char * buffer, unsigned size, unsigned step)
{
//size为缓冲区大小，除以step为values的大小，一个values包含step个buffer
     long long unsigned * values = (long long unsigned *) VG_(malloc)("cg.compress.ci.1", sizeof(long long unsigned) * size/step);
     unsigned int i,j; 
     for (i = 0; i < size / step; i++) {
          values[i] = 0;    // Initialize all elements to zero.
      }
//values[0]=(buffre[0]<<0)+(buffre[1]<<8)+(buffre[2]<<16)+(buffre[3]<<24)
//values[1]=(buffre[4]<<0)+(buffre[5]<<8)+(buffre[6]<<16)+(buffre[7]<<24)
      for (i = 0; i < size; i += step ){
          for (j = 0; j < step; j++){
              values[i / step] += (long long unsigned)((unsigned char)buffer[i + j]) << (8*j);
          }
      }
      return values;
}

unsigned FPCCompress(char * buffer, unsigned size ){
 long long unsigned * values = convertBuffer2Array(buffer, size*4, 4);
 unsigned compressable = 0;
 unsigned int i;
 for (i = 0; i < size; i++) {
 // 000
 if(values[i] == 0){
 compressable += 1;//SIM_printf("000\n ");
 continue;
 }
 // 001 010
 if(my_abs((int)(values[i])) <= 0xFF){
 compressable += 1;//SIM_printf("001\n ");
 continue;
 }
 // 011
 if(my_abs((int)(values[i])) <= 0xFFFF){
 compressable += 2;//SIM_printf("011\n ");
 continue;
 }
 //100
 if(((values[i]) & 0xFFFF) == 0 ){
 compressable += 2;//SIM_printf("100\n ");
 continue;
 }
 //101
 if( my_abs((int)((values[i]) & 0xFFFF)) <= 0xFF
 && my_abs((int)((values[i] >> 16) & 0xFFFF)) <= 0xFF){
 compressable += 2;//SIM_printf("101\n ");
 continue;
 }
 //110
 unsigned byte0 = (values[i]) & 0xFF;
 unsigned byte1 = (values[i] >> 8) & 0xFF;
 unsigned byte2 = (values[i] >> 16) & 0xFF;
 unsigned byte3 = (values[i] >> 24) & 0xFF;
 if(byte0 == byte1 && byte0 == byte2 && byte0 == byte3){
 compressable += 1; //SIM_printf("110\n ");
 continue;
 }
 //111
 compressable += 4;
 }
 VG_(free)(values);
 //6 bytes for 3 bit per every 4-byte word in a 64 byte cache line
 unsigned compSize = compressable + size * 3 / 8;
 if(compSize < size * 4)
 return compSize;
 else
 return size * 4;
}