之前博客有记录BDI压缩算法以及FPC压缩算法的论文阅读记录,在github上面找到了对应的源码来源,基于对这两种压缩算法的理论理解,详细分析算法源码。

FPCCompress

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
//x最高位若为0,右移63位后的t为全0,x与t异或仍未x,减t也仍为x
//x最高位若为1,右移63位后的t为全1,x与t异或结果为x取反,然后(x取反-t)=-(t-x取反)=-x
//最高位为符号扩展位,负数绝对值等于它的相反数,正数绝对值等于本身
//long long为8个字节,64位
static unsigned long long my_llabs ( long long x )
{
unsigned long long t = x >> 63;
return (x ^ t) - t;
}
//int为4个字节,32位,绝对值算法同上
static unsigned my_abs ( int x )
{
unsigned t = x >> 31;
return (x ^ t) - t;
}

long long unsigned * convertBuffer2Array (char * buffer, unsigned size, unsigned step)
{
//size为缓冲区大小,除以step为values的大小,一个values包含step个buffer
long long unsigned * values = (long long unsigned *) VG_(malloc)("cg.compress.ci.1", sizeof(long long unsigned) * size/step);
unsigned int i,j;
for (i = 0; i < size / step; i++) {
values[i] = 0; // Initialize all elements to zero.
}
//values[0]=(buffre[0]<<0)+(buffre[1]<<8)+(buffre[2]<<16)+(buffre[3]<<24)
//values[1]=(buffre[4]<<0)+(buffre[5]<<8)+(buffre[6]<<16)+(buffre[7]<<24)
for (i = 0; i < size; i += step ){
for (j = 0; j < step; j++){
values[i / step] += (long long unsigned)((unsigned char)buffer[i + j]) << (8*j);
}
}
return values;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
unsigned FPCCompress(char * buffer, unsigned size ){
long long unsigned * values = convertBuffer2Array(buffer, size*4, 4);
unsigned compressable = 0;
unsigned int i;
for (i = 0; i < size; i++) {
// 000
if(values[i] == 0){
compressable += 1;//SIM_printf("000\n ");
continue;
}
// 001 010
if(my_abs((int)(values[i])) <= 0xFF){
compressable += 1;//SIM_printf("001\n ");
continue;
}
// 011
if(my_abs((int)(values[i])) <= 0xFFFF){
compressable += 2;//SIM_printf("011\n ");
continue;
}
//100
if(((values[i]) & 0xFFFF) == 0 ){
compressable += 2;//SIM_printf("100\n ");
continue;
}
//101
if( my_abs((int)((values[i]) & 0xFFFF)) <= 0xFF
&& my_abs((int)((values[i] >> 16) & 0xFFFF)) <= 0xFF){
compressable += 2;//SIM_printf("101\n ");
continue;
}
//110
unsigned byte0 = (values[i]) & 0xFF;
unsigned byte1 = (values[i] >> 8) & 0xFF;
unsigned byte2 = (values[i] >> 16) & 0xFF;
unsigned byte3 = (values[i] >> 24) & 0xFF;
if(byte0 == byte1 && byte0 == byte2 && byte0 == byte3){
compressable += 1; //SIM_printf("110\n ");
continue;
}
//111
compressable += 4;
}
VG_(free)(values);
//6 bytes for 3 bit per every 4-byte word in a 64 byte cache line
unsigned compSize = compressable + size * 3 / 8;
if(compSize < size * 4)
return compSize;
else
return size * 4;
}