PForDelta索引壓縮算法的實現
前日一個朋友給我發來了一個索引壓縮算法,寫得非常漂亮而且簡潔,壓縮比和解壓性能方面大大超過目前已知的一些字節對齊的算法和Pfordelta這樣的非字節對齊的算法,讓人歎為觀止,這是我看到的最好的壓縮算法,他將會以論文的形式發表,相信必將震驚世界,我之前也寫了很多Pfordelta的博客,大家對這個算法的具體實現很好奇,有幾個難點,一個是bit pack和unpack,一個是關於exception的佔位符在做鏈接的時候如果間隔超過了2的b次方如何處理等等,在以前的博客中曾應用了一個日本開發者開發的算法,但目前已經不開源了,我昨天又讀了一遍pfordelta實現方面的論文【Super-Scalar RAM-CPU Cache compression】,今天一天寫了個大概,bit pack和unpack在1,2,4,8,16...這種2的冪是比較容易處理的,其餘的比較困難,我這裡只實現了pack3和unpack3,論文【Balancing Vectorized Query Execution with Bandwidth-optimized Storage】中給出了unpack12的參考代碼,理論上應該實現從pack3到pack16的全部方法,限於博客篇幅不全寫了,有興趣讀者朋友依葫蘆畫瓢可以繼續完成。
值得注意的是:(1)PFORDelta可以對0值進行壓縮,這是非常有利的。(2)pack和unpack的函數指針數組也比較有特色,PACKbitwidth;通過bitwidth值來取相應的pack和unpack函數,提高CPU流水線的通暢性。
算法本博客不做過多解釋,有興趣的朋友可以參見我此前的博客:
http://blog.csdn.net/pennyliang/archive/2010/09/25/5905691.aspx
另外,我這位朋友想讓我做一個最快的Pfordelta算法來PK一下,會輸多少,我知道肯定會輸,而且會很大,只是想盡可能做個最合格的對手,如果有讀者朋友有更好的實現,也請發給我參考,非常感謝。
#include<stdio.h>
#include<stdlib.h>
#include <stddef.h>
#include <stdint.h>
const int N = 64;
const int MAX = 64;
uint32_t MAXCODE[] = {0,
1,//1U<<0 - 1
3,//1u<<1 - 1
(1U << 3) - 1,
(1U << 4) - 1,
(1U << 5) - 1,
(1U << 6) - 1,
(1U << 7) - 1,
(1U << 8) - 1,
(1U << 9) - 1,
(1U << 10) - 1,
(1U << 11) - 1,
(1U << 12) - 1,
(1U << 13) - 1,
(1U << 14) - 1,
(1U << 15) - 1,
(1U << 16) - 1,
(1U << 17) - 1,
(1U << 18) - 1,
(1U << 19) - 1,
(1U << 20) - 1,
(1U << 21) - 1,
(1U << 22) - 1,
(1U << 23) - 1,
(1U << 24) - 1,
(1U << 25) - 1,
(1U << 26) - 1,
(1U << 27) - 1,
(1U << 28) - 1,
(1U << 29) - 1,
(1U << 30) - 1,
0x7FFFFFFF,
0xFFFFFFFF,
};
typedef void(*pattern_fun)(uint32_t* code, uint32_t* data, size_t n);
void PACK3(uint32_t* code, uint32_t* data, size_t n)
{
for (int i = 0, j = 0; j < n; i += 3, j += 32) {
code[i] = 0;
code[i + 1] = 0;
code[i + 2] = 0;
code[i] |= data[j + 0] << 0;
code[i] |= data[j + 1] << 3;
code[i] |= data[j + 2] << 6;
code[i] |= data[j + 3] << 9;
code[i] |= data[j + 4] << 12;
code[i] |= data[j + 5] << 15;
code[i] |= data[j + 6] << 18;
code[i] |= data[j + 7] << 21;
code[i] |= data[j + 8] << 24;
code[i] |= data[j + 9] << 27;
code[i] |= (data[j + 10] & 0x00000003) << 30;
code[i + 1] |= (data[j + 10] & 0x00000004) >> 2;
code[i + 1] |= data[j + 11] << 1;
code[i + 1] |= data[j + 12] << 4;
code[i + 1] |= data[j + 13] << 7;
code[i + 1] |= data[j + 14] << 10;
code[i + 1] |= data[j + 15] << 13;
code[i + 1] |= data[j + 16] << 16;
code[i + 1] |= data[j + 17] << 19;
code[i + 1] |= data[j + 18] << 22;
code[i + 1] |= data[j + 19] << 25;
code[i + 1] |= data[j + 20] << 28;
code[i + 1] |= (data[j + 21] & 0x00000001) << 31;
code[i + 2] |= (data[j + 21] & 0x00000006) >> 1;
code[i + 2] |= data[j + 22] << 2;
code[i + 2] |= data[j + 23] << 5;
code[i + 2] |= data[j + 24] << 8;
code[i + 2] |= data[j + 25] << 11;
code[i + 2] |= data[j + 26] << 14;
code[i + 2] |= data[j + 27] << 17;
code[i + 2] |= data[j + 28] << 20;
code[i + 2] |= data[j + 29] << 23;
code[i + 2] |= data[j + 30] << 26;
code[i + 2] |= data[j + 31] << 29;
}
}
void UNPACK3(uint32_t* data, uint32_t* code, size_t n)
{
for (int i = 0, j = 0; i < n; i += 32, j += 3) {
data[i + 0] = ((code[j + 0] & 0x00000007) >> 0); // 000......111
data[i + 1] = ((code[j + 0] & 0x00000038) >> 3); // 000...111000
data[i + 2] = ((code[j + 0] & 0x000001C0) >> 6);
data[i + 3] = ((code[j + 0] & 0x00000E00) >> 9);
data[i + 4] = ((code[j + 0] & 0x00007000) >> 12); //
data[i + 5] = ((code[j + 0] & 0x00038000) >> 15); //
data[i + 6] = ((code[j + 0] & 0x001C0000) >> 18);
data[i + 7] = ((code[j + 0] & 0x00E00000) >> 21);
data[i + 8] = ((code[j + 0] & 0x07000000) >> 24); //
data[i + 9] = ((code[j + 0] & 0x38000000) >> 27); //
data[i + 10] = ((code[j + 0] & 0xC0000000) >> 30) | (code[j + 1] & 0x00000001)
<< 2;
data[i + 11] = ((code[j + 1] & 0x0000000E) >> 1);
data[i + 12] = ((code[j + 1] & 0x00000070) >> 4);
data[i + 13] = ((code[j + 1] & 0x00000380) >> 7);
data[i + 14] = ((code[j + 1] & 0x00001C00) >> 10);
data[i + 15] = ((code[j + 1] & 0x0000E000) >> 13);
data[i + 16] = ((code[j + 1] & 0x00070000) >> 16);
data[i + 17] = ((code[j + 1] & 0x00380000) >> 19);
data[i + 18] = ((code[j + 1] & 0x01C00000) >> 22);
data[i + 19] = ((code[j + 1] & 0x0E000000) >> 25);
data[i + 20] = ((code[j + 1] & 0x70000000) >> 28);
data[i + 21] = ((code[j + 1] & 0x80000000) >> 31) | (code[j + 2] & 0x00000003)
<< 1;
data[i + 22] = ((code[j + 2] & 0x0000001C) >> 2);
data[i + 23] = ((code[j + 2] & 0x000000E0) >> 5);
data[i + 24] = ((code[j + 2] & 0x00000700) >> 8);
data[i + 25] = ((code[j + 2] & 0x00003800) >> 11);
data[i + 26] = ((code[j + 2] & 0x0001C000) >> 14);
data[i + 27] = ((code[j + 2] & 0x000E0000) >> 17);
data[i + 28] = ((code[j + 2] & 0x00700000) >> 20);
data[i + 29] = ((code[j + 2] & 0x03800000) >> 23);
data[i + 30] = ((code[j + 2] & 0x1C000000) >> 26);
data[i + 31] = ((code[j + 2] & 0xE0000000) >> 29);
}
}
pattern_fun PACK[] = {NULL, NULL, NULL, PACK3};
pattern_fun UNPACK[] = {NULL, NULL, NULL, UNPACK3};
int compress(size_t n, int bitwidth, uint32_t* input, uint32_t* code,
uint32_t* exception, uint32_t* f_e)
{
uint32_t miss[N], data[N], prev = 0;
int j = 0;
int last_e = -1;
for (int i = 0; i < MAX; i++) {
int val = input[i];
data[i] = val;
miss[j] = i;
if ((val > MAXCODE[bitwidth])) {
j++;
last_e = i;
} else if (last_e >= 0 && ((i - last_e)) > MAXCODE[bitwidth]) {
j++;
last_e = i;
} else {}
}
if (j > 0) {
*f_e = miss[0];
prev = miss[0];
exception[0] = input[prev];
for (int i = 1; i < j; i++) {
int cur = miss[i];
exception[i] = input[cur];
data[prev] = (cur - prev) - 1;
prev = cur;
}
}
PACK[bitwidth](code, data, MAX);
return j;
};
int decompress(size_t exception_cnt, int bitwidth, uint32_t* output,
uint32_t* input, uint32_t* exception, uint32_t* f_e)
{
uint32_t next, code[N], cur = *f_e;
UNPACK[bitwidth](code, input, MAX);
for (int i = 0; i < MAX; ++i) {
output[i] = code[i];
}
if (cur != MAX && exception_cnt > 0)
{
cur = *f_e;
next = output[cur];
output[cur] = exception[0];
cur += next + 1;
for (int i = 1, j = 1; cur < MAX && j < exception_cnt; i++, j++) {
next = cur + code[cur] + 1 ;
output[cur] = exception[i];
cur = next;
}
}
return MAX;
}
int main(void)
{
uint32_t input[] = {10, 13, 14, 16, 20, 22, 25, 30, 37, 40, 44, 47, 48, 50, 54, 56, 58, 63, 70, 73, 74, 77, 78, 80, 84, 86, 89, 94, 101, 104, 106, 109, 110, 112, 115, 117, 120, 121, 123, 133, 141, 151, 152, 157, 158, 166, 168, 178, 186, 195, 196, 202, 203, 209, 299, 301, 304, 329, 336, 339, 352, 354, 357, 359};
uint32_t* code = (uint32_t*)malloc(1024 * sizeof(uint32_t));
uint32_t* output = (uint32_t*)malloc(1024 * sizeof(uint32_t));
uint32_t* exception = code + 6;
uint32_t first_exception = MAX;
uint32_t compressed_temp_arr[MAX] = {0};
compressed_temp_arr[0] = input[0];
for (int i = 1; i < MAX; ++i) {
compressed_temp_arr[i] = input[i] - input[i - 1] - 1;
}
int j = compress(MAX, 3, compressed_temp_arr, code, exception,
&first_exception);
decompress(j, 3, output, code, exception, &first_exception);
for (int i = 1; i < MAX; ++i) {
output[i] = output[i] + output[i - 1] + 1;
}
for (int i = 0; i < MAX; ++i) {
printf("%d,", output[i]);
}
printf("/n");
for (int i = 0; i < MAX; ++i) {
printf("%d,", input[i]);
}
printf("/n");
int ori_size = sizeof(input) / sizeof(uint32_t) * 32;
int com_size = j + MAX * 3;
float ratio = ori_size / com_size;
printf("original size:%d bit,compressed size:%d bit,compressed ratio:%f/n,bit per docid:%f",
ori_size, com_size, ratio, com_size * 1.0f / 64);
return 0;
}