forked from fast-pack/FastPFOR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpartitionbylength.cpp
76 lines (71 loc) · 2.35 KB
/
partitionbylength.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
/**
* The idea is to partition a database of arrays
* according to their length. We partition by the
* integer logarithm of the length so that arrays
* having length from 2^L to 2^(L+1)-1 will be stored
* together.
*/
#include <sstream>
#include <vector>
#include "maropuparser.h"
#include "util.h"
using namespace std;
using namespace FastPForLib;
int main(int argc, char **argv) {
if (argc < 2) {
cerr << "please provide an input file name" << endl;
return -1;
}
int argindex = 1;
string filename = argv[argindex++];
cout << "# parsing " << filename << endl;
MaropuGapReader reader(filename);
vector < uint32_t > rawdata;
reader.open();
map<uint32_t,FILE *> output;
map<uint32_t,size_t> counter;
map<uint32_t,string> name;
while (reader.loadIntegers(rawdata)) {
uint32_t lengthinbits = gccbits(static_cast<uint32_t>(rawdata.size()));
if(output.find(lengthinbits)==output.end()) {
ostringstream o;
o<<filename<<"."<<lengthinbits;
cout<<"creating output file "<<o.str()<<endl;
FILE * fd = ::fopen(o.str().c_str(), "w+b");
if (fd == NULL) {
cerr << strerror(errno) << endl;
cerr << "can't open " << o.str().c_str() << endl;
break;
}
setvbuf (fd , NULL , _IOFBF , 1024*4 ); // large buffer
output[lengthinbits] = fd;
counter[lengthinbits] = 0;
name[lengthinbits] = o.str();
}
uint32_t thislength = static_cast<uint32_t>(rawdata.size());
if(fwrite(&thislength, sizeof(thislength), 1, output[lengthinbits])!=1) {
cerr << "problem writing" << endl;
break;
}
if(fwrite(&rawdata[0], sizeof(uint32_t),thislength, output[lengthinbits])!=thislength) {
cerr << "problem writing" << endl;
break;
}
counter[lengthinbits] ++;
}
auto i = output.begin();
auto j = counter.begin();
auto k = name.begin();
for(; i!= output.end(); ++i, ++j, ++k) {
cout<<"file "<<k->second<<" contains "<<j->second<<" arrays"<<endl;
::fclose(i->second);
}
reader.close();
return 0;
}