forked from fast-pack/FastPFOR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinmemorybenchmark.cpp
155 lines (144 loc) · 5.34 KB
/
inmemorybenchmark.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
#include <getopt.h>
#include "common.h"
#include "codecfactory.h"
#include "maropuparser.h"
#include "util.h"
#include "ztimer.h"
#include "cpubenchmark.h"
#include "deltautil.h"
#include "stringutil.h"
using namespace std;
using namespace FastPForLib;
// Long-form command-line flags handed to getopt_long() in main().
// Each row is {name, argument requirement, flag pointer, value}; with a null
// flag pointer getopt_long() returns the value, which here is the letter of
// the equivalent short option. The all-zero row terminates the table.
static struct option long_options[] = {
    {"codecs", required_argument, nullptr, 'c'},
    {"minlength", required_argument, nullptr, 'm'},
    {"maxlength", required_argument, nullptr, 'M'},
    {"nb", required_argument, nullptr, 'n'},
    {"splitlongarrays", no_argument, nullptr, 'S'},
    {"separatedeltatimes", no_argument, nullptr, 'D'},
    {nullptr, 0, nullptr, 0}};
void message(const char * prog) {
cerr << " usage : " << prog << " scheme maropubinaryfile " << endl;
cerr << "By default, it assumes that the original data is made of "
"sorted distinct integers." << endl;
cerr << "Use the --codecs flag to specify the schemes." << endl;
cerr << " schemes include:" << endl;
vector < string > all = CODECFactory::allNames();
for (auto i = all.begin(); i != all.end(); ++i) {
cerr << *i << endl;
}
}
int main(int argc, char **argv) {
if (argc < 2) {
message(argv[0]);
return -1;
}
bool splitlongarrays = true;
bool separatedeltatimes = false;
size_t MINLENGTH = 1;
size_t MAXLENGTH = std::numeric_limits<uint32_t>::max();
size_t MAXCOUNTER = std::numeric_limits<std::size_t>::max();
vector < shared_ptr<IntegerCODEC> > tmp = CODECFactory::allSchemes();// the default
vector<algostats> myalgos;
for (auto i = tmp.begin(); i != tmp.end(); ++i) {
myalgos.push_back(algostats(*i));
myalgos.push_back(algostats(*i, true));// by default?
}
int c;
while (1) {
int option_index = 0;
c = getopt_long(argc, argv, "DSc:n:m:M:", long_options, &option_index);
if (c == -1)
break;
switch (c) {
case 'D' :
cout<<"# Outputting separate delta and inverseDelta times."<<endl;
separatedeltatimes = true;
break;
case 'S' :
cout<<"#\n# disabling partition of big arrays. Performance may suffer.\n#"<<endl;
splitlongarrays = false;
break;
case 'm' :
istringstream ( optarg ) >> MINLENGTH;
cout<<"# MINLENGTH = "<<MINLENGTH<<endl;
break;
case 'M' :
istringstream ( optarg ) >> MAXLENGTH;
cout<<"# MAXLENGTH = "<<MAXLENGTH<<endl;
break;
case 'n' :
istringstream ( optarg ) >> MAXCOUNTER;
cout<<"# MAXCOUNTER = "<< MAXCOUNTER << endl;
break;
case 'c':
{ myalgos.clear();
string codecsstr(optarg);
if (codecsstr.compare("NONE") != 0) {
vector < string > codecslst = split(codecsstr, ",:;");
for (auto i = codecslst.begin(); i != codecslst.end(); ++i) {
cout << "# pretty name = " << *i << endl;
if(i->at(0) == '@') {// SIMD
string namewithoutprefix = i->substr(1,i->size()-1);
myalgos.push_back(
algostats(CODECFactory::getFromName(namewithoutprefix),true));
} else {
myalgos.push_back(
algostats(CODECFactory::getFromName(*i)));
}
cout << "# added '" << myalgos.back().name() << "'" << endl;
}
}
}
break;
default:
cerr << "unrecognized flag" << endl;
break;
}
}
if (argc - optind < 1) {
cerr << " you must specify a file " << endl;
return -1;
}
string filename = argv[optind];
cout << "# parsing " << filename << endl;
MaropuGapReader reader(filename);
vector<uint32_t, cacheallocator> rawdata;
reader.open();
size_t counter = 0;
const size_t MAXBLOCKSIZE = 104857600;// 400 MB
while (counter < MAXCOUNTER) {
// collecting the data up to MAXBLOCKSIZE integers
vector < vector<uint32_t, cacheallocator> > datas;
size_t datastotalsize = 0;
cout<<"# Filling up a block "<<endl;
while (reader.loadIntegers(rawdata)) {
if ((rawdata.size() < MINLENGTH) || (rawdata.size() > MAXLENGTH))
continue;
++counter;
datastotalsize += rawdata.size();
datas.push_back(rawdata);
if (counter >= MAXCOUNTER) {
cout << "#breaking early" << endl;
break;
}
if (datastotalsize >= MAXBLOCKSIZE)
break;
}
if(datastotalsize == 0) break;
cout<<"# read "<< std::setprecision(3) << static_cast<double>(datastotalsize) * 4.0 / (1024.0 * 1024.0) << " MB, " << datas.size() << " arrays."<<endl;
cout<<"# processing block"<<endl;
if(splitlongarrays) splitLongArrays(datas);
processparameters pp(true, false, false, false, true, separatedeltatimes);
Delta::process(myalgos, datas, pp); // done collecting data, now allocating memory
}
reader.close();
cout<<"# build summary..."<<endl;
summarize(myalgos);
}