-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmatmul-bench.h
137 lines (103 loc) · 4.88 KB
/
matmul-bench.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#ifndef MATMUL_BENCH_H
#define MATMUL_BENCH_H
#include <stdio.h>
/*
 * MATMUL_BENCH_EXPORT decorates every public function declared in this header.
 *
 * On Windows it expands to __declspec(dllexport) when MATMUL_BENCH_BUILD_LIB is
 * defined and to __declspec(dllimport) otherwise (presumably the library's own
 * build defines MATMUL_BENCH_BUILD_LIB - confirm against the build scripts).
 * On every other platform it expands to the GCC-style "default" visibility
 * attribute.
 */
#ifdef _WIN32
#include <windows.h>
#ifdef MATMUL_BENCH_BUILD_LIB
#define MATMUL_BENCH_EXPORT __declspec(dllexport)
#else
#define MATMUL_BENCH_EXPORT __declspec(dllimport)
#endif
#else
#define MATMUL_BENCH_EXPORT __attribute__((visibility("default")))
#endif
/* Forward declarations so the definitions below can refer to each other by pointer. */
struct MatmulBenchTest;
struct MatmulBenchConfig;
struct MatmulBench;
/*
 * Settings for one benchmark run, passed to matmul_bench_run().
 *
 * NOTE(review): the mat_size comment speaks of "the three parameters below";
 * presumably these are size_min, size_step and max_time_sec, with i_block_size
 * sitting between them - confirm against the implementation.
 */
struct MatmulBenchConfig {
int iter; /* number of test iterations */
int *enable; /* parallel to test_set; 0: skip the test, non-zero: run it */
unsigned int mat_size; /* matrix size to measure; 0 means automatic (test using the three parameters below) */
unsigned int size_min; /* starting test size (the effective size_min is a multiple of size_step that is larger than this) */
unsigned int i_block_size; /* block size of the loop */
unsigned int size_step; /* size increment (the effective step is larger than this and is the least common multiple of every test's size_step) */
double max_time_sec; /* stop once the processing time exceeds this */
};
/*
 * Measured times for one entry of MatmulBenchResult::results.
 * NOTE(review): presumably one instance per executed test - confirm.
 */
struct MatmulBenchTestResult {
int num_run; /* first dimension of sec */
double **sec; /* sec[num_run][iter] */
};
/*
 * Result object returned by matmul_bench_run() and released with
 * matmul_bench_result_fini().
 *
 * NOTE(review): both num_run_max and num_run exist and only num_run carries a
 * comment ("maximum number of runs"); how the two differ is not visible in
 * this header - confirm against the implementation.
 */
struct MatmulBenchResult {
int num_test;
int *test_map; /* mapping between the results and MatmulBench::test_set */
int num_run_max;
unsigned int num_run; /* maximum number of runs */
unsigned int run_size_step; /* config::size_step adjusted to be the least common multiple over all tests */
unsigned int run_size_min; /* config::size_min adjusted to be a multiple of run_size_step */
struct MatmulBenchTestResult *results;
};
/*
 * Argument block passed by pointer to a test kernel.
 *
 * The aligned(64) attribute has to follow the `struct` keyword. Written in
 * front of it, the attribute belongs to an empty declaration rather than to
 * the type, so compilers ignore it and the struct keeps its default alignment.
 *
 * NOTE(review): with the attribute in effect, sizeof() is padded to a multiple
 * of 64 and objects of this type must live in 64-byte aligned storage. Check
 * that any heap allocation of this struct uses an aligned allocator.
 * NOTE(review): 64 presumably matches a cache line so that per-thread blocks
 * do not share one - confirm.
 */
struct __attribute__((aligned(64))) MatmulBenchParam {
    struct MatmulBench *mb;                     /* owning benchmark context */
    float *out;                                 /* presumably the output matrix - confirm */
    const float *inL, *inR;                     /* presumably the left/right input matrices - confirm */
    const float *inL_plus1line, *inR_plus1line; /* presumably inL/inR advanced by one row - confirm */
    unsigned int n;                             /* presumably the matrix dimension - confirm */
    unsigned int pitch_byte;                    /* presumably the row pitch in bytes - confirm */
    unsigned int i_block_size;                  /* same name as MatmulBenchConfig::i_block_size */
    void *ptr;                                  /* opaque pointer; meaning not visible in this header */
};
/* Entry point of one test kernel: receives its arguments through `p` and returns nothing. */
typedef void (*matmul_bench_test_run_t)(struct MatmulBenchParam *p);
/* Describes one benchmark test; MatmulBench::test_set points at these. */
struct MatmulBenchTest {
const char *name; /* NOTE(review): presumably the name matched by matmul_bench_config_enable_test() - confirm */
matmul_bench_test_run_t run; /* kernel entry point */
unsigned int size_step; /* per-test size step; the MatmulBenchConfig comments say the effective step is the LCM of these */
};
/*
 * Callback type accepted by matmul_bench_run().
 *
 * NOTE(review): presumably called each time one measurement finishes, with
 * the test that ran, the elapsed seconds, the iteration index and the matrix
 * size, and with `ptr` being the callback_data given to matmul_bench_run() -
 * confirm against the implementation.
 */
typedef void (*matmul_bench_finish_callback_t)(const struct MatmulBenchTest *test,
double sec,
unsigned int iter,
unsigned long mat_size,
void *ptr);
/*
 * CPU/compiler feature flags, one distinct bit each so they can be OR-ed
 * together.
 * NOTE(review): presumably stored in MatmulBench::feature_bits - confirm.
 */
#define MATMULBENCH_FEATURE_SSE (1<<0)
#define MATMULBENCH_FEATURE_AVX (1<<1)
#define MATMULBENCH_FEATURE_FMA (1<<2)
#define MATMULBENCH_FEATURE_NEON (1<<3)
#define MATMULBENCH_FEATURE_VFPV4 (1<<4)
#define MATMULBENCH_FEATURE_GCCVEC (1<<5)
/*
 * Benchmark context created by matmul_bench_init() and destroyed by
 * matmul_bench_fini().
 */
struct MatmulBench {
int num_test; /* NOTE(review): presumably the number of entries in test_set - confirm */
struct MatmulBenchTest *test_set; /* available tests */
long long cpu_freq; /* NOTE(review): unit not visible here, presumably Hz - confirm */
int feature_bits; /* NOTE(review): presumably an OR of MATMULBENCH_FEATURE_* - confirm */
int ops_per_cycle;
const char *arch_name;
struct MatmulBenchThreadPool *threads; /* type is declared elsewhere; opaque to users of this header */
double theoretical_peak_flops;
};
/* Re-declaration of a struct that is already fully defined earlier in this header; harmless. */
struct MatmulBenchResult;
/*
 * Creates a benchmark context.
 * Passing 0 for num_thread means: use the system's thread count.
 */
MATMUL_BENCH_EXPORT struct MatmulBench *matmul_bench_init(unsigned int num_thread);
/* Counterpart of matmul_bench_init(); presumably releases `mb` - confirm. */
MATMUL_BENCH_EXPORT void matmul_bench_fini(struct MatmulBench *mb);
/*
 * Creates a configuration with these defaults:
 * iter = 3, all tests enabled, automatic size.
 */
MATMUL_BENCH_EXPORT struct MatmulBenchConfig *matmul_bench_config_init(struct MatmulBench *mb);
/* Counterpart of matmul_bench_config_init(); presumably releases `c` - confirm. */
MATMUL_BENCH_EXPORT void matmul_bench_config_fini(struct MatmulBench *mb, struct MatmulBenchConfig *c);
/*
 * Enables the test called test_name in `config`.
 * Returns -1 if the name is not found.
 */
MATMUL_BENCH_EXPORT int matmul_bench_config_enable_test(struct MatmulBench *mb,
struct MatmulBenchConfig *config,
const char *test_name);
/*
 * Disables the test called test_name in `config`.
 * NOTE(review): presumably returns -1 when the name is not found, like
 * matmul_bench_config_enable_test() - confirm.
 */
MATMUL_BENCH_EXPORT int matmul_bench_config_disable_test(struct MatmulBench *mb,
struct MatmulBenchConfig *config,
const char *test_name);
/*
 * Runs the benchmark described by `config` and returns the results; release
 * them with matmul_bench_result_fini().
 * NOTE(review): presumably `callback` receives callback_data as its last
 * argument; whether a NULL callback is accepted is not visible here - confirm.
 */
MATMUL_BENCH_EXPORT struct MatmulBenchResult *matmul_bench_run(struct MatmulBench *mb,
struct MatmulBenchConfig *config,
matmul_bench_finish_callback_t callback,
void *callback_data);
/* Counterpart of matmul_bench_run(); presumably releases the result object `param` - confirm. */
MATMUL_BENCH_EXPORT void matmul_bench_result_fini(struct MatmulBench *mb,
struct MatmulBenchResult *param);
/*
 * NOTE(review): presumably converts a size-n matrix multiplication that took
 * `sec` seconds into GFLOPS; the formula is not visible in this header -
 * confirm.
 */
MATMUL_BENCH_EXPORT double matmul_bench_calc_gflops(unsigned int n, double sec);
/*
 * Writes the results `r` to the stream `fp` as CSV.
 * NOTE(review): the column layout, and whether `fp` is left open for the
 * caller, are not visible in this header - confirm.
 */
MATMUL_BENCH_EXPORT void matmul_bench_export_csv(FILE *fp,
struct MatmulBench *mb,
struct MatmulBenchConfig *config,
struct MatmulBenchResult *r);
#endif