Skip to content

Commit 3c429ae

Browse files
author
Abdullah Mughrabi
committed
[BENCH] added tutorial and hello afu example, mmtiled is still incomplete
1 parent 3547319 commit 3c429ae

File tree

29 files changed

+1937
-805
lines changed

29 files changed

+1937
-805
lines changed

00_bench/include/algorithms/capi/mmtiled.h

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ struct __attribute__((__packed__)) MatrixArrays
1818
};
1919

2020
#define MIN(a,b) (((a)<(b))?(a):(b))
21+
#define IS_ZERO(a) (((a==0))?(1):(a))
2122

2223
struct MatrixArrays *newMatrixArrays(struct Arguments *arguments);
2324
void freeMatrixArrays(struct MatrixArrays *matrixArrays);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef TUTORIAL_H
2+
#define TUTORIAL_H
3+
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
#include <unistd.h>
8+
#include <stdint.h>
9+
10+
typedef struct //base
11+
{
12+
uint64_t size;//8
13+
void *stripe1;//8
14+
void *stripe2;//8
15+
void *parity; //8
16+
uint64_t done;// base + 32bytes
17+
} parity_request;
18+
19+
20+
int tutorial_main_call(int argc, char *argv[]);
21+
22+
#endif

00_bench/include/algorithms/openmp/mmtiled.h

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ struct __attribute__((__packed__)) MatrixArrays
1818
};
1919

2020
#define MIN(a,b) (((a)<(b))?(a):(b))
21+
#define IS_ZERO(a,b) (((a)==(0))?(1):(a))
2122

2223
struct MatrixArrays *newMatrixArrays(struct Arguments *arguments);
2324
void freeMatrixArrays(struct MatrixArrays *matrixArrays);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef TUTORIAL_H
2+
#define TUTORIAL_H
3+
4+
#include <stdio.h>
5+
#include <stdlib.h>
6+
#include <string.h>
7+
#include <unistd.h>
8+
#include <stdint.h>
9+
10+
typedef struct
11+
{
12+
uint64_t size;
13+
void *stripe1;
14+
void *stripe2;
15+
void *parity;
16+
uint64_t done;
17+
} parity_request;
18+
19+
20+
int tutorial_main_call(int argc, char *argv[]);
21+
22+
#endif

00_bench/include/capi_utils/capienv.h

+35-31
Original file line numberDiff line numberDiff line change
@@ -12,49 +12,51 @@
1212
// *************** MMIO General **************
1313
// ********************************************************************************************
1414
// 0x3fffff8 >> 0x3FFFF10 used
15-
#define AFU_CONFIGURE 0x3FFFFF8
16-
#define AFU_CONFIGURE_2 0x3FFFF30
17-
#define AFU_STATUS 0x3FFFFF0
15+
#define AFU_STATUS 0x3FFFFF8
16+
#define AFU_CONFIGURE 0x3FFFFF0
17+
#define AFU_CONFIGURE_2 0x3FFFFE8
1818

19-
#define CU_CONFIGURE 0x3FFFFE8
20-
#define CU_CONFIGURE_2 0x3FFFF28
2119
#define CU_STATUS 0x3FFFFE0
20+
#define CU_CONFIGURE 0x3FFFFD8
21+
#define CU_CONFIGURE_2 0x3FFFFD0
22+
#define CU_CONFIGURE_3 0x3FFFFC8
23+
#define CU_CONFIGURE_4 0x3FFFFC0
2224

23-
#define CU_RETURN 0x3FFFFD8 // running counters that you can read continuosly
24-
#define CU_RETURN_2 0x3FFFF10
25-
#define CU_RETURN_ACK 0x3FFFFD0
25+
#define CU_RETURN 0x3FFFFB8 // running counters that you can read continuosly
26+
#define CU_RETURN_2 0x3FFFFB0
27+
#define CU_RETURN_ACK 0x3FFFFA8
2628

27-
#define CU_RETURN_DONE 0x3FFFFC8
28-
#define CU_RETURN_DONE_ACK 0x3FFFFC0
29+
#define CU_RETURN_DONE 0x3FFFFA0
30+
#define CU_RETURN_DONE_ACK 0x3FFFF98
2931

30-
#define ERROR_REG 0x3FFFFB8
31-
#define ERROR_REG_ACK 0x3FFFFB0
32+
#define ERROR_REG 0x3FFFF90
33+
#define ERROR_REG_ACK 0x3FFFF88
3234

3335
// ********************************************************************************************
3436
// *************** AFU Stats **************
3537
// ********************************************************************************************
3638

37-
#define DONE_COUNT_REG 0x3FFFFA8
38-
#define DONE_RESTART_COUNT_REG 0x3FFFFA0
39-
#define DONE_READ_COUNT_REG 0x3FFFF98
40-
#define DONE_WRITE_COUNT_REG 0x3FFFF90
41-
#define DONE_PREFETCH_READ_COUNT_REG 0x3FFFF88
42-
#define DONE_PREFETCH_WRITE_COUNT_REG 0x3FFFF80
39+
#define DONE_COUNT_REG 0x3FFFF78
40+
#define DONE_RESTART_COUNT_REG 0x3FFFF70
41+
#define DONE_READ_COUNT_REG 0x3FFFF68
42+
#define DONE_WRITE_COUNT_REG 0x3FFFF60
43+
#define DONE_PREFETCH_READ_COUNT_REG 0x3FFFF58
44+
#define DONE_PREFETCH_WRITE_COUNT_REG 0x3FFFF50
4345

44-
#define PAGED_COUNT_REG 0x3FFFF78
45-
#define FLUSHED_COUNT_REG 0x3FFFF70
46-
#define AERROR_COUNT_REG 0x3FFFF68
47-
#define DERROR_COUNT_REG 0x3FFFF60
48-
#define FAILED_COUNT_REG 0x3FFFF58
49-
#define FAULT_COUNT_REG 0x3FFFF50
50-
#define NRES_COUNT_REG 0x3FFFF48
51-
#define NLOCK_COUNT_REG 0x3FFFF40
52-
#define CYCLE_COUNT_REG 0x3FFFF38
46+
#define PAGED_COUNT_REG 0x3FFFF48
47+
#define FLUSHED_COUNT_REG 0x3FFFF40
48+
#define AERROR_COUNT_REG 0x3FFFF38
49+
#define DERROR_COUNT_REG 0x3FFFF30
50+
#define FAILED_COUNT_REG 0x3FFFF28
51+
#define FAULT_COUNT_REG 0x3FFFF20
52+
#define NRES_COUNT_REG 0x3FFFF18
53+
#define NLOCK_COUNT_REG 0x3FFFF10
54+
#define CYCLE_COUNT_REG 0x3FFFF08
5355

54-
#define PREFETCH_READ_BYTE_COUNT_REG 0x3FFFF30
55-
#define PREFETCH_WRITE_BYTE_COUNT_REG 0x3FFFF28
56-
#define READ_BYTE_COUNT_REG 0x3FFFF20
57-
#define WRITE_BYTE_COUNT_REG 0x3FFFF18
56+
#define PREFETCH_READ_BYTE_COUNT_REG 0x3FFFF00
57+
#define PREFETCH_WRITE_BYTE_COUNT_REG 0x3FFFEF8
58+
#define READ_BYTE_COUNT_REG 0x3FFFEF0
59+
#define WRITE_BYTE_COUNT_REG 0x3FFFEE8
5860

5961

6062
#ifdef SIM
@@ -70,6 +72,8 @@ struct AFUStatus
7072
uint64_t cu_stop; // afu stopping condition
7173
uint64_t cu_config;
7274
uint64_t cu_config_2;
75+
uint64_t cu_config_3;
76+
uint64_t cu_config_4;
7377
uint64_t cu_status;
7478
uint64_t cu_mode;
7579
uint64_t afu_config;

00_bench/src/algorithms/capi/memcpy.c

+2
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ void copyDataArrays(struct DataArrays *dataArrays, struct Arguments *arguments)
115115
afu_status.cu_config = arguments->cu_config; // non zero CU triggers the AFU to work
116116
afu_status.cu_config = ((afu_status.cu_config << 24) | (arguments->numThreads));
117117
afu_status.cu_config_2 = afu_status.cu_config_2;
118+
afu_status.cu_config_3 = 1 ;
119+
afu_status.cu_config_4 = 1 ;
118120
afu_status.cu_stop = wed->size_send;
119121

120122
startAFU(&afu, &afu_status);

00_bench/src/algorithms/capi/mmtiled.c

+7-6
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ void initializeMatrixArrays(struct MatrixArrays *matrixArrays)
7575
// matrixArrays->B[(i * matrixArrays->size_n) + j] = generateRandInt(mt19937var) % 512;
7676
matrixArrays->A[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
7777
matrixArrays->B[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
78-
matrixArrays->C[(i * matrixArrays->size_n) + j] = 0;
78+
matrixArrays->C[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
7979
}
8080
}
8181

@@ -92,7 +92,7 @@ void resetMatrixArrays(struct MatrixArrays *matrixArrays)
9292
{
9393
for(j = 0; j < matrixArrays->size_n; j++)
9494
{
95-
matrixArrays->C[(i * matrixArrays->size_n) + j] = 0;
95+
matrixArrays->C[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
9696
}
9797
}
9898

@@ -274,7 +274,7 @@ void matrixMultiplyTiledTransposed(struct MatrixArrays *matrixArrays, struct Arg
274274
afu_status.cu_config = 0; // non zero CU triggers the AFU to work
275275
afu_status.cu_config = ((afu_status.cu_config << 32) | (arguments->numThreads));
276276
afu_status.cu_config_2 = 0;
277-
afu_status.cu_stop = wed->size_tile;
277+
afu_status.cu_stop = wed->size_tile * wed->size_tile;
278278

279279
// ********************************************************************************************
280280
// *************** START AFU **************
@@ -295,10 +295,11 @@ void matrixMultiplyTiledTransposed(struct MatrixArrays *matrixArrays, struct Arg
295295
// ********************************************************************************************
296296
// *************** START CU **************
297297
// ********************************************************************************************
298-
afu_status.cu_config = i; // non zero CU triggers the AFU to work
298+
afu_status.cu_config = arguments->cu_config; // non zero CU triggers the AFU to work
299299
afu_status.cu_config = ((afu_status.cu_config << 32) | (arguments->numThreads));
300-
afu_status.cu_config_2 = j;
301-
afu_status.cu_config_2 = ((afu_status.cu_config_2 << 32) | (k));
300+
afu_status.cu_config_2 = ((i << 1) | 1);
301+
afu_status.cu_config_3 = ((j << 1) | 1);
302+
afu_status.cu_config_4 = ((k << 1) | 1);
302303
startCU(&afu, &afu_status);
303304

304305
// ********************************************************************************************
+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <stdint.h>
4+
#include <unistd.h>
5+
#include <string.h>
6+
#include <math.h>
7+
#include <omp.h>
8+
9+
#include "timer.h"
10+
#include "myMalloc.h"
11+
#include "config.h"
12+
13+
#include "libcxl.h"
14+
#include "capienv.h"
15+
16+
17+
#include "tutorial.h"
18+
19+
20+
int tutorial_main_call(int argc, char *argv[])
21+
{
22+
struct cxl_afu_h *afu;
23+
24+
parity_request *example;
25+
size_t size = 128, alignment = 128;
26+
27+
28+
afu = cxl_afu_open_dev("/dev/cxl/afu0.0d");
29+
if(!afu)
30+
{
31+
printf("Failed to open AFU: %m\n");
32+
return 1;
33+
}
34+
35+
example = aligned_alloc(alignment, sizeof(*example));
36+
example->size = size;
37+
example->stripe1 = aligned_alloc(alignment, size);
38+
example->stripe2 = aligned_alloc(alignment, size);
39+
example->parity = aligned_alloc(alignment, size);
40+
41+
memcpy(example->stripe1,
42+
"asfb190jwqsefx0amxAqa1nlkaf78sa0g&0ha8dngj3t21078fnajl38n32j3np2"
43+
"x3t8wefiankxkfmgm ncmbqx8ehn2jkaeubgfbuapwnjxkg09f0w9es80872981",
44+
size);
45+
memcpy(example->stripe2,
46+
"\x35\x1b\x07\x16\x11\x50\x43\x4a\x04\x1e\x1e\x00\x46\x08\x42\x0e"
47+
"\x1d\x1d\x33\x51\x11\x50\x1c\x05\x1f\x18\x47\x17\x6c\x1b\x08\x43"
48+
"\x47\x4f\x43\x48\x04\x40\x05\x0d\x13\x06\x4a\x54\x45\x59\x51\x43"
49+
"\x18\x2f\x49\x0c\x4a\x09\x4b\x48\x0b\x50\x46\x03\x5d\x09\x50\x46"
50+
"\x17\x13\x07\x5d\x12\x4b\x46\x20\x46\x0a\x4b\x19\x07\x15\x02\x47"
51+
"\x01\x49\x05\x06\x4d\x16\x1e\x58\x4b\x00\x0d\x4e\x46\x02\x02\x12"
52+
"\x45\x07\x17\x09\x08\x0b\x1b\x06\x50\x18\x00\x4a\x0b\x04\x0a\x55"
53+
"\x19\x14\x55\x16\x55\x45\x14\x5d\x51\x4a\x17\x41\x56\x57\x5f",
54+
size);
55+
example->done = 0;
56+
57+
58+
// That is some proper parity! This is exactly what I'm expecting to see. I'd also like to see this running on some real gear soon
59+
60+
printf("[example structure\n");
61+
printf(" example: %p\n", example);
62+
printf(" example->size: %llu\n", example->size);
63+
printf(" example->stripe1: %p\n", example->stripe1);
64+
printf(" example->stripe2: %p\n", example->stripe2);
65+
printf(" example->parity: %p\n", example->parity);
66+
printf(" &(example->done): %p\n", &(example->done));
67+
68+
cxl_afu_attach(afu, (__u64)example);
69+
printf("Attached to AFU\n");
70+
71+
printf("Waiting for completion by AFU\n");
72+
while(!example->done){
73+
sleep(1);
74+
}
75+
76+
printf("PARITY:\n%s\n", (char *)example->parity);
77+
78+
printf("Releasing AFU\n");
79+
cxl_afu_free(afu);
80+
81+
return 0;
82+
}

00_bench/src/algorithms/openmp/mmtiled.c

+12-7
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ void initializeMatrixArrays(struct MatrixArrays *matrixArrays)
7373
// matrixArrays->B[(i * matrixArrays->size_n) + j] = generateRandInt(mt19937var) % 512;
7474
matrixArrays->A[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
7575
matrixArrays->B[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
76-
matrixArrays->C[(i * matrixArrays->size_n) + j] = 0;
76+
matrixArrays->C[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
7777
}
7878
}
7979

@@ -90,7 +90,7 @@ void resetMatrixArrays(struct MatrixArrays *matrixArrays)
9090
{
9191
for(j = 0; j < matrixArrays->size_n; j++)
9292
{
93-
matrixArrays->C[(i * matrixArrays->size_n) + j] = 0;
93+
matrixArrays->C[(i * matrixArrays->size_n) + j] = (i * matrixArrays->size_n) + j;
9494
}
9595
}
9696

@@ -256,7 +256,7 @@ void matrixMultiplyTiledTransposed(struct MatrixArrays *matrixArrays, struct Arg
256256
uint64_t kk;
257257
uint32_t sum;
258258

259-
#pragma omp parallel for private(j,k,ii,jj,kk,sum) schedule(dynamic)
259+
// #pragma omp parallel for private(j,k,ii,jj,kk,sum) schedule(dynamic)
260260
for(i = 0; i < matrixArrays->size_n; i += matrixArrays->size_tile)
261261
{
262262
for(j = 0; j < matrixArrays->size_n; j += matrixArrays->size_tile)
@@ -269,15 +269,20 @@ void matrixMultiplyTiledTransposed(struct MatrixArrays *matrixArrays, struct Arg
269269
{
270270
sum = 0;
271271
//#pragma omp parallel for reduction(+:sum)
272-
for (kk = k; kk < MIN(k + matrixArrays->size_tile, matrixArrays->size_n); kk++)
273-
{
274-
sum += matrixArrays->A[(ii * matrixArrays->size_n) + kk] * matrixArrays->B[(jj * matrixArrays->size_n) + kk];
275-
}
272+
// for (kk = k; kk < MIN(k + matrixArrays->size_tile, matrixArrays->size_n); kk++)
273+
// {
274+
// sum += matrixArrays->A[(ii * matrixArrays->size_n) + kk] * matrixArrays->B[(jj * matrixArrays->size_n) + kk];
275+
// }
276276
matrixArrays->C[(ii * matrixArrays->size_n) + jj] += sum;
277+
printf("i:%lu j:%lu C:%u \n",ii,jj, matrixArrays->C[(ii * matrixArrays->size_n) + jj]);
277278
}
278279
}
280+
281+
break;
279282
}
283+
break;
280284
}
285+
break;
281286
}
282287

283288
}

00_bench/src/capi_utils/capienv.c

+2
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ void startCU(struct cxl_afu_h **afu, struct AFUStatus *afu_status)
114114
{
115115
cxl_mmio_write64((*afu), CU_CONFIGURE, (uint64_t)afu_status->cu_config);
116116
cxl_mmio_write64((*afu), CU_CONFIGURE_2, (uint64_t)afu_status->cu_config_2);
117+
cxl_mmio_write64((*afu), CU_CONFIGURE_3, (uint64_t)afu_status->cu_config_3);
118+
cxl_mmio_write64((*afu), CU_CONFIGURE_4, (uint64_t)afu_status->cu_config_4);
117119
cxl_mmio_read64((*afu), CU_STATUS, (uint64_t *) & (afu_status->cu_status));
118120
}
119121
while(!((afu_status->cu_status)));
+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <unistd.h>
5+
6+
7+
#include "myMalloc.h"
8+
#include "mt19937.h"
9+
#include "timer.h"
10+
11+
#include "config.h"
12+
#include "tutorial.h"
13+
14+
15+
int main(int argc, char *argv[])
16+
{
17+
18+
return tutorial_main_call(argc, argv);
19+
20+
}

01_capi_integration/accelerator_rtl/afu_control/mmio.sv

+6
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,12 @@ module mmio (
242242
CU_CONFIGURE_2 : begin
243243
cu_configure.var2 <= data_in_latched;
244244
end
245+
CU_CONFIGURE_3 : begin
246+
cu_configure.var3 <= data_in_latched;
247+
end
248+
CU_CONFIGURE_4 : begin
249+
cu_configure.var4 <= data_in_latched;
250+
end
245251
AFU_CONFIGURE : begin
246252
afu_configure.var1 <= data_in_latched;
247253
end

01_capi_integration/accelerator_rtl/afu_pkgs/afu_pkg.sv

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ package AFU_PKG;
3131
typedef struct packed {
3232
logic [0:63] var1;
3333
logic [0:63] var2;
34+
logic [0:63] var3;
35+
logic [0:63] var4;
3436
} cu_configure_type;
3537

3638
typedef struct packed {

0 commit comments

Comments
 (0)