Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement local and global variables handling for wasm simd #4056

Open
wants to merge 12 commits into
base: dev/simd_for_interp
Choose a base branch
from
14 changes: 10 additions & 4 deletions build-scripts/config_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -320,12 +320,18 @@ else ()
message (" Wakeup of blocking operations enabled")
endif ()
if (WAMR_BUILD_SIMD EQUAL 1)
if (NOT WAMR_BUILD_TARGET MATCHES "RISCV64.*")
add_definitions (-DWASM_ENABLE_SIMD=1)
message (" SIMD enabled")
else ()
# Decide the value passed to the compiler as WASM_ENABLE_SIMD.
# SIMD_ENABLED starts at 0 and is switched to 1 only by the final else()
# branch, i.e. when neither of the "disable" conditions below applies.
set(SIMD_ENABLED 0)
if (WAMR_BUILD_TARGET MATCHES "RISCV64.*")
# Targets matching RISCV64.*: also clear the build option itself.
set(WAMR_BUILD_SIMD 0)
message (" SIMD disabled due to not supported on target RISCV64")
# NOTE(review): WAMR_BUILD_SIMDE is assigned in build-scripts/runtime_lib.cmake;
# this test is only meaningful if that assignment has already run when this
# file is processed - include order is not visible here, confirm.
elseif (WAMR_BUILD_FAST_INTERP EQUAL 1 AND WAMR_BUILD_SIMDE EQUAL 0)
set(WAMR_BUILD_SIMD 0)
message(" SIMD disabled as the simde is not built in fast interpreter mode")
else()
set(SIMD_ENABLED 1)
message (" SIMD enabled")
endif ()
# The macro is always defined (as 0 or 1) once this point is reached.
add_definitions(-DWASM_ENABLE_SIMD=${SIMD_ENABLED})
endif ()
if (WAMR_BUILD_AOT_STACK_FRAME EQUAL 1)
add_definitions (-DWASM_ENABLE_AOT_STACK_FRAME=1)
Expand Down
10 changes: 8 additions & 2 deletions build-scripts/runtime_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,14 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1)
include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake)
endif ()

if (WAMR_BUILD_LIB_SIMDE EQUAL 1)
include (${IWASM_DIR}/libraries/simde/simde.cmake)
# Include the SIMDe library script only when both SIMD and the fast
# interpreter are requested; WAMR_BUILD_SIMDE records the outcome (0 or 1).
# When the outer condition is false, WAMR_BUILD_SIMDE is left untouched.
if (WAMR_BUILD_SIMD EQUAL 1 AND WAMR_BUILD_FAST_INTERP EQUAL 1)
# NOTE(review): MATCHES performs an unanchored regex search, so any target
# string that merely contains "ARM" or "AARCH64" passes - confirm intended.
if (NOT (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*"))
message(STATUS "SIMDe doesnt support platform " ${WAMR_BUILD_TARGET})
set(WAMR_BUILD_SIMDE 0)
else()
include (${IWASM_DIR}/libraries/simde/simde.cmake)
set (WAMR_BUILD_SIMDE 1)
endif()
endif ()

if (WAMR_BUILD_WASM_CACHE EQUAL 1)
Expand Down
197 changes: 137 additions & 60 deletions core/iwasm/interpreter/wasm_interp_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -1699,6 +1699,11 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
GET_OPERAND(uint64, I64, off));
ret_offset += 2;
}
else if (ret_types[ret_idx] == VALUE_TYPE_V128) {
PUT_V128_TO_ADDR(prev_frame->lp + ret_offset,
GET_OPERAND_V128(off));
ret_offset += 4;
}
#if WASM_ENABLE_GC != 0
else if (wasm_is_type_reftype(ret_types[ret_idx])) {
PUT_REF_TO_ADDR(prev_frame->lp + ret_offset,
Expand Down Expand Up @@ -3536,6 +3541,24 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
HANDLE_OP_END();
}

#if WASM_ENABLE_SIMD != 0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it's repetitive, but let's do it in a separate PR (same for all the places where v128 repeats i64 a lot).

/* Shared handler for EXT_OP_SET_LOCAL_FAST_V128 and
 * EXT_OP_TEE_LOCAL_FAST_V128: both opcodes run the same body, which
 * stores a v128 operand into the local slot at frame_lp + local_offset. */
HANDLE_OP(EXT_OP_SET_LOCAL_FAST_V128)
HANDLE_OP(EXT_OP_TEE_LOCAL_FAST_V128)
{
/* The local offset is read from the instruction stream; how far frame_ip
 * advances past it depends on WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS
 * (one element when unaligned access is supported, two otherwise). */
/* clang-format off */
#if WASM_CPU_SUPPORTS_UNALIGNED_ADDR_ACCESS != 0
local_offset = *frame_ip++;
#else
local_offset = *frame_ip;
frame_ip += 2;
#endif
/* clang-format on */
PUT_V128_TO_ADDR((uint32 *)(frame_lp + local_offset),
GET_OPERAND_V128(0));
/* NOTE(review): presumably skips the operand offset that
 * GET_OPERAND_V128(0) read without advancing frame_ip - confirm against
 * the macro definition. */
frame_ip += 2;
HANDLE_OP_END();
}
#endif
HANDLE_OP(WASM_OP_GET_GLOBAL)
{
global_idx = read_uint32(frame_ip);
Expand Down Expand Up @@ -3572,7 +3595,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
GET_I64_FROM_ADDR((uint32 *)global_addr));
HANDLE_OP_END();
}

#if WASM_ENABLE_SIMD != 0
/* Handler for WASM_OP_GET_GLOBAL_V128: copies a v128 value out of a
 * global's storage into a frame slot. Compiled only when
 * WASM_ENABLE_SIMD is non-zero. */
HANDLE_OP(WASM_OP_GET_GLOBAL_V128)
{
/* Index of the global, taken from the instruction stream. */
global_idx = read_uint32(frame_ip);
bh_assert(global_idx < module->e->global_count);
global = globals + global_idx;
global_addr = get_global_addr(global_data, global);
/* Destination slot offset, applied relative to frame_lp below. */
addr_ret = GET_OFFSET();
PUT_V128_TO_ADDR(frame_lp + addr_ret,
GET_V128_FROM_ADDR((uint32 *)global_addr));
HANDLE_OP_END();
}
#endif
HANDLE_OP(WASM_OP_SET_GLOBAL)
{
global_idx = read_uint32(frame_ip);
Expand Down Expand Up @@ -3639,6 +3674,19 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
GET_I64_FROM_ADDR(frame_lp + addr1));
HANDLE_OP_END();
}
/* NOTE(review): this handler is guarded by WASM_ENABLE_SIMDE, whereas the
 * WASM_OP_GET_GLOBAL_V128 handler in this file is guarded by
 * WASM_ENABLE_SIMD - confirm which macro is intended so both handlers are
 * compiled in or out together. */
#if WASM_ENABLE_SIMDE != 0
/* Handler for WASM_OP_SET_GLOBAL_V128: copies a v128 value from a frame
 * slot into a global's storage. */
HANDLE_OP(WASM_OP_SET_GLOBAL_V128)
{
/* Index of the global, taken from the instruction stream. */
global_idx = read_uint32(frame_ip);
bh_assert(global_idx < module->e->global_count);
global = globals + global_idx;
global_addr = get_global_addr(global_data, global);
/* Source slot offset, applied relative to frame_lp below. */
addr1 = GET_OFFSET();
PUT_V128_TO_ADDR((uint32 *)global_addr,
GET_V128_FROM_ADDR(frame_lp + addr1));
HANDLE_OP_END();
}
#endif

/* memory load instructions */
HANDLE_OP(WASM_OP_I32_LOAD)
Expand Down Expand Up @@ -4884,6 +4932,28 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,

HANDLE_OP_END();
}
#if WASM_ENABLE_SIMD != 0
/* Handler for EXT_OP_COPY_STACK_TOP_V128: copies one v128 value from the
 * frame slot at offset addr1 to the frame slot at offset addr2. Compiled
 * only when WASM_ENABLE_SIMD is non-zero. */
HANDLE_OP(EXT_OP_COPY_STACK_TOP_V128)
{
/* First offset read is the source, second is the destination. */
addr1 = GET_OFFSET();
addr2 = GET_OFFSET();

PUT_V128_TO_ADDR(frame_lp + addr2,
GET_V128_FROM_ADDR(frame_lp + addr1));

#if WASM_ENABLE_GC != 0
/* With GC enabled, move the frame-ref mark along with the value: if the
 * source slot is marked, clear it there and set it on the destination.
 * NOTE(review): the addr1 >= 0 test presumably excludes constant-pool
 * offsets (negative values) - confirm against the offset encoding. */
/* Ignore constants because they are not reference */
if (addr1 >= 0) {
if (*FRAME_REF(addr1)) {
CLEAR_FRAME_REF(addr1);
SET_FRAME_REF(addr2);
}
}
#endif

HANDLE_OP_END();
}
#endif

HANDLE_OP(EXT_OP_COPY_STACK_VALUES)
{
Expand Down Expand Up @@ -5766,82 +5836,68 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
case SIMD_v128_load:
{
uint32 offset, addr;
offset = read_uint32(
frame_ip); // TODO: Check with an offset!
offset = read_uint32(frame_ip);
addr = GET_OPERAND(uint32, I32, 0);
frame_ip += 2;
addr_ret = GET_OFFSET();
CHECK_MEMORY_OVERFLOW(16);
PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr));
break;
}
#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \
do { \
uint32 offset, addr; \
offset = read_uint32(frame_ip); \
addr = GET_OPERAND(uint32, I32, 0); \
frame_ip += 2; \
addr_ret = GET_OFFSET(); \
CHECK_MEMORY_OVERFLOW(16); \
\
simde_v128_t simde_result = simde_func(maddr); \
\
V128 result; \
SIMDE_V128_TO_SIMD_V128(simde_result, result); \
\
V128 reversed_result; \
for (int i = 0; i < num_elements; i++) { \
reversed_result.i##element_size##x##num_elements[i] = \
result.i##element_size##x##num_elements[num_elements - 1 - i]; \
} \
PUT_V128_TO_ADDR(frame_lp + addr_ret, reversed_result); \
\
break; \
#define SIMD_LOAD_OP(simde_func, element_size, num_elements) \
do { \
uint32 offset, addr; \
offset = read_uint32(frame_ip); \
addr = GET_OPERAND(uint32, I32, 0); \
frame_ip += 2; \
addr_ret = GET_OFFSET(); \
CHECK_MEMORY_OVERFLOW(16); \
\
simde_v128_t simde_result = simde_func(maddr); \
\
V128 result; \
SIMDE_V128_TO_SIMD_V128(simde_result, result); \
PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
\
} while (0)
case SIMD_v128_load8x8_s:
{
SIMD_LOAD_OP(SIMD_v128_load8x8_s,
simde_wasm_i16x8_load8x8, 16, 8);
SIMD_LOAD_OP(simde_wasm_i16x8_load8x8, 16, 8);
break;
}
case SIMD_v128_load8x8_u:
{
SIMD_LOAD_OP(SIMD_v128_load8x8_u,
simde_wasm_u16x8_load8x8, 16, 8);
SIMD_LOAD_OP(simde_wasm_u16x8_load8x8, 16, 8);
break;
}
case SIMD_v128_load16x4_s:
{
SIMD_LOAD_OP(SIMD_v128_load16x4_s,
simde_wasm_i32x4_load16x4, 32, 4);
SIMD_LOAD_OP(simde_wasm_i32x4_load16x4, 32, 4);
break;
}
case SIMD_v128_load16x4_u:
{
SIMD_LOAD_OP(SIMD_v128_load16x4_u,
simde_wasm_u32x4_load16x4, 32, 4);
SIMD_LOAD_OP(simde_wasm_u32x4_load16x4, 32, 4);
break;
}
case SIMD_v128_load32x2_s:
{
SIMD_LOAD_OP(SIMD_v128_load32x2_s,
simde_wasm_i64x2_load32x2, 64, 2);
SIMD_LOAD_OP(simde_wasm_i64x2_load32x2, 64, 2);
break;
}
case SIMD_v128_load32x2_u:
{
SIMD_LOAD_OP(SIMD_v128_load32x2_u,
simde_wasm_u64x2_load32x2, 64, 2);
SIMD_LOAD_OP(simde_wasm_u64x2_load32x2, 64, 2);
break;
}
#define SIMD_LOAD_SPLAT_OP(op_name, simde_func) \
#define SIMD_LOAD_SPLAT_OP(simde_func) \
do { \
uint32 offset, addr; \
offset = read_uint32(frame_ip); \
addr = GET_OPERAND(uint32, I32, 0); \
frame_ip += 2; \
addr_ret = GET_OFFSET(); \
CHECK_MEMORY_OVERFLOW(16); \
CHECK_MEMORY_OVERFLOW(4); \
\
simde_v128_t simde_result = simde_func(maddr); \
\
Expand All @@ -5853,38 +5909,33 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,

case SIMD_v128_load8_splat:
{
SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat,
simde_wasm_v128_load8_splat);
SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load8_splat);
break;
}
case SIMD_v128_load16_splat:
{
SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat,
simde_wasm_v128_load16_splat);
SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load16_splat);
break;
}
case SIMD_v128_load32_splat:
{
SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat,
simde_wasm_v128_load32_splat);
SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load32_splat);
break;
}
case SIMD_v128_load64_splat:
{
SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat,
simde_wasm_v128_load64_splat);
SIMD_LOAD_SPLAT_OP(simde_wasm_v128_load64_splat);
break;
}
case SIMD_v128_store:
{
uint32 offset, addr;
offset = read_uint32(frame_ip);
frame_ip += 2;
V128 data = POP_V128();
int32 base = POP_I32();
offset += base;
addr = GET_OPERAND(uint32, I32, 0);

V128 data;
data = POP_V128();

CHECK_MEMORY_OVERFLOW(16);
STORE_V128(maddr, data);
break;
Expand All @@ -5905,14 +5956,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
case SIMD_v8x16_shuffle:
{
V128 indices;
V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();

bh_memcpy_s(&indices, sizeof(V128), frame_ip,
sizeof(V128));
frame_ip += sizeof(V128);

V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();

V128 result;
for (int i = 0; i < 16; i++) {
uint8_t index = indices.i8x16[i];
Expand Down Expand Up @@ -5940,6 +5991,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
SIMDE_V128_TO_SIMD_V128(simde_result, result);

PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break;
}

/* Splat */
Expand All @@ -5965,7 +6017,15 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,

case SIMD_i8x16_splat:
{
SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat);
uint32 val = POP_I32();
addr_ret = GET_OFFSET();

simde_v128_t simde_result = simde_wasm_i8x16_splat(val);

V128 result;
SIMDE_V128_TO_SIMD_V128(simde_result, result);

PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break;
}
case SIMD_i16x8_splat:
Expand Down Expand Up @@ -6081,8 +6141,8 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,

#define SIMD_DOUBLE_OP(simde_func) \
do { \
V128 v1 = POP_V128(); \
V128 v2 = POP_V128(); \
V128 v1 = POP_V128(); \
addr_ret = GET_OFFSET(); \
\
simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \
Expand All @@ -6097,7 +6157,18 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
/* i8x16 comparison operations */
case SIMD_i8x16_eq:
{
SIMD_DOUBLE_OP(simde_wasm_i8x16_eq);
V128 v2 = POP_V128();
V128 v1 = POP_V128();
addr_ret = GET_OFFSET();

simde_v128_t simde_result =
simde_wasm_i8x16_eq(SIMD_V128_TO_SIMDE_V128(v1),
SIMD_V128_TO_SIMDE_V128(v2));

V128 result;
SIMDE_V128_TO_SIMD_V128(simde_result, result);

PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
break;
}
case SIMD_i8x16_ne:
Expand Down Expand Up @@ -7488,8 +7559,14 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
}

for (i = 0; i < cur_func->param_count; i++) {
if (cur_func->param_types[i] == VALUE_TYPE_I64
|| cur_func->param_types[i] == VALUE_TYPE_F64) {
if (cur_func->param_types[i] == VALUE_TYPE_V128) {
PUT_V128_TO_ADDR(
outs_area->lp,
GET_OPERAND_V128(2 * (cur_func->param_count - i - 1)));
outs_area->lp += 4;
}
else if (cur_func->param_types[i] == VALUE_TYPE_I64
|| cur_func->param_types[i] == VALUE_TYPE_F64) {
PUT_I64_TO_ADDR(
outs_area->lp,
GET_OPERAND(uint64, I64,
Expand Down
Loading
Loading