From 6757af7d214a49b06e5289b9784b4b99b4a35aaf Mon Sep 17 00:00:00 2001 From: Alexander Heinecke Date: Sat, 8 Feb 2025 17:56:15 +0530 Subject: [PATCH] Bump LIBXSMM (#38) * update to latest libxsmm * fixed CI slurm partition --- libxsmm | 2 +- scripts/buildkite/libxsmm-tpp.yml | 2 +- src/libxsmm_dnn_conv.c | 83 +------------------------------ 3 files changed, 3 insertions(+), 84 deletions(-) diff --git a/libxsmm b/libxsmm index 2007547..ba10d53 160000 --- a/libxsmm +++ b/libxsmm @@ -1 +1 @@ -Subproject commit 2007547c8993be00b4fa7df803c2578b14e17d52 +Subproject commit ba10d530a62a3592bb5c6a5e85020bb3bd30afe3 diff --git a/scripts/buildkite/libxsmm-tpp.yml b/scripts/buildkite/libxsmm-tpp.yml index 3f3f4d0..368d1fb 100644 --- a/scripts/buildkite/libxsmm-tpp.yml +++ b/scripts/buildkite/libxsmm-tpp.yml @@ -1,6 +1,6 @@ env: TOOL_TEST: "$${BUILDKITE_BUILD_CHECKOUT_PATH}/libxsmm/scripts/tool_test.sh" - PARTITIONS: "spr-all" + PARTITIONS: "emr" CONFIG: "intel-2022" LOGRPTBND: "+" ENV_PYTHON: 1 diff --git a/src/libxsmm_dnn_conv.c b/src/libxsmm_dnn_conv.c index 5cdfcc0..d4d0e8e 100644 --- a/src/libxsmm_dnn_conv.c +++ b/src/libxsmm_dnn_conv.c @@ -1208,8 +1208,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets = NULL; res.B_offsets = NULL; @@ -1223,13 +1221,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ l_flags |= res.fwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -1449,8 +1440,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets = NULL; res.B_offsets = NULL; @@ -1464,13 +1453,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ l_flags |= res.fwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -1751,8 +1733,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets = NULL; res.B_offsets = NULL; @@ -1766,13 +1746,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_fwd_kernels( libxsmm_dnn_conv_ l_flags |= res.fwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -2053,8 +2026,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets_bwd = NULL; res.B_offsets_bwd = NULL; @@ -2065,13 +2036,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ l_flags |= res.bwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -2228,8 +2192,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets_bwd = NULL; res.B_offsets_bwd = NULL; @@ -2241,13 +2203,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ l_flags |= res.bwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -2457,8 +2412,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); res.A_offsets_bwd = NULL; res.B_offsets_bwd = NULL; @@ -2470,13 +2423,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_bwd_kernels( libxsmm_dnn_conv_ l_flags |= res.bwd_flags; l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -2697,9 +2643,7 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_upd_kernels( libxsmm_dnn_conv_ libxsmm_gemm_batch_reduce_config l_brconfig; libxsmm_bitfield l_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); libxsmm_bitfield l_prefetch_flags = 0; - int prefetch_mode = (res.u == 2 || (res.R == 3 && res.ofw == 7) ) ? (int)LIBXSMM_GEMM_PREFETCH_NONE : (int)LIBXSMM_GEMM_PREFETCH_BL1; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); + int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; const int img_work = res.N; const int img_chunksize = (img_work % res.threads == 0) ? (img_work / res.threads) : (img_work / res.threads) + 1; int n_blocks; @@ -2711,13 +2655,6 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_upd_kernels( libxsmm_dnn_conv_ LDB = (res.upd_pack_input == 1) ? res.ifmblock : res.v * res.ifmblock; LDC = res.ofmblock; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = prefetch_mode | (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Regular GEMM -- no tasklist*/ @@ -2929,17 +2866,8 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_upd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); beta = (res.use_intermediate_f32_wt_tensor ? 1.f : 0.f); l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */ @@ -3357,17 +3285,8 @@ LIBXSMM_API_INLINE void libxsmm_dnn_conv_generate_upd_kernels( libxsmm_dnn_conv_ libxsmm_bitfield l_flags = LIBXSMM_GEMM_VNNI_FLAGS('N', 'N', 'V', 'N'); libxsmm_bitfield l_prefetch_flags = 0; int prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_NONE; - int brgemm_pf_oob = 0; - const char *const env_brgemm_pf_oob = getenv("BRGEMM_PF_OOB"); beta = (res.use_intermediate_f32_wt_tensor ? 1.f : 0.f); l_flags |= ( beta == 0 ) ? LIBXSMM_GEMM_FLAG_BETA_0 : 0; - if ( 0 == env_brgemm_pf_oob ) { - } else { - brgemm_pf_oob = atoi(env_brgemm_pf_oob); - } - if (brgemm_pf_oob > 0) { - prefetch_mode = (int)LIBXSMM_GEMM_PREFETCH_BRGEMM_OOB; - } l_prefetch_flags = prefetch_mode; /* Strided kernel */