20
20
21
21
#include " ../public/bit_depth.h"
22
22
#include " common.h"
23
+ #include " kernel.h"
23
24
#include " kernel_reference.h"
24
25
25
26
namespace gemmlowp {
26
27
27
- template <bool MaxProductIsLessThan4096, bool LhsAlwaysNonzero >
28
+ template <bool MaxProductIsLessThan4096, bool IsUnsigned, bool LhsNonZero >
28
29
struct DefaultKernelImpl {};
29
30
30
- // Partial specialization implementing the logic that if we want to use
31
- // a kernel for LhsAlwaysNonzero but do not have such a kernel, then we fall
32
- // back to a generic kernel not taking advantage of LhsAlwaysNonzero.
33
- template <bool LhsAlwaysNonzero>
34
- struct DefaultKernelImpl <true , LhsAlwaysNonzero>
35
- : DefaultKernelImpl<false , LhsAlwaysNonzero> {};
36
-
37
31
// Partial specialization implementing the logic that if we want to use
38
32
// a kernel for MaxProductIsLessThan4096 but do not have such a kernel, then we
39
33
// fall back to a generic kernel not taking advantage of
40
34
// MaxProductIsLessThan4096.
35
+ template <bool LhsNonZero>
36
+ struct DefaultKernelImpl <true , true , LhsNonZero>
37
+ : DefaultKernelImpl<false , true , LhsNonZero> {};
38
+
39
+ // Partial specialization implementing the logic that if we want to use
40
+ // a kernel for LhsNonZero but do not have such a kernel, then we fall
41
+ // back to a generic kernel not taking advantage of LhsNonZero.
41
42
template <bool MaxProductIsLessThan4096>
42
- struct DefaultKernelImpl <MaxProductIsLessThan4096, true >
43
- : DefaultKernelImpl<MaxProductIsLessThan4096, false > {};
43
+ struct DefaultKernelImpl <MaxProductIsLessThan4096, true , true >
44
+ : DefaultKernelImpl<MaxProductIsLessThan4096, true , false > {};
44
45
45
46
template <typename BitDepthParams>
46
47
struct DefaultKernel
47
48
: DefaultKernelImpl<(BitDepthParams::LhsRange::kMaxValue *
48
49
BitDepthParams::RhsRange::kMaxValue <
49
50
4096 ),
50
- (BitDepthParams::LhsRange::kMinValue > 0 )> {};
51
+ (BitDepthParams::LhsRange::kMinValue >= 0 ),
52
+ (BitDepthParams::LhsRange::kMinValue > 0 ||
53
+ (BitDepthParams::LhsRange::kMaxValue <= 127 &&
54
+ BitDepthParams::LhsRange::kMinValue > -128 ))> {};
51
55
52
56
} // end namespace gemmlowp
53
57
54
- #define GEMMLOWP_SET_DEFAULT_KERNEL (MaxProductIsLessThan4096, \
55
- LhsAlwaysNonzero , Kernel) \
56
- namespace gemmlowp { \
57
- template <> \
58
- struct DefaultKernelImpl <MaxProductIsLessThan4096, LhsAlwaysNonzero> \
59
- : Kernel {}; \
58
+ #define GEMMLOWP_SET_DEFAULT_KERNEL (MaxProductIsLessThan4096, IsUnsigned, \
59
+ LhsAlwaysNonZero , Kernel) \
60
+ namespace gemmlowp { \
61
+ template <> \
62
+ struct DefaultKernelImpl <MaxProductIsLessThan4096, IsUnsigned, \
63
+ LhsAlwaysNonZero> : Kernel {}; \
60
64
}
61
65
66
+ // User-provided int8 inputs are currently only supported in the NEON path.
62
67
#if defined GEMMLOWP_NEON_32
63
68
#include " kernel_neon.h"
64
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , NEON_32_Kernel12x4Depth2)
65
- GEMMLOWP_SET_DEFAULT_KERNEL(true , false ,
69
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , NEON_32_Kernel12x4Depth2)
70
+ GEMMLOWP_SET_DEFAULT_KERNEL(true , true , false ,
66
71
NEON_32_Kernel12x4Depth2Assuming12BitProducts)
67
- GEMMLOWP_SET_DEFAULT_KERNEL(false , true ,
72
+ GEMMLOWP_SET_DEFAULT_KERNEL(false , true , true ,
68
73
NEON_32bit_GEMM_Int8Operands_LhsNonzero)
74
+ GEMMLOWP_SET_DEFAULT_KERNEL(false , false , true ,
75
+ NEON_32bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
69
76
#elif defined GEMMLOWP_NEON_64
70
77
#include " kernel_neon.h"
71
78
#if defined GEMMLOWP_DOTPROD_KERNEL
72
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , NEON_64_Kernel12x8Depth4_dotprod)
79
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false ,
80
+ NEON_64_Kernel12x8Depth4_dotprod)
73
81
#else
74
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , NEON_64_Kernel12x8Depth2)
75
- GEMMLOWP_SET_DEFAULT_KERNEL(false , true ,
82
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , NEON_64_Kernel12x8Depth2)
83
+ GEMMLOWP_SET_DEFAULT_KERNEL(false , true , true ,
76
84
NEON_64bit_GEMM_Int8Operands_LhsNonzero)
77
85
#endif
86
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , false , true ,
87
+ NEON_64bit_GEMM_Int8Operands_LhsNonzero_Int8Inputs)
78
88
#elif defined(GEMMLOWP_MSA)
79
89
#include " kernel_msa.h"
80
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , MSA_Kernel12x8Depth2)
81
- GEMMLOWP_SET_DEFAULT_KERNEL(false , true , MSA_GEMM_Int8Operands_LhsNonzero)
90
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , MSA_Kernel12x8Depth2)
91
+ GEMMLOWP_SET_DEFAULT_KERNEL(false , true , true , MSA_GEMM_Int8Operands_LhsNonzero)
82
92
#elif defined GEMMLOWP_SSE4_32
83
93
#include " kernel_sse.h"
84
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , SSE4_32_Kernel4x4Depth2)
94
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , SSE4_32_Kernel4x4Depth2)
85
95
#elif defined GEMMLOWP_SSE4_64
86
96
#include " kernel_sse.h"
87
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , SSE4_64_Kernel12x4Depth2)
97
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , SSE4_64_Kernel12x4Depth2)
88
98
#elif defined GEMMLOWP_AVX2_64
89
99
#include " kernel_avx.h"
90
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , AVX2_64_Kernel24x8Depth2)
100
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , AVX2_64_Kernel24x8Depth2)
91
101
#else
92
102
#include " kernel_reference.h"
93
103
namespace gemmlowp {
@@ -96,7 +106,7 @@ typedef ReferenceKernel<KernelFormat<
96
106
KernelSideFormat<CellFormat<4 , 16 , CellOrder::WidthMajor>, 1 > > >
97
107
DefaultReferenceKernel;
98
108
}
99
- GEMMLOWP_SET_DEFAULT_KERNEL (false , false , DefaultReferenceKernel)
109
+ GEMMLOWP_SET_DEFAULT_KERNEL (false , true , false , DefaultReferenceKernel)
100
110
#endif
101
111
102
112
#endif // GEMMLOWP_INTERNAL_KERNEL_DEFAULT_H_
0 commit comments