-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsoftmax.cpp
32 lines (28 loc) · 1.67 KB
/
softmax.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#include <torch/extension.h>
// Input validation helpers built on TORCH_CHECK. Each one stringifies its
// argument so the failure message names the offending expression.
//
// CHECK_CUDA(x)       - fails unless x.device().is_cuda() is true.
// CHECK_CONTIGUOUS(x) - fails unless x.is_contiguous() is true.
// CHECK_INPUT(x)      - runs both checks, CUDA first.
//
// The argument is parenthesized so that compound expressions bind correctly,
// and CHECK_INPUT is wrapped in do { ... } while (0) so it behaves as a single
// statement (e.g. under an unbraced `if`) instead of silently leaving the
// second check outside the guarded branch.
#define CHECK_CUDA(x) TORCH_CHECK((x).device().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK((x).is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x)       \
    do {                     \
        CHECK_CUDA(x);       \
        CHECK_CONTIGUOUS(x); \
    } while (0)
// Forward declarations of the softmax launchers wrapped by softmax_pt below.
// Their definitions are not in this file (presumably a separate CUDA
// translation unit linked into the extension - TODO confirm).
//
// All four share one signature so they can be used as the function template
// argument of softmax_pt:
//   input     - read-only float buffer
//   output    - float buffer the launcher writes to
//   workspace - scratch buffer; softmax_pt passes nullptr to the variants it
//               registers with use_workspace = false (naive, online) and a
//               buffer of M * 2 floats to the *_split variants
//   M, N      - sizes of dimension 0 and dimension 1 of the input tensor
//               (presumably rows x columns with softmax along N - TODO confirm)
void softmax_naive(const float *input, float *output, float *workspace, int M, int N);
void softmax_naive_split(const float *input, float *output, float *workspace, int M, int N);
void softmax_online(const float *input, float *output, float *workspace, int M, int N);
void softmax_online_split(const float *input, float *output, float *workspace, int M, int N);
/// Adapts a raw-pointer softmax launcher to a tensor-in / tensor-out function
/// suitable for binding to Python.
///
/// @tparam softmax        Launcher called as softmax(input, output, workspace, M, N).
/// @tparam use_workspace  When true, a scratch tensor of 2 * M elements (same
///                        options as the input) is allocated and its data
///                        pointer is passed as `workspace`; when false,
///                        `workspace` is nullptr.
/// @param input  Contiguous 2-D float32 CUDA tensor. size(0) is forwarded as M
///               and size(1) as N.
/// @return A new tensor created with torch::empty_like(input) whose buffer is
///         handed to the launcher as `output`.
/// @throws A TORCH_CHECK failure if the input is not a CUDA tensor, is not
///         contiguous, is not 2-D, is not float32, or has a dimension that
///         does not fit in an int.
template<
    void softmax(const float *input, float *output, float *workspace, int M, int N),
    bool use_workspace>
torch::Tensor softmax_pt(torch::Tensor input) {
    CHECK_INPUT(input);
    TORCH_CHECK(input.dim() == 2, "input must be a 2D tensor");
    TORCH_CHECK(input.scalar_type() == torch::kFloat32, "input must be a float32 tensor");

    // The launcher takes int sizes; reject shapes that would be silently
    // truncated by the int64_t -> int conversion.
    const int64_t rows = input.size(0);
    const int64_t cols = input.size(1);
    const int M = static_cast<int>(rows);
    const int N = static_cast<int>(cols);
    TORCH_CHECK(M == rows && N == cols, "input dimensions must fit in a 32-bit int");

    torch::Tensor output = torch::empty_like(input);

    // The scratch tensor must be held in a named local: taking data_ptr() from
    // a temporary leaves a dangling pointer, because the temporary tensor is
    // destroyed (and its storage released) before the launcher is called.
    torch::Tensor workspace_buffer;
    float *workspace = nullptr;
    if (use_workspace) {
        // Size computed in 64-bit so 2 * M cannot overflow int.
        workspace_buffer = torch::empty({2 * rows}, input.options());
        workspace = workspace_buffer.data_ptr<float>();
    }

    softmax(input.data_ptr<float>(), output.data_ptr<float>(), workspace, M, N);
    return output;
}
// Python module definition: exposes each softmax variant as a module-level
// function taking one tensor and returning one tensor.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    using Wrapper = torch::Tensor (*)(torch::Tensor);

    struct Binding {
        const char *name;  // Python-visible function name
        Wrapper fn;        // softmax_pt instantiation to expose
        const char *doc;   // docstring attached to the function
    };

    // Only the *_split variants are instantiated with use_workspace = true.
    const Binding bindings[] = {
        {"softmax_naive", &softmax_pt<softmax_naive, false>, "Naive softmax"},
        {"softmax_naive_split", &softmax_pt<softmax_naive_split, true>, "Naive softmax split"},
        {"softmax_online", &softmax_pt<softmax_online, false>, "Online softmax"},
        {"softmax_online_split", &softmax_pt<softmax_online_split, true>, "Online softmax split"},
    };

    // Register in table order.
    for (const Binding &binding : bindings) {
        m.def(binding.name, binding.fn, binding.doc);
    }
}