Slice layer (#3055)
* Added tensor slice layer

* Added interface description for slice layer

* Add doc for copy_tensor overload
kSkip authored Feb 15, 2025
1 parent ef3d636 commit e8b6c05
Showing 9 changed files with 707 additions and 0 deletions.
70 changes: 70 additions & 0 deletions dlib/cuda/cpu_dlib.cpp
@@ -3105,6 +3105,76 @@ namespace dlib
}
}

// ------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{
size_t dest_stride_sample = static_cast<size_t>(dest.nc() * dest.nr() * dest.k());
size_t dest_stride_k = static_cast<size_t>(dest.nc() * dest.nr());
size_t dest_stride_nr = static_cast<size_t>(dest.nc());

size_t src_stride_sample = static_cast<size_t>(src.nc() * src.nr() * src.k());
size_t src_stride_k = static_cast<size_t>(src.nc() * src.nr());
size_t src_stride_nr = static_cast<size_t>(src.nc());

DLIB_CASSERT(dest.num_samples() == src.num_samples(), "dest and src must have the same number of samples");
DLIB_CASSERT(dest.k() - dk >= k &&
dest.nr() - dnr >= nr &&
dest.nc() - dnc >= nc, "Not enough space in dest tensor");
DLIB_CASSERT(src.k() - sk >= k &&
src.nr() - snr >= nr &&
src.nc() - snc >= nc, "Not enough space in src tensor");

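// Advance each base pointer to the first element of the requested region; the
// sample loop below then moves both forward one full sample stride per iteration.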
float* dest_p = dest.host() + dk * dest_stride_k
                            + dnr * dest_stride_nr
                            + dnc;

const float* src_p = src.host() + sk * src_stride_k
                                + snr * src_stride_nr
                                + snc;

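// Copy (or accumulate) the region one contiguous row of nc floats at a time,
// walking samples, then channels, then rows.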
for (long i = 0; i < src.num_samples(); ++i)
{
float* dest_channel_p = dest_p;
const float* src_channel_p = src_p;

for (size_t j = 0; j < k; ++j)
{
float* dest_row_p = dest_channel_p;
const float* src_row_p = src_channel_p;

for (size_t r = 0; r < nr; ++r)
{
if (add_to)
{
for (size_t c = 0; c < nc; ++c)
dest_row_p[c] += src_row_p[c];
}
else
{
::memcpy(dest_row_p, src_row_p, nc * sizeof(float));
}

dest_row_p += dest_stride_nr;
src_row_p += src_stride_nr;
}

dest_channel_p += dest_stride_k;
src_channel_p += src_stride_k;
}

dest_p += dest_stride_sample;
src_p += src_stride_sample;
}
}
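As a concrete check of the pointer arithmetic above (illustrative numbers, not taken from the commit): for a dest with k() == 8, nr() == 16, nc() == 16 and offsets dk = 2, dnr = 4, dnc = 4, dest_p starts 2*(16*16) + 4*16 + 4 = 580 floats past dest.host().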

// ------------------------------------------------------------------------------------

void transpose(
11 changes: 11 additions & 0 deletions dlib/cuda/cpu_dlib.h
@@ -692,6 +692,17 @@ namespace dlib
size_t count_k
);

// -----------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);

// -----------------------------------------------------------------------------------

void transpose(
71 changes: 71 additions & 0 deletions dlib/cuda/cuda_dlib.cu
@@ -2623,6 +2623,77 @@ namespace dlib
}
}

__global__ void _cuda_copy_strided_tensor_add_to (float* dest, const float* src,
size_t ns, size_t nk, size_t nr, size_t nc,
size_t dk, size_t dr, size_t dc,
size_t sk, size_t sr, size_t sc)
{
for(auto i : grid_stride_range(0, ns*nk*nr*nc))
{
size_t n,k,r,c;
unpack_idx(i, nk,nr,nc, n,k,r,c);
dest[pack_idx(dk,dr,dc, n,k,r,c)] += src[pack_idx(sk,sr,sc, n,k,r,c)];
}
}

__global__ void _cuda_copy_strided_tensor (float* dest, const float* src,
size_t ns, size_t nk, size_t nr, size_t nc,
size_t dk, size_t dr, size_t dc,
size_t sk, size_t sr, size_t sc)
{
for(auto i : grid_stride_range(0, ns*nk*nr*nc))
{
size_t n,k,r,c;
unpack_idx(i, nk,nr,nc, n,k,r,c);
dest[pack_idx(dk,dr,dc, n,k,r,c)] = src[pack_idx(sk,sr,sc, n,k,r,c)];
}
}
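Both kernels rely on a pair of index helpers defined elsewhere in dlib's CUDA utilities. A minimal sketch of their assumed behavior, inferred from the call sites above (the names and signatures are not taken from this commit):

// Sketch only: unpack a flat index over the slice into (sample, channel, row,
// column) coordinates, assuming row-major layout with the column varying fastest.
__device__ void unpack_idx(size_t idx, size_t nk, size_t nr, size_t nc,
                           size_t& n, size_t& k, size_t& r, size_t& c)
{
    c = idx % nc;  idx /= nc;
    r = idx % nr;  idx /= nr;
    k = idx % nk;  idx /= nk;
    n = idx;   // whatever remains is the sample index
}

// Sketch only: flatten (n,k,r,c) using the full tensor's dimensions, which is
// why the kernels pass dest.k()/dest.nr()/dest.nc() (and likewise for src).
__device__ size_t pack_idx(size_t nk, size_t nr, size_t nc,
                           size_t n, size_t k, size_t r, size_t c)
{
    return ((n*nk + k)*nr + r)*nc + c;
}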

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{

DLIB_CASSERT(dest.num_samples() == src.num_samples(), "dest and src must have the same number of samples");
DLIB_CASSERT(dest.k() - dk >= k &&
dest.nr() - dnr >= nr &&
dest.nc() - dnc >= nc, "Not enough space in dest tensor");
DLIB_CASSERT(src.k() - sk >= k &&
src.nr() - snr >= nr &&
src.nc() - snc >= nc, "Not enough space in src tensor");

float* dest_p = dest.device() + dk * static_cast<size_t>(dest.nc() * dest.nr())
                              + dnr * static_cast<size_t>(dest.nc())
                              + dnc;

const float* src_p = src.device() + sk * static_cast<size_t>(src.nc() * src.nr())
                                  + snr * static_cast<size_t>(src.nc())
                                  + snc;

if (add_to)
{
launch_kernel(_cuda_copy_strided_tensor_add_to, max_jobs(dest.size()),
dest_p, src_p, dest.num_samples(),
k, nr, nc,
dest.k(), dest.nr(), dest.nc(),
src.k(), src.nr(), src.nc());
}
else
{
launch_kernel(_cuda_copy_strided_tensor, max_jobs(dest.size()),
dest_p, src_p, dest.num_samples(),
k, nr, nc,
dest.k(), dest.nr(), dest.nc(),
src.k(), src.nr(), src.nc());
}
}


// ----------------------------------------------------------------------------------------

__global__ void _cuda_transpose(size_t dsize, size_t dk, size_t dnr, size_t dnc, float* d,
11 changes: 11 additions & 0 deletions dlib/cuda/cuda_dlib.h
@@ -589,6 +589,17 @@ namespace dlib
size_t count_k
);

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);

// ----------------------------------------------------------------------------------------

void transpose(
18 changes: 18 additions & 0 deletions dlib/cuda/tensor_tools.cpp
@@ -1333,6 +1333,24 @@ namespace dlib { namespace tt
#endif
}

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
)
{
#ifdef DLIB_USE_CUDA
cuda::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#else
cpu::copy_tensor(add_to, dest, dk, dnr, dnc, src, sk, snr, snc, k, nr, nc);
#endif
}

// ----------------------------------------------------------------------------------------

void inv::
32 changes: 32 additions & 0 deletions dlib/cuda/tensor_tools.h
@@ -2334,6 +2334,38 @@ namespace dlib { namespace tt
i.e., copies the content of each sample from src into the corresponding place of the sample at dest.
!*/

// ----------------------------------------------------------------------------------------

void copy_tensor(
bool add_to,
tensor& dest,
size_t dk, size_t dnr, size_t dnc,
const tensor& src,
size_t sk, size_t snr, size_t snc,
size_t k, size_t nr, size_t nc
);
/*!
requires
- dest.num_samples() == src.num_samples()
- dest.k() - dk >= k
- dest.nr() - dnr >= nr
- dest.nc() - dnc >= nc
- src.k() - sk >= k
- src.nr() - snr >= nr
- src.nc() - snc >= nc
- is_same_object(dest,src) == false
- The memory areas of src and dest do not overlap.
ensures
        - if (add_to) then
            - performs: dest[i, j + dk, r + dnr, c + dnc] += src[i, j + sk, r + snr, c + snc]
              for all j in [0..k), r in [0..nr), and c in [0..nc),
              i.e., adds the content of each sample in src to the corresponding region of dest.
        - else
            - performs: dest[i, j + dk, r + dnr, c + dnc] = src[i, j + sk, r + snr, c + snc]
              for all j in [0..k), r in [0..nr), and c in [0..nc),
              i.e., copies the content of each sample in src into the corresponding region of dest.
!*/
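A minimal usage sketch of this overload (the tensor shapes and offsets below are illustrative values, not from this commit):

resizable_tensor src(2, 8, 16, 16);  // 2 samples, 8 channels, 16x16 each
resizable_tensor dest(2, 3, 4, 4);   // receives a 3-channel 4x4 slice
src = 1;
dest = 0;
// Copy channels [2,5), rows [6,10), and columns [6,10) of every sample in
// src into dest, starting at dest's origin (0,0,0).
tt::copy_tensor(false, dest, 0, 0, 0, src, 2, 6, 6, 3, 4, 4);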

// ----------------------------------------------------------------------------------------

void transpose(
Expand Down
125 changes: 125 additions & 0 deletions dlib/dnn/layers.h
@@ -4631,6 +4631,131 @@ namespace dlib
>
using extract = add_layer<extract_<offset,k,nr,nc>, SUBNET>;

// ----------------------------------------------------------------------------------------

template <
long _offset_k,
long _offset_nr,
long _offset_nc,
long _k,
long _nr,
long _nc
>
class slice_
{
static_assert(_offset_k >= 0, "The channel offset must be >= 0.");
static_assert(_offset_nr >= 0, "The row offset must be >= 0.");
static_assert(_offset_nc >= 0, "The column offset must be >= 0.");
static_assert(_k > 0, "The number of channels must be > 0.");
static_assert(_nr > 0, "The number of rows must be > 0.");
static_assert(_nc > 0, "The number of columns must be > 0.");
public:
slice_(
)
{
}

template <typename SUBNET>
void setup (const SUBNET& sub)
{
DLIB_CASSERT(sub.get_output().k() >= _offset_k + _k &&
             sub.get_output().nr() >= _offset_nr + _nr &&
             sub.get_output().nc() >= _offset_nc + _nc,
    "The requested slice does not fit inside the input tensor.");
}

template <typename SUBNET>
void forward(const SUBNET& sub, resizable_tensor& output)
{
output.set_size(sub.get_output().num_samples(), _k, _nr, _nc);
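// Copy the _k x _nr x _nc block starting at (_offset_k, _offset_nr, _offset_nc) out of the input.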
tt::copy_tensor(false, output, 0, 0, 0, sub.get_output(), _offset_k, _offset_nr, _offset_nc, _k, _nr, _nc);
}

template <typename SUBNET>
void backward(const tensor& gradient_input, SUBNET& sub, tensor& /*params_grad*/)
{
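// Accumulate (add_to == true) the incoming gradient into the matching sub-region
// of the input's gradient, since other paths may also write to it.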
tt::copy_tensor(true, sub.get_gradient_input(), _offset_k, _offset_nr, _offset_nc, gradient_input, 0, 0, 0, _k, _nr, _nc);
}

const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }

friend void serialize(const slice_& /*item*/, std::ostream& out)
{
serialize("slice_", out);
serialize(_offset_k, out);
serialize(_offset_nr, out);
serialize(_offset_nc, out);
serialize(_k, out);
serialize(_nr, out);
serialize(_nc, out);
}

friend void deserialize(slice_& /*item*/, std::istream& in)
{
std::string version;
deserialize(version, in);
if (version != "slice_")
throw serialization_error("Unexpected version '"+version+"' found while deserializing dlib::slice_.");

long offset_k;
long offset_nr;
long offset_nc;
long k;
long nr;
long nc;
deserialize(offset_k, in);
deserialize(offset_nr, in);
deserialize(offset_nc, in);
deserialize(k, in);
deserialize(nr, in);
deserialize(nc, in);

if (offset_k != _offset_k) throw serialization_error("Wrong offset_k found while deserializing dlib::slice_");
if (offset_nr != _offset_nr) throw serialization_error("Wrong offset_nr found while deserializing dlib::slice_");
if (offset_nc != _offset_nc) throw serialization_error("Wrong offset_nc found while deserializing dlib::slice_");
if (k != _k) throw serialization_error("Wrong k found while deserializing dlib::slice_");
if (nr != _nr) throw serialization_error("Wrong nr found while deserializing dlib::slice_");
if (nc != _nc) throw serialization_error("Wrong nc found while deserializing dlib::slice_");
}

friend std::ostream& operator<<(std::ostream& out, const slice_& /*item*/)
{
out << "slice\t ("
<< "offset_k="<<_offset_k
<< ", offset_nr="<<_offset_nr
<< ", offset_nc="<<_offset_nc
<< ", k="<<_k
<< ", nr="<<_nr
<< ", nc="<<_nc
<< ")";
return out;
}

friend void to_xml(const slice_& /*item*/, std::ostream& out)
{
out << "<slice";
out << " offset_k='"<<_offset_k<<"'";
out << " offset_nr='"<<_offset_nr<<"'";
out << " offset_nc='"<<_offset_nc<<"'";
out << " k='"<<_k<<"'";
out << " nr='"<<_nr<<"'";
out << " nc='"<<_nc<<"'";
out << "/>\n";
}
private:
resizable_tensor params; // unused
};

template <
long offset_k,
long offset_nr,
long offset_nc,
long k,
long nr,
long nc,
typename SUBNET
>
using slice = add_layer<slice_<offset_k,offset_nr,offset_nc,k,nr,nc>, SUBNET>;
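
A minimal sketch of the new layer in a network definition; the loss, surrounding layers, and dimensions are illustrative assumptions, not part of this commit:

// Extract a 2-channel 8x8 block starting at channel 1, row 4, column 4.
using net_type = loss_multiclass_log<
                 fc<10,
                 relu<slice<1, 4, 4, 2, 8, 8,
                 input<matrix<float>>>>>>;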

// ----------------------------------------------------------------------------------------

template <long long row_stride = 2, long long col_stride = 2>