Add support for Upsample op in CAFFE #625

Open · wants to merge 1 commit into master
15 changes: 3 additions & 12 deletions mace/ops/opencl/image/resize_nearest_neighbor.cc
@@ -24,24 +24,15 @@ namespace image {
 MaceStatus ResizeNearestNeighborKernel::Compute(
     OpContext *context,
     const Tensor *input,
-    const Tensor *size,
-    const std::vector<index_t> &dims,
     Tensor *output) {
   const index_t batch = input->dim(0);
   const index_t in_height = input->dim(1);
   const index_t in_width = input->dim(2);
   const index_t channels = input->dim(3);
-  index_t out_height = 0;
-  index_t out_width = 0;
-  if (dims.size() < 2) {
-    Tensor::MappingGuard size_mapper(size);
-    out_height = size->data<int32_t>()[0];
-    out_width = size->data<int32_t>()[1];
-  } else {
-    out_height = dims[0];
-    out_width = dims[1];
-  }

   const index_t channel_blocks = RoundUpDiv4(channels);
+  const index_t out_height = in_height*scale_;
+  const index_t out_width = in_width*scale_;

   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
                            static_cast<uint32_t>(out_width),
9 changes: 5 additions & 4 deletions mace/ops/opencl/image/resize_nearest_neighbor.h
@@ -66,18 +66,19 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,

 class ResizeNearestNeighborKernel : public OpenCLResizeNearestNeighborKernel {
  public:
-  explicit ResizeNearestNeighborKernel(bool align_corners)
-      : align_corners_(align_corners) {}
+  ResizeNearestNeighborKernel(bool align_corners,
+                              const index_t scale)
+      : align_corners_(align_corners),
+        scale_(scale) {}

   MaceStatus Compute(
       OpContext *context,
       const Tensor *input,
-      const Tensor *size,
-      const std::vector<index_t> &dims,
       Tensor *output) override;

  private:
   bool align_corners_;
+  index_t scale_;
   cl::Kernel kernel_;
   uint32_t kwg_size_;
   std::vector<index_t> input_shape_;
2 changes: 0 additions & 2 deletions mace/ops/opencl/resize_nearest_neighbor.h
@@ -32,8 +32,6 @@ class OpenCLResizeNearestNeighborKernel {
   virtual MaceStatus Compute(
       OpContext *context,
       const Tensor *input,
-      const Tensor *size,
-      const std::vector<index_t> &dims,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeNearestNeighborKernel);
 };
35 changes: 18 additions & 17 deletions mace/ops/resize_nearest_neighbor.cc
@@ -77,28 +77,27 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
  public:
   explicit ResizeNearestNeighborOp(OpConstructContext *context)
       : Operation(context),
-        align_corners_(Operation::GetOptionalArg<bool>("align_corners",
-                                                       false)) {}
+        align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
+        size_(Operation::GetRepeatedArgs<index_t>("size", {-1})) {}

   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
     const Tensor *input = this->Input(0);
-    const Tensor *size = this->Input(1);
-    Tensor::MappingGuard size_mapper(size);
     Tensor *output = this->Output(0);

-    MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1,
-               "input must be 4-dimensional and size must be 1-dimensional. ",
-               input->dim_size(), size->dim_size());
+    MACE_CHECK(input->dim_size() == 4,
+               "input must be 4-dimensional. ",
+               input->dim_size());

     const index_t batch = input->dim(0);
     const index_t channels = input->dim(1);
     const index_t in_height = input->dim(2);
     const index_t in_width = input->dim(3);

-    const index_t out_height = size->data<int32_t>()[0];
-    const index_t out_width = size->data<int32_t>()[1];
-    MACE_CHECK(out_height > 0 && out_width > 0, out_height, out_width);
+    index_t scale = size_[0];
+    MACE_CHECK(scale > 0);
+    const index_t out_height = in_height*scale;
+    const index_t out_width = in_width*scale;
     std::vector<index_t> out_shape{batch, channels, out_height, out_width};
     MACE_RETURN_IF_ERROR(output->Resize(out_shape));
     Tensor::MappingGuard input_mapper(input);
@@ -138,36 +137,38 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {

  private:
   bool align_corners_;
+  std::vector<index_t> size_;
 };

 #ifdef MACE_ENABLE_OPENCL
 template<>
 class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
  public:
   explicit ResizeNearestNeighborOp(OpConstructContext *context)
-      : Operation(context), dim_(Operation::GetRepeatedArgs<index_t>("dim")) {
+      : Operation(context) {
     bool align_corners = Operation::GetOptionalArg<bool>(
         "align_corners", false);
+    std::vector<index_t> size = Operation::GetRepeatedArgs<index_t>(
+        "size", {-1});
+    MACE_CHECK(size.size() == 1);
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
       kernel_ = make_unique<opencl::image::ResizeNearestNeighborKernel>(
-          align_corners);
+          align_corners, size[0]);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
   }
   MaceStatus Run(OpContext *context) override {
     const Tensor *input = this->Input(0);
-    const Tensor *size = this->Input(1);
     Tensor *output = this->Output(0);
-    MACE_CHECK(input->dim_size() == 4 && size->dim_size() == 1,
+    MACE_CHECK(input->dim_size() == 4,
                "input must be 4-dimensional and size must be 1-dimensional.",
-               input->dim_size(), size->dim_size());
+               input->dim_size());

-    return kernel_->Compute(context, input, size, dim_, output);
+    return kernel_->Compute(context, input, output);
   }

  private:
-  std::vector<index_t> dim_;
   std::unique_ptr<OpenCLResizeNearestNeighborKernel> kernel_;
 };
 #endif  // MACE_ENABLE_OPENCL
9 changes: 7 additions & 2 deletions third_party/caffe/Dockerfile
@@ -35,8 +35,13 @@ ENV CLONE_TAG=1.0
 # https://github.com/pypa/pip/issues/5599
 RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \
     python -m pip install --upgrade pip && \
-    cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. && \
-    mkdir build && cd build && \
+    cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd ..
+
+COPY upsample.patch .

Collaborator: Sorry for the late review. Thanks for your wonderful code, but as you know we cannot modify the Caffe code and maintain a nonstandard version. Perhaps we can let this pull request stay here to help others, but it cannot be merged.

Author (Contributor): What about keeping the original Dockerfile as it was and creating a Dockerfile-upsample that contains the patched version?

lu229 (Collaborator), Apr 21, 2020: @gasgallo I read the code and have a question: how about developing a ResizeNearestNeighbor op in Caffe like the one in TensorFlow, or an Upsample op like the one in ONNX? In those cases there would be no need to change MACE's code.

Author (Contributor): @lu229 can you elaborate? I'm not sure I understand what you mean.

lu229 (Collaborator), Apr 21, 2020: @gasgallo MACE supports the Upsample operator for ONNX and the ResizeNearestNeighbor operator for TensorFlow. What I mean is: if you modeled the Upsample op in Caffe on the ONNX one, or modeled a ResizeNearestNeighbor op in Caffe on the TensorFlow one, there would be no need to modify MACE's C++ code (only the Python conversion code would need changes). I think perhaps that is a better choice?

Collaborator: @gasgallo Perhaps I did not understand your problem. Why do you need to call shape_inference before convert_ops? Shape inference in ONNX, Caffe and TensorFlow is different; I think if you add the op to Caffe, perhaps you need to add it the same way as the other Caffe operators?

Author (Contributor): @lu229 because the current Caffe upsample layer is like the ONNX one: it provides a scale factor, so we need to know the upsample input size to compute its output size, which is required by MACE's ResizeNearestNeighbor op.

The other option is for the Caffe upsample layer to provide the output shape directly, as happens in tf.image.resize_nearest_neighbor.

I was wondering whether the first solution would work; if not, I will consider the second option.

lu229 (Collaborator), Apr 25, 2020: @gasgallo Perhaps I still did not understand your problem, but I can try to reply. Yes, you can refer to the infer_shape_conv_pool_shape function in tools/python/transform/shape_inference.py, which is used to infer the output shape of the conv operator; when you infer the Upsample output shape, you can get the input shape from the _output_shape_cache variable.
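
For reference, a minimal sketch of what such an inference hook could look like in shape_inference.py, assuming the scale is carried in a repeated op argument named "size" (as this PR's C++ code expects) and that the class exposes the same _output_shape_cache plus an add_output_shape helper like the existing infer_shape_* methods; treat the helper names as assumptions rather than the exact MACE API:

    def infer_shape_upsample(self, op):
        # Shapes of earlier ops are already cached, so the input shape is known here.
        input_shape = self._output_shape_cache[op.input[0]]
        # The scale factor is stored in a repeated arg named "size"
        # (the argument this PR's C++ op reads with GetRepeatedArgs).
        scale = 1
        for arg in op.arg:
            if arg.name == 'size':
                scale = arg.ints[0]
        # NCHW layout: batch and channels are unchanged, H/W are multiplied by the scale.
        output_shape = [input_shape[0], input_shape[1],
                        input_shape[2] * scale, input_shape[3] * scale]
        self.add_output_shape(op, [output_shape])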

Author (Contributor): @lu229 sorry for the confusion, I'll explain in more detail.

My Caffe upsample op is defined similarly to the ONNX upsample op: it takes a scale parameter, so we don't know the output shape a priori. For MACE we need to know the output shape when calling self.add_tensor.

This works in onnx_converter.py because we can use the self._graph_shapes_dict object, which contains all pre-computed shapes and is created in self.extract_shape_info before self.convert_ops is called.

This is convert_upsample in onnx_converter.py:

    def convert_upsample(self, node):
        op = self.convert_general_op(node)
        del op.input[1:]  # cut all unnecessary inputs (onnx>=1.5)

        output_size = self._graph_shapes_dict[op.output[0]]
        output_size = np.array(output_size[-2:]).astype(np.int32)
        if node.attrs['mode'] == 'nearest':
            op.type = MaceOp.ResizeNearestNeighbor.name
            size_tensor_name = op.name + ":size"
            self.add_tensor(size_tensor_name, output_size.shape,
                            mace_pb2.DT_INT32, output_size)
            op.input.append(size_tensor_name)
        else:
            op.type = MaceOp.ResizeBilinear.name
            size_arg = op.arg.add()
            size_arg.name = MaceKeyword.mace_resize_size_str
            size_arg.ints.extend(output_size.tolist())

        align_corners_arg = op.arg.add()
        align_corners_arg.name = MaceKeyword.mace_align_corners_str
        align_corners_arg.i = node.attrs.get('align_corners', 0)

And this is run() in onnx_converter.py:

    def run(self):
        graph_def = self._onnx_model.graph
        self.extract_shape_info(graph_def)
        self.convert_tensors(graph_def)
        self.convert_ops(graph_def)
        return self._mace_net_def

In caffe_converter.py, on the other hand, shape_inferer.run() is called after self.convert_ops, so output shapes are not available during op conversion and I cannot compute the output shape of the upsample op.

This is run() in caffe_converter.py:

    def run(self):
        self.convert_ops()
        shape_inferer = shape_inference.ShapeInference(
            self._mace_net_def,
            self._option.input_nodes.values())
        shape_inferer.run()
        self.replace_output_tensor_name()
        return self._mace_net_def

And shape_inferer.run() cannot be called before self.convert_ops, because it requires self._mace_net_def to already be fully defined (which happens in self.convert_ops).

@lu229 Let me know if this makes my point clear.

lu229 (Collaborator), Apr 26, 2020: @gasgallo Thanks for your detailed description, now I understand your problem. Yes, you need to define the upsample op the same way as the other Caffe operators, because shape inference in Caffe and ONNX works differently.
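
Building on that, a minimal sketch of what a convert_upsample in caffe_converter.py could look like under this PR's design, where the Caffe layer carries only a scale and the C++ op reads it from a repeated argument named "size"; convert_general_op and the exact arg plumbing are assumptions modeled on the ONNX converter shown above:

    def convert_upsample(self, caffe_op):
        # Assumes a general-op helper comparable to the ONNX converter's.
        op = self.convert_general_op(caffe_op)
        op.type = MaceOp.ResizeNearestNeighbor.name

        # Only the scale factor is recorded; with this PR's op, the output
        # H/W are computed at runtime as input H/W times the scale.
        param = caffe_op.layer.upsample_param
        size_arg = op.arg.add()
        size_arg.name = 'size'
        size_arg.ints.extend([param.scale])

        align_corners_arg = op.arg.add()
        align_corners_arg.name = MaceKeyword.mace_align_corners_str
        align_corners_arg.i = 0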


+RUN git apply upsample.patch
+
+RUN mkdir build && cd build && \
     cmake -DCPU_ONLY=1 .. && \
     make -j"$(nproc)"

5 changes: 5 additions & 0 deletions third_party/caffe/caffe.proto
@@ -541,6 +541,7 @@ message LayerParameter {
   optional TanHParameter tanh_param = 127;
   optional ThresholdParameter threshold_param = 128;
   optional TileParameter tile_param = 138;
+  optional UpsampleParameter upsample_param = 149;
   optional VideoDataParameter video_data_param = 207;
   optional WindowDataParameter window_data_param = 129;
   optional ShuffleChannelParameter shuffle_channel_param = 164;
@@ -1939,3 +1940,7 @@ message ShuffleChannelParameter {
 message L2NormalizationParameter {
   optional int32 axis = 1 [default = 1];
 }
+
+message UpsampleParameter {
+  optional int32 scale = 1 [default = 1];
+}
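
As a quick usage illustration, once the patched caffe.proto is compiled to a Python module (here assumed importable as caffe_pb2), the converter can read the new field directly from a parsed prototxt; the layer type string "Upsample" and the file name are assumptions for this sketch:

    from google.protobuf import text_format
    import caffe_pb2  # generated from the patched caffe.proto

    net = caffe_pb2.NetParameter()
    with open('model.prototxt') as f:
        text_format.Merge(f.read(), net)

    for layer in net.layer:
        if layer.type == 'Upsample':
            # scale defaults to 1 when upsample_param is not set in the prototxt.
            print(layer.name, 'scale =', layer.upsample_param.scale)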