From 93328b896d35ac3b0ffc2be6e7078f07e5172909 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 17:43:43 -0700 Subject: [PATCH 01/42] WIP --- csrc/python_frontend/python_bindings.cpp | 129 ++++++++--------------- 1 file changed, 46 insertions(+), 83 deletions(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 79e7047a54f..c09ff5811f1 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -2712,92 +2712,55 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("index"), py::return_value_policy::reference); - nvf_ops.def( - "slice", - [](FusionDefinition::Operators& self, - Tensor arg, - const std::vector& start_indices, - const std::vector& end_indices, - // NOTE: Tried to use std::reference_wrapper to a vector and during - // testing, I was not getting the proper value back. It was like - // like the code was referencing the strides vector that holds the - // default value. - std::optional> opt_strides = - std::nullopt) -> Tensor { - FUSER_PERF_SCOPE("Operators.slice"); - NVF_CHECK( - self.validUse(), "Attempting to add to a completed definition!"); +template +Tensor slice_fn( + FusionDefinition::Operators& self, + Tensor arg, + ShapeType start, + ShapeType end, + std::optional opt_stride) { + NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); - std::vector strides; - if (opt_strides.has_value()) { - NVF_CHECK( - start_indices.size() == opt_strides.value().size(), - "Slice start_indices and strides don't match! Start Indices: ", - start_indices.size(), - " Strides: ", - opt_strides.value().size()); - strides.assign( - opt_strides.value().begin(), opt_strides.value().end()); - } else { - strides.resize(start_indices.size(), 1); - } + FusionDefinition* fd = self.fusion_definition; + Vector new_start = ShapeAsVector(start, *fd); + Vector new_end = ShapeAsVector(end, *fd); + Vector new_stride; - NVF_CHECK( - arg.dims == start_indices.size(), - "Number of tensor dimensions does not match slice dimensions! Tensor-dims: ", - arg.dims, - " Slice-dims: ", - start_indices.size()); - NVF_CHECK( - start_indices.size() == end_indices.size(), - "Slice indexing attribute dimensions don't match! Start Indices: ", - start_indices.size(), - " End Indices: ", - end_indices.size(), - " Strides: ", - strides.size()); - for (const auto i : c10::irange(arg.dims)) { - auto start_idx = start_indices[i]; - auto end_idx = end_indices[i]; - auto stride = strides[i]; - NVF_CHECK( - start_idx >= 0, - "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", - start_indices, - " End Indices: ", - end_indices, - " Strides: ", - strides); - NVF_CHECK( - end_idx >= start_idx, - "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", - start_indices, - " End Indices: ", - end_indices, - " Strides: ", - strides); - NVF_CHECK( - stride == 1, - "nvFuser Limitation: All slice operation strides must be of size 1. Start Indices: ", - start_indices, - " End Indices: ", - end_indices, - " Strides: ", - strides); - } - FusionDefinition* fd = self.fusion_definition; - Tensor output = fd->defineTensor(arg.dims); - fd->defineRecord(new SliceOpRecord( - {fd->recordingState(arg())}, - {fd->recordingState(output())}, - start_indices, - end_indices, - strides)); - return output; - }, + if (opt_strides.has_value()) { + new_stride = ShapeAsVector(opt_stride.value(), *fd); + } else { + // set stride 1; + } + + Tensor output = fd->defineTensor(new_shape.size); + fd->defineRecord(new ReshapeOpRecord( + {fd->recordingState(arg()), fd->recordingState(new_shape())}, + {fd->recordingState(output())})); + return output; +} + + nvf_ops.def( + "slice", + slice_fn, + py::arg("arg"), + py::arg("start"), + py::arg("end"), + py::arg("strides") = py::none(), + py::return_value_policy::reference); + nvf_ops.def( + "slice", + slice_fn, + py::arg("arg"), + py::arg("start"), + py::arg("end"), + py::arg("strides") = py::none(), + py::return_value_policy::reference); + nvf_ops.def( + "slice", + slice_fn, py::arg("arg"), - py::arg("start_indices"), - py::arg("end_indices"), + py::arg("start"), + py::arg("end"), py::arg("strides") = py::none(), py::return_value_policy::reference); nvf_ops.def( From 77e04da02d370ebdbfa77f0f2c379d25a5870f6a Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 19:48:13 -0700 Subject: [PATCH 02/42] WIP --- csrc/python_frontend/fusion_record.h | 269 +++++++++++++---------- csrc/python_frontend/python_bindings.cpp | 41 +++- 2 files changed, 187 insertions(+), 123 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 37e9ace7ac7..663385d686e 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -366,6 +366,39 @@ struct OpRecord : RecordFunctor { std::function fusion_op_; }; +struct SliceOpRecord : RecordFunctor { + SliceOpRecord(std::vector _args, std::vector _outputs) + : RecordFunctor( + std::move(_args), + std::move(_outputs), + "ops.slice", + serde::RecordType::SliceOp) { + arg_names_[1] = "start"; + arg_names_[2] = "end"; + arg_names_[3] = "stride"; + } + ~SliceOpRecord() override = default; + RecordFunctor* clone() final { + return new SliceOpRecord(*this); + } + + void operator()(FusionState& fd) final { + TensorView* arg = fd.getFusionState(args_.at(0).index)->as(); + const std::vector& start = + fd.getFusionStateVector(args_.at(1).index); + const std::vector& end = + fd.getFusionStateVector(args_.at(2).index); + const std::vector& stride = + fd.getFusionStateVector(args_.at(3).index); + std::vector vec_slice; + for (const auto idx : c10::irange(arg->nDims())) { + vec_slice.emplace(start[idx], end[idx], stride[idx]); + } + auto output = slice(arg, vec_slice); + fd.setFusionState(outputs_.at(0).index, output); + } +}; + struct ReshapeOpRecord : RecordFunctor { ReshapeOpRecord(std::vector _args, std::vector _outputs) : RecordFunctor( @@ -1969,124 +2002,124 @@ struct ScalarRecord : RecordFunctor { PrimDataType dtype_; }; -struct SliceOpRecord : RecordFunctor { - SliceOpRecord( - std::vector _args, - std::vector _outputs, - std::vector start_indices, - std::vector end_indices, - std::vector strides) - : RecordFunctor( - std::move(_args), - std::move(_outputs), - "ops.slice", - serde::RecordType::SliceOp), - start_indices_(std::move(start_indices)), - end_indices_(std::move(end_indices)), - strides_(std::move(strides)) {} - ~SliceOpRecord() override = default; - RecordFunctor* clone() final { - return new SliceOpRecord(*this); - } - - //! Child specific hash function in lower 32 bits. - //! | 31 -------- 20 | 19 -------- 8 | 7 ------ 0 | - //! | start_indices | end_indices | strides | - size_t hash() const final { - auto result = RecordFunctor::hash(); - size_t start_idx_hash = 0; - for (auto i : start_indices_) { - start_idx_hash ^= static_cast(i); - } - size_t end_idx_hash = 0; - for (auto i : end_indices_) { - end_idx_hash ^= static_cast(i); - } - size_t stride_hash = 0; - for (auto i : strides_) { - stride_hash ^= static_cast(i); - } - - result |= (start_idx_hash & 0xfff) << 20; - result |= (end_idx_hash & 0xfff) << 8; - return result | (stride_hash & 0xff); - } - - bool operator==(const RecordFunctor& other) const final { - auto result = false; - if (auto child_ptr = dynamic_cast(&other)) { - result = RecordFunctor::operator==(other) && - (start_indices_ == child_ptr->start_indices_) && - (end_indices_ == child_ptr->end_indices_) && - (strides_ == child_ptr->strides_); - } - return result; - } - - void operator()(FusionState& fd) final { - auto arg = fd.getFusionState(args_.at(0).index)->as(); - TensorView* output = slice(arg, start_indices_, end_indices_, strides_); - fd.setFusionState(outputs_.at(0).index, output); - } - - void print(std::ostream& os, bool close_function = true) const final { - RecordFunctor::print(os, false); - os << ", start_indices=["; - bool first_arg = true; - for (auto idx : start_indices_) { - if (first_arg) { - first_arg = false; - } else { - os << ", "; - } - os << idx; - } - os << "], end_indices=["; - first_arg = true; - for (auto idx : end_indices_) { - if (first_arg) { - first_arg = false; - } else { - os << ", "; - } - os << idx; - } - os << "], strides=["; - first_arg = true; - for (auto stride : strides_) { - if (first_arg) { - first_arg = false; - } else { - os << ", "; - } - os << stride; - } - os << "]"; - if (close_function) { - os << ")"; - } - } - - std::pair> recordData( - flatbuffers::FlatBufferBuilder& builder) const final { - return { - serde::RecordData::Slice, - serde::CreateSliceDirect( - builder, &start_indices_, &end_indices_, &strides_) - .Union()}; - } - - private: - //! A slices beginning index for each dimension - //! Values must be greater-than or equal to 0 - std::vector start_indices_; - //! A slices end index for each dimension (excluded from the slice) - //! Values are greater than or equal to the start index for a dimension - std::vector end_indices_; - //! For a dim, the step between start and end. - //! NOTE: Strides are currently limited to steps of 1 - std::vector strides_; -}; +// struct SliceOpRecord : RecordFunctor { +// SliceOpRecord( +// std::vector _args, +// std::vector _outputs, +// std::vector start_indices, +// std::vector end_indices, +// std::vector strides) +// : RecordFunctor( +// std::move(_args), +// std::move(_outputs), +// "ops.slice", +// serde::RecordType::SliceOp), +// start_indices_(std::move(start_indices)), +// end_indices_(std::move(end_indices)), +// strides_(std::move(strides)) {} +// ~SliceOpRecord() override = default; +// RecordFunctor* clone() final { +// return new SliceOpRecord(*this); +// } +// +// //! Child specific hash function in lower 32 bits. +// //! | 31 -------- 20 | 19 -------- 8 | 7 ------ 0 | +// //! | start_indices | end_indices | strides | +// size_t hash() const final { +// auto result = RecordFunctor::hash(); +// size_t start_idx_hash = 0; +// for (auto i : start_indices_) { +// start_idx_hash ^= static_cast(i); +// } +// size_t end_idx_hash = 0; +// for (auto i : end_indices_) { +// end_idx_hash ^= static_cast(i); +// } +// size_t stride_hash = 0; +// for (auto i : strides_) { +// stride_hash ^= static_cast(i); +// } +// +// result |= (start_idx_hash & 0xfff) << 20; +// result |= (end_idx_hash & 0xfff) << 8; +// return result | (stride_hash & 0xff); +// } +// +// bool operator==(const RecordFunctor& other) const final { +// auto result = false; +// if (auto child_ptr = dynamic_cast(&other)) { +// result = RecordFunctor::operator==(other) && +// (start_indices_ == child_ptr->start_indices_) && +// (end_indices_ == child_ptr->end_indices_) && +// (strides_ == child_ptr->strides_); +// } +// return result; +// } +// +// void operator()(FusionState& fd) final { +// auto arg = fd.getFusionState(args_.at(0).index)->as(); +// TensorView* output = slice(arg, start_indices_, end_indices_, strides_); +// fd.setFusionState(outputs_.at(0).index, output); +// } +// +// void print(std::ostream& os, bool close_function = true) const final { +// RecordFunctor::print(os, false); +// os << ", start_indices=["; +// bool first_arg = true; +// for (auto idx : start_indices_) { +// if (first_arg) { +// first_arg = false; +// } else { +// os << ", "; +// } +// os << idx; +// } +// os << "], end_indices=["; +// first_arg = true; +// for (auto idx : end_indices_) { +// if (first_arg) { +// first_arg = false; +// } else { +// os << ", "; +// } +// os << idx; +// } +// os << "], strides=["; +// first_arg = true; +// for (auto stride : strides_) { +// if (first_arg) { +// first_arg = false; +// } else { +// os << ", "; +// } +// os << stride; +// } +// os << "]"; +// if (close_function) { +// os << ")"; +// } +// } +// +// std::pair> recordData( +// flatbuffers::FlatBufferBuilder& builder) const final { +// return { +// serde::RecordData::Slice, +// serde::CreateSliceDirect( +// builder, &start_indices_, &end_indices_, &strides_) +// .Union()}; +// } +// +// private: +// //! A slices beginning index for each dimension +// //! Values must be greater-than or equal to 0 +// std::vector start_indices_; +// //! A slices end index for each dimension (excluded from the slice) +// //! Values are greater than or equal to the start index for a dimension +// std::vector end_indices_; +// //! For a dim, the step between start and end. +// //! NOTE: Strides are currently limited to steps of 1 +// std::vector strides_; +//}; //! Specialized Record Functor for recording FusionDefinition Start. //! There should only ever be one instance of this Record in the diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index c09ff5811f1..a61cc676e73 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -2728,14 +2728,45 @@ Tensor slice_fn( if (opt_strides.has_value()) { new_stride = ShapeAsVector(opt_stride.value(), *fd); + NVF_CHECK( + new_start.size == new_stride.size, + "Slice start_indices and strides don't match! Start Indices: ", + new_start.size, + " Strides: ", + new_stride.size); } else { - // set stride 1; + // TODO: should I kept it as none instead? + // set stride 1 with the proper size; + std::vector stride_vec; + Scalar out = fd->defineScalar(); + fd->defineRecord(new ScalarRecord( + {fd->recordingState(out())}, + 1, + DataType::Int, + /*inline_def=*/true)); + stride_vec.resize(new_start.size, out); + new_stride = define_vector_base_fn(*fd, stride_vec, true); } - Tensor output = fd->defineTensor(new_shape.size); - fd->defineRecord(new ReshapeOpRecord( - {fd->recordingState(arg()), fd->recordingState(new_shape())}, - {fd->recordingState(output())})); + NVF_CHECK( + arg.dims == new_start.size, + "Number of tensor dimensions does not match slice dimensions! Tensor-dims: ", + arg.dims, + " Slice-dims: ", + new_start.size); + NVF_CHECK( + new_start.size == new_end.size, + "Slice indexing attribute dimensions don't match! Start Indices: ", + new_start.size, + " End Indices: ", + new_end.size, + " Strides: ", + strides.size()); + + Tensor output = fd->defineTensor(arg.dims); + fd->defineRecord(new SliceOpRecord( + {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(new_stride())}, + {fd->recordingState(output())}, return output; } From 0dda7a96d0988cffe427c4e25d0104f2219f9927 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 19:52:40 -0700 Subject: [PATCH 03/42] WIP --- csrc/python_frontend/fusion_record.h | 51 ++-- csrc/python_frontend/python_bindings.cpp | 301 +++++++++++------------ 2 files changed, 168 insertions(+), 184 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 663385d686e..7ebda6e45f0 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -384,10 +384,8 @@ struct SliceOpRecord : RecordFunctor { void operator()(FusionState& fd) final { TensorView* arg = fd.getFusionState(args_.at(0).index)->as(); - const std::vector& start = - fd.getFusionStateVector(args_.at(1).index); - const std::vector& end = - fd.getFusionStateVector(args_.at(2).index); + const std::vector& start = fd.getFusionStateVector(args_.at(1).index); + const std::vector& end = fd.getFusionStateVector(args_.at(2).index); const std::vector& stride = fd.getFusionStateVector(args_.at(3).index); std::vector vec_slice; @@ -1591,32 +1589,21 @@ struct ReductionOpRecord : RecordFunctor { result = result && (*fusion_op_.template target< - TensorView* (*)(TensorView*, - const std::vector&, - bool, - DataType)>() == + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() == *child_ptr->fusion_op_.template target< - TensorView* (*)(TensorView*, - const std::vector&, - bool, - DataType)>()); + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>()); if (isDebugDumpEnabled(DebugDumpOption::PythonFrontendDebug)) { - debug() << " Target Ptr [self: 0x" << std::hex - << (size_t)*fusion_op_.template target< + debug() + << " Target Ptr [self: 0x" << std::hex + << (size_t)*fusion_op_.template target< - TensorView* (*)(TensorView*, - const std::vector&, - bool, - DataType)>() - << "] [other: 0x" << std::hex - << (size_t)*child_ptr->fusion_op_.template target< + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() + << "] [other: 0x" << std::hex + << (size_t)*child_ptr->fusion_op_.template target< - TensorView* (*)(TensorView*, - const std::vector&, - bool, - DataType)>() - << "]\n"; + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() + << "]\n"; } result = result && (keep_dim_ == child_ptr->keep_dim_); result = result && (dtype_ == child_ptr->dtype_); @@ -2021,7 +2008,7 @@ struct ScalarRecord : RecordFunctor { // RecordFunctor* clone() final { // return new SliceOpRecord(*this); // } -// +// // //! Child specific hash function in lower 32 bits. // //! | 31 -------- 20 | 19 -------- 8 | 7 ------ 0 | // //! | start_indices | end_indices | strides | @@ -2039,12 +2026,12 @@ struct ScalarRecord : RecordFunctor { // for (auto i : strides_) { // stride_hash ^= static_cast(i); // } -// +// // result |= (start_idx_hash & 0xfff) << 20; // result |= (end_idx_hash & 0xfff) << 8; // return result | (stride_hash & 0xff); // } -// +// // bool operator==(const RecordFunctor& other) const final { // auto result = false; // if (auto child_ptr = dynamic_cast(&other)) { @@ -2055,13 +2042,13 @@ struct ScalarRecord : RecordFunctor { // } // return result; // } -// +// // void operator()(FusionState& fd) final { // auto arg = fd.getFusionState(args_.at(0).index)->as(); // TensorView* output = slice(arg, start_indices_, end_indices_, strides_); // fd.setFusionState(outputs_.at(0).index, output); // } -// +// // void print(std::ostream& os, bool close_function = true) const final { // RecordFunctor::print(os, false); // os << ", start_indices=["; @@ -2099,7 +2086,7 @@ struct ScalarRecord : RecordFunctor { // os << ")"; // } // } -// +// // std::pair> recordData( // flatbuffers::FlatBufferBuilder& builder) const final { // return { @@ -2108,7 +2095,7 @@ struct ScalarRecord : RecordFunctor { // builder, &start_indices_, &end_indices_, &strides_) // .Union()}; // } -// +// // private: // //! A slices beginning index for each dimension // //! Values must be greater-than or equal to 0 diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index a61cc676e73..dc0022f0c82 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -235,6 +235,64 @@ struct DimInfo { } }; +template +Tensor slice_fn( + FusionDefinition::Operators& self, + Tensor arg, + ShapeType start, + ShapeType end, + std::optional opt_stride) { + NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); + + FusionDefinition* fd = self.fusion_definition; + Vector new_start = ShapeAsVector(start, *fd); + Vector new_end = ShapeAsVector(end, *fd); + Vector new_stride; + + if (opt_strides.has_value()) { + new_stride = ShapeAsVector(opt_stride.value(), *fd); + NVF_CHECK( + new_start.size == new_stride.size, + "Slice start_indices and strides don't match! Start Indices: ", + new_start.size, + " Strides: ", + new_stride.size); + } else { + // TODO: should I kept it as none instead? + // set stride 1 with the proper size; + std::vector stride_vec; + Scalar out = fd->defineScalar(); + fd->defineRecord(new ScalarRecord( + {fd->recordingState(out())}, + 1, + DataType::Int, + /*inline_def=*/true)); + stride_vec.resize(new_start.size, out); + new_stride = define_vector_base_fn(*fd, stride_vec, true); + } + + NVF_CHECK( + arg.dims == new_start.size, + "Number of tensor dimensions does not match slice dimensions! Tensor-dims: ", + arg.dims, + " Slice-dims: ", + new_start.size); + NVF_CHECK( + new_start.size == new_end.size, + "Slice indexing attribute dimensions don't match! Start Indices: ", + new_start.size, + " End Indices: ", + new_end.size, + " Strides: ", + strides.size()); + + Tensor output = fd->defineTensor(arg.dims); + fd->defineRecord(new SliceOpRecord( + {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(new_stride())}, + {fd->recordingState(output())}); + return output; +} + } // namespace std::vector> computeContiguity( @@ -2119,100 +2177,97 @@ void initNvFuserPythonBindings(PyObject* module) { NVFUSER_PYTHON_BINDING_TERNARY_WITH_ALPHA_OP("addcmul", addcmul) #undef NVFUSER_PYTHON_BINDING_TERNARY_WITH_ALPHA_OP -#define NVFUSER_PYTHON_BINDING_REDUCTION_OP(op_str, op_name, record_type) \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = 0; \ - std::vector dims(arg.dims); \ - std::iota(dims.begin(), dims.end(), 0); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast&, \ - bool, \ - DataType)>(op_name), \ - dims, \ - false, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dtype") = DataType::Null, \ - py::return_value_policy::reference); \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - int dim, \ - bool keepdim, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = keepdim ? arg.dims : (arg.dims - 1); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast&, \ - bool, \ - DataType)>(op_name), \ - {dim}, \ - keepdim, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dim"), \ - py::arg("keepdim") = false, \ - py::arg("dtype") = DataType::Null, \ - py::return_value_policy::reference); \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - const std::vector& dims, \ - bool keepdim, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = keepdim ? arg.dims : (arg.dims - dims.size()); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast&, \ - bool, \ - DataType)>(op_name), \ - dims, \ - keepdim, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dims"), \ - py::arg("keepdim") = false, \ - py::arg("dtype") = DataType::Null, \ +#define NVFUSER_PYTHON_BINDING_REDUCTION_OP(op_str, op_name, record_type) \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = 0; \ + std::vector dims(arg.dims); \ + std::iota(dims.begin(), dims.end(), 0); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast< \ + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ + op_name), \ + dims, \ + false, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dtype") = DataType::Null, \ + py::return_value_policy::reference); \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + int dim, \ + bool keepdim, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = keepdim ? arg.dims : (arg.dims - 1); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast< \ + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ + op_name), \ + {dim}, \ + keepdim, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dim"), \ + py::arg("keepdim") = false, \ + py::arg("dtype") = DataType::Null, \ + py::return_value_policy::reference); \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + const std::vector& dims, \ + bool keepdim, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = keepdim ? arg.dims : (arg.dims - dims.size()); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast< \ + TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ + op_name), \ + dims, \ + keepdim, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dims"), \ + py::arg("keepdim") = false, \ + py::arg("dtype") = DataType::Null, \ py::return_value_policy::reference); NVFUSER_PYTHON_BINDING_REDUCTION_OP( @@ -2712,64 +2767,6 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("index"), py::return_value_policy::reference); -template -Tensor slice_fn( - FusionDefinition::Operators& self, - Tensor arg, - ShapeType start, - ShapeType end, - std::optional opt_stride) { - NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); - - FusionDefinition* fd = self.fusion_definition; - Vector new_start = ShapeAsVector(start, *fd); - Vector new_end = ShapeAsVector(end, *fd); - Vector new_stride; - - if (opt_strides.has_value()) { - new_stride = ShapeAsVector(opt_stride.value(), *fd); - NVF_CHECK( - new_start.size == new_stride.size, - "Slice start_indices and strides don't match! Start Indices: ", - new_start.size, - " Strides: ", - new_stride.size); - } else { - // TODO: should I kept it as none instead? - // set stride 1 with the proper size; - std::vector stride_vec; - Scalar out = fd->defineScalar(); - fd->defineRecord(new ScalarRecord( - {fd->recordingState(out())}, - 1, - DataType::Int, - /*inline_def=*/true)); - stride_vec.resize(new_start.size, out); - new_stride = define_vector_base_fn(*fd, stride_vec, true); - } - - NVF_CHECK( - arg.dims == new_start.size, - "Number of tensor dimensions does not match slice dimensions! Tensor-dims: ", - arg.dims, - " Slice-dims: ", - new_start.size); - NVF_CHECK( - new_start.size == new_end.size, - "Slice indexing attribute dimensions don't match! Start Indices: ", - new_start.size, - " End Indices: ", - new_end.size, - " Strides: ", - strides.size()); - - Tensor output = fd->defineTensor(arg.dims); - fd->defineRecord(new SliceOpRecord( - {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(new_stride())}, - {fd->recordingState(output())}, - return output; -} - nvf_ops.def( "slice", slice_fn, From 282974342c4de21df3010ac8a2a4f221f1a2cde3 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 20:07:13 -0700 Subject: [PATCH 04/42] fixing build --- csrc/python_frontend/fusion_record.h | 2 +- csrc/python_frontend/python_bindings.cpp | 18 +++++++++--------- csrc/serde/fusion_record.cpp | 6 +----- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 7ebda6e45f0..fd3c44aa7b1 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -390,7 +390,7 @@ struct SliceOpRecord : RecordFunctor { fd.getFusionStateVector(args_.at(3).index); std::vector vec_slice; for (const auto idx : c10::irange(arg->nDims())) { - vec_slice.emplace(start[idx], end[idx], stride[idx]); + vec_slice.emplace_back(start[idx], end[idx], stride[idx]); } auto output = slice(arg, vec_slice); fd.setFusionState(outputs_.at(0).index, output); diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index dc0022f0c82..217e0ee780d 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -247,16 +247,17 @@ Tensor slice_fn( FusionDefinition* fd = self.fusion_definition; Vector new_start = ShapeAsVector(start, *fd); Vector new_end = ShapeAsVector(end, *fd); - Vector new_stride; + size_t stride_index = 0; - if (opt_strides.has_value()) { - new_stride = ShapeAsVector(opt_stride.value(), *fd); + if (opt_stride.has_value()) { + Vector new_stride = ShapeAsVector(opt_stride.value(), *fd); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", new_start.size, " Strides: ", new_stride.size); + stride_index = new_stride(); } else { // TODO: should I kept it as none instead? // set stride 1 with the proper size; @@ -268,7 +269,8 @@ Tensor slice_fn( DataType::Int, /*inline_def=*/true)); stride_vec.resize(new_start.size, out); - new_stride = define_vector_base_fn(*fd, stride_vec, true); + Vector new_stride = define_vector_base_fn(*fd, stride_vec, true); + stride_index = new_stride(); } NVF_CHECK( @@ -282,14 +284,12 @@ Tensor slice_fn( "Slice indexing attribute dimensions don't match! Start Indices: ", new_start.size, " End Indices: ", - new_end.size, - " Strides: ", - strides.size()); + new_end.size); Tensor output = fd->defineTensor(arg.dims); fd->defineRecord(new SliceOpRecord( - {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(new_stride())}, - {fd->recordingState(output())}); + {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(stride_index)}, + {fd->recordingState(output())})); return output; } diff --git a/csrc/serde/fusion_record.cpp b/csrc/serde/fusion_record.cpp index f55cdedbb26..0f71c61326e 100644 --- a/csrc/serde/fusion_record.cpp +++ b/csrc/serde/fusion_record.cpp @@ -525,13 +525,9 @@ void RecordFunctorFactory::registerAllParsers() { registerParser(RecordType::ReshapeOp, deserializeReshapeRecord); auto deserializeSliceRecord = [](const RecordFunctor* buffer) { - auto data = buffer->data_as_Slice(); return new python_frontend::SliceOpRecord( parseStateArgs(buffer->args()), - parseStateArgs(buffer->outputs()), - parseVector(data->start_indices()), - parseVector(data->end_indices()), - parseVector(data->strides())); + parseStateArgs(buffer->outputs())); }; registerParser(RecordType::SliceOp, deserializeSliceRecord); From 64cfc8cf08dc460307d1f142a39d37a7297035eb Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 20:11:37 -0700 Subject: [PATCH 05/42] fixing build --- csrc/python_frontend/fusion_record.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index fd3c44aa7b1..7c4de0ffbb0 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -390,7 +390,7 @@ struct SliceOpRecord : RecordFunctor { fd.getFusionStateVector(args_.at(3).index); std::vector vec_slice; for (const auto idx : c10::irange(arg->nDims())) { - vec_slice.emplace_back(start[idx], end[idx], stride[idx]); + vec_slice.emplace_back({start.at(idx), end.at(idx), stride.at(idx)}); } auto output = slice(arg, vec_slice); fd.setFusionState(outputs_.at(0).index, output); From 227e2b5e599790e40795585e02886a4dc3ab9a4e Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 20:47:24 -0700 Subject: [PATCH 06/42] fixing build --- csrc/python_frontend/fusion_record.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 7c4de0ffbb0..167ab448e98 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -390,7 +390,9 @@ struct SliceOpRecord : RecordFunctor { fd.getFusionStateVector(args_.at(3).index); std::vector vec_slice; for (const auto idx : c10::irange(arg->nDims())) { - vec_slice.emplace_back({start.at(idx), end.at(idx), stride.at(idx)}); + // NOTE: there's an extra move, we can use emplace_back if we go write + // some constructors for Slice. + vec_slice.push_back({start.at(idx), end.at(idx), stride.at(idx)}); } auto output = slice(arg, vec_slice); fd.setFusionState(outputs_.at(0).index, output); From ae3c2d0ef855e0146f01b9f2c5676baed977a52a Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 23:51:47 -0700 Subject: [PATCH 07/42] quick fix on kwargs --- csrc/python_frontend/fusion_record.h | 6 +++--- csrc/python_frontend/python_bindings.cpp | 25 ++++++++++++------------ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 167ab448e98..5aabb920ccd 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -373,9 +373,9 @@ struct SliceOpRecord : RecordFunctor { std::move(_outputs), "ops.slice", serde::RecordType::SliceOp) { - arg_names_[1] = "start"; - arg_names_[2] = "end"; - arg_names_[3] = "stride"; + arg_names_[1] = "start_indices"; + arg_names_[2] = "end_indices"; + arg_names_[3] = "strides"; } ~SliceOpRecord() override = default; RecordFunctor* clone() final { diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 217e0ee780d..342ce6b2977 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -2767,28 +2767,29 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("index"), py::return_value_policy::reference); - nvf_ops.def( - "slice", - slice_fn, - py::arg("arg"), - py::arg("start"), - py::arg("end"), - py::arg("strides") = py::none(), - py::return_value_policy::reference); + // TODO: Add a specialization for this?! + // nvf_ops.def( + // "slice", + // slice_fn, + // py::arg("arg"), + // py::arg("start_indices"), + // py::arg("end_indices"), + // py::arg("strides") = py::none(), + // py::return_value_policy::reference); nvf_ops.def( "slice", slice_fn, py::arg("arg"), - py::arg("start"), - py::arg("end"), + py::arg("start_indices"), + py::arg("end_indices"), py::arg("strides") = py::none(), py::return_value_policy::reference); nvf_ops.def( "slice", slice_fn, py::arg("arg"), - py::arg("start"), - py::arg("end"), + py::arg("start_indices"), + py::arg("end_indices"), py::arg("strides") = py::none(), py::return_value_policy::reference); nvf_ops.def( From c42bfe1b4b8766296c462099d7612e4b74e737a2 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 23:53:34 -0700 Subject: [PATCH 08/42] remove option thing --- csrc/python_frontend/python_bindings.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 342ce6b2977..a10d2405177 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -241,7 +241,7 @@ Tensor slice_fn( Tensor arg, ShapeType start, ShapeType end, - std::optional opt_stride) { + ShapeType strides) { NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); FusionDefinition* fd = self.fusion_definition; @@ -249,8 +249,8 @@ Tensor slice_fn( Vector new_end = ShapeAsVector(end, *fd); size_t stride_index = 0; - if (opt_stride.has_value()) { - Vector new_stride = ShapeAsVector(opt_stride.value(), *fd); + if (!strides.empty()) { + Vector new_stride = ShapeAsVector(stride, *fd); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", @@ -2782,7 +2782,7 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("arg"), py::arg("start_indices"), py::arg("end_indices"), - py::arg("strides") = py::none(), + py::arg("strides") = py::list(), py::return_value_policy::reference); nvf_ops.def( "slice", @@ -2790,7 +2790,7 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("arg"), py::arg("start_indices"), py::arg("end_indices"), - py::arg("strides") = py::none(), + py::arg("strides") = py::tuple(), py::return_value_policy::reference); nvf_ops.def( "squeeze", From 266a4bb1cca29c7af04732e6a43110e235179476 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 10 Sep 2024 23:55:26 -0700 Subject: [PATCH 09/42] typo --- csrc/python_frontend/python_bindings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index a10d2405177..a56a7ffb311 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -250,7 +250,7 @@ Tensor slice_fn( size_t stride_index = 0; if (!strides.empty()) { - Vector new_stride = ShapeAsVector(stride, *fd); + Vector new_stride = ShapeAsVector(strides, *fd); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", From 43bfaeca21bbb56942dacf0bb752863d9b2389f7 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 10:19:31 -0700 Subject: [PATCH 10/42] CLANGFORMAT --- csrc/python_frontend/fusion_record.h | 31 ++-- csrc/python_frontend/python_bindings.cpp | 190 ++++++++++++----------- csrc/serde/fusion_record.cpp | 3 +- 3 files changed, 120 insertions(+), 104 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 0621a12dcbb..5a9defe327b 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -1591,21 +1591,32 @@ struct ReductionOpRecord : RecordFunctor { result = result && (*fusion_op_.template target< - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() == + TensorView* (*)(TensorView*, + const std::vector&, + bool, + DataType)>() == *child_ptr->fusion_op_.template target< - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>()); + TensorView* (*)(TensorView*, + const std::vector&, + bool, + DataType)>()); if (isDebugDumpEnabled(DebugDumpOption::PythonFrontendDebug)) { - debug() - << " Target Ptr [self: 0x" << std::hex - << (size_t)*fusion_op_.template target< + debug() << " Target Ptr [self: 0x" << std::hex + << (size_t)*fusion_op_.template target< - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() - << "] [other: 0x" << std::hex - << (size_t)*child_ptr->fusion_op_.template target< + TensorView* (*)(TensorView*, + const std::vector&, + bool, + DataType)>() + << "] [other: 0x" << std::hex + << (size_t)*child_ptr->fusion_op_.template target< - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>() - << "]\n"; + TensorView* (*)(TensorView*, + const std::vector&, + bool, + DataType)>() + << "]\n"; } result = result && (keep_dim_ == child_ptr->keep_dim_); result = result && (dtype_ == child_ptr->dtype_); diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index a56a7ffb311..a7b5b274fea 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -288,7 +288,10 @@ Tensor slice_fn( Tensor output = fd->defineTensor(arg.dims); fd->defineRecord(new SliceOpRecord( - {fd->recordingState(arg()), fd->recordingState(new_start()),fd->recordingState(new_end()),fd->recordingState(stride_index)}, + {fd->recordingState(arg()), + fd->recordingState(new_start()), + fd->recordingState(new_end()), + fd->recordingState(stride_index)}, {fd->recordingState(output())})); return output; } @@ -2177,97 +2180,100 @@ void initNvFuserPythonBindings(PyObject* module) { NVFUSER_PYTHON_BINDING_TERNARY_WITH_ALPHA_OP("addcmul", addcmul) #undef NVFUSER_PYTHON_BINDING_TERNARY_WITH_ALPHA_OP -#define NVFUSER_PYTHON_BINDING_REDUCTION_OP(op_str, op_name, record_type) \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = 0; \ - std::vector dims(arg.dims); \ - std::iota(dims.begin(), dims.end(), 0); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast< \ - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ - op_name), \ - dims, \ - false, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dtype") = DataType::Null, \ - py::return_value_policy::reference); \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - int dim, \ - bool keepdim, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = keepdim ? arg.dims : (arg.dims - 1); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast< \ - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ - op_name), \ - {dim}, \ - keepdim, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dim"), \ - py::arg("keepdim") = false, \ - py::arg("dtype") = DataType::Null, \ - py::return_value_policy::reference); \ - nvf_ops.def( \ - op_str, \ - [](FusionDefinition::Operators& self, \ - Tensor arg, \ - const std::vector& dims, \ - bool keepdim, \ - PrimDataType dtype) -> Tensor { \ - FUSER_PERF_SCOPE("Operators." op_str); \ - NVF_CHECK( \ - self.validUse(), "Attempting to add to a completed definition!"); \ - FusionDefinition* fd = self.fusion_definition; \ - size_t ndims = keepdim ? arg.dims : (arg.dims - dims.size()); \ - Tensor output = fd->defineTensor(ndims); \ - fd->defineRecord(new ReductionOpRecord( \ - {fd->recordingState(arg())}, \ - {fd->recordingState(output())}, \ - ("ops." op_str), \ - record_type, \ - static_cast< \ - TensorView* (*)(TensorView*, const std::vector&, bool, DataType)>( \ - op_name), \ - dims, \ - keepdim, \ - dtype)); \ - return output; \ - }, \ - py::arg("arg"), \ - py::arg("dims"), \ - py::arg("keepdim") = false, \ - py::arg("dtype") = DataType::Null, \ +#define NVFUSER_PYTHON_BINDING_REDUCTION_OP(op_str, op_name, record_type) \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = 0; \ + std::vector dims(arg.dims); \ + std::iota(dims.begin(), dims.end(), 0); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast&, \ + bool, \ + DataType)>(op_name), \ + dims, \ + false, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dtype") = DataType::Null, \ + py::return_value_policy::reference); \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + int dim, \ + bool keepdim, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = keepdim ? arg.dims : (arg.dims - 1); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast&, \ + bool, \ + DataType)>(op_name), \ + {dim}, \ + keepdim, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dim"), \ + py::arg("keepdim") = false, \ + py::arg("dtype") = DataType::Null, \ + py::return_value_policy::reference); \ + nvf_ops.def( \ + op_str, \ + [](FusionDefinition::Operators& self, \ + Tensor arg, \ + const std::vector& dims, \ + bool keepdim, \ + PrimDataType dtype) -> Tensor { \ + FUSER_PERF_SCOPE("Operators." op_str); \ + NVF_CHECK( \ + self.validUse(), "Attempting to add to a completed definition!"); \ + FusionDefinition* fd = self.fusion_definition; \ + size_t ndims = keepdim ? arg.dims : (arg.dims - dims.size()); \ + Tensor output = fd->defineTensor(ndims); \ + fd->defineRecord(new ReductionOpRecord( \ + {fd->recordingState(arg())}, \ + {fd->recordingState(output())}, \ + ("ops." op_str), \ + record_type, \ + static_cast&, \ + bool, \ + DataType)>(op_name), \ + dims, \ + keepdim, \ + dtype)); \ + return output; \ + }, \ + py::arg("arg"), \ + py::arg("dims"), \ + py::arg("keepdim") = false, \ + py::arg("dtype") = DataType::Null, \ py::return_value_policy::reference); NVFUSER_PYTHON_BINDING_REDUCTION_OP( diff --git a/csrc/serde/fusion_record.cpp b/csrc/serde/fusion_record.cpp index 2291d053850..17ce4a152f0 100644 --- a/csrc/serde/fusion_record.cpp +++ b/csrc/serde/fusion_record.cpp @@ -526,8 +526,7 @@ void RecordFunctorFactory::registerAllParsers() { auto deserializeSliceRecord = [](const RecordFunctor* buffer) { return new python_frontend::SliceOpRecord( - parseStateArgs(buffer->args()), - parseStateArgs(buffer->outputs())); + parseStateArgs(buffer->args()), parseStateArgs(buffer->outputs())); }; registerParser(RecordType::SliceOp, deserializeSliceRecord); From 2e1fac544125833d55c15986b664741982c0bbfa Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 10:34:14 -0700 Subject: [PATCH 11/42] fixing it?! --- csrc/python_frontend/fusion_record.h | 119 ----------------------- csrc/python_frontend/python_bindings.cpp | 31 +++--- 2 files changed, 15 insertions(+), 135 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 5a9defe327b..a2bc704943e 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -2002,125 +2002,6 @@ struct ScalarRecord : RecordFunctor { PrimDataType dtype_; }; -// struct SliceOpRecord : RecordFunctor { -// SliceOpRecord( -// std::vector _args, -// std::vector _outputs, -// std::vector start_indices, -// std::vector end_indices, -// std::vector strides) -// : RecordFunctor( -// std::move(_args), -// std::move(_outputs), -// "ops.slice", -// serde::RecordType::SliceOp), -// start_indices_(std::move(start_indices)), -// end_indices_(std::move(end_indices)), -// strides_(std::move(strides)) {} -// ~SliceOpRecord() override = default; -// RecordFunctor* clone() final { -// return new SliceOpRecord(*this); -// } -// -// //! Child specific hash function in lower 32 bits. -// //! | 31 -------- 20 | 19 -------- 8 | 7 ------ 0 | -// //! | start_indices | end_indices | strides | -// size_t hash() const final { -// auto result = RecordFunctor::hash(); -// size_t start_idx_hash = 0; -// for (auto i : start_indices_) { -// start_idx_hash ^= static_cast(i); -// } -// size_t end_idx_hash = 0; -// for (auto i : end_indices_) { -// end_idx_hash ^= static_cast(i); -// } -// size_t stride_hash = 0; -// for (auto i : strides_) { -// stride_hash ^= static_cast(i); -// } -// -// result |= (start_idx_hash & 0xfff) << 20; -// result |= (end_idx_hash & 0xfff) << 8; -// return result | (stride_hash & 0xff); -// } -// -// bool operator==(const RecordFunctor& other) const final { -// auto result = false; -// if (auto child_ptr = dynamic_cast(&other)) { -// result = RecordFunctor::operator==(other) && -// (start_indices_ == child_ptr->start_indices_) && -// (end_indices_ == child_ptr->end_indices_) && -// (strides_ == child_ptr->strides_); -// } -// return result; -// } -// -// void operator()(FusionState& fd) final { -// auto arg = fd.getFusionState(args_.at(0).index)->as(); -// TensorView* output = slice(arg, start_indices_, end_indices_, strides_); -// fd.setFusionState(outputs_.at(0).index, output); -// } -// -// void print(std::ostream& os, bool close_function = true) const final { -// RecordFunctor::print(os, false); -// os << ", start_indices=["; -// bool first_arg = true; -// for (auto idx : start_indices_) { -// if (first_arg) { -// first_arg = false; -// } else { -// os << ", "; -// } -// os << idx; -// } -// os << "], end_indices=["; -// first_arg = true; -// for (auto idx : end_indices_) { -// if (first_arg) { -// first_arg = false; -// } else { -// os << ", "; -// } -// os << idx; -// } -// os << "], strides=["; -// first_arg = true; -// for (auto stride : strides_) { -// if (first_arg) { -// first_arg = false; -// } else { -// os << ", "; -// } -// os << stride; -// } -// os << "]"; -// if (close_function) { -// os << ")"; -// } -// } -// -// std::pair> recordData( -// flatbuffers::FlatBufferBuilder& builder) const final { -// return { -// serde::RecordData::Slice, -// serde::CreateSliceDirect( -// builder, &start_indices_, &end_indices_, &strides_) -// .Union()}; -// } -// -// private: -// //! A slices beginning index for each dimension -// //! Values must be greater-than or equal to 0 -// std::vector start_indices_; -// //! A slices end index for each dimension (excluded from the slice) -// //! Values are greater than or equal to the start index for a dimension -// std::vector end_indices_; -// //! For a dim, the step between start and end. -// //! NOTE: Strides are currently limited to steps of 1 -// std::vector strides_; -//}; - //! Specialized Record Functor for recording FusionDefinition Start. //! There should only ever be one instance of this Record in the //! Fusion Cache. diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index a7b5b274fea..a38613b9336 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -241,7 +241,7 @@ Tensor slice_fn( Tensor arg, ShapeType start, ShapeType end, - ShapeType strides) { + std::optional strides) { NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); FusionDefinition* fd = self.fusion_definition; @@ -249,8 +249,8 @@ Tensor slice_fn( Vector new_end = ShapeAsVector(end, *fd); size_t stride_index = 0; - if (!strides.empty()) { - Vector new_stride = ShapeAsVector(strides, *fd); + if (stride.has_value()) { + Vector new_stride = ShapeAsVector(strides.value(), *fd); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", @@ -269,8 +269,8 @@ Tensor slice_fn( DataType::Int, /*inline_def=*/true)); stride_vec.resize(new_start.size, out); - Vector new_stride = define_vector_base_fn(*fd, stride_vec, true); - stride_index = new_stride(); + Vector default_stride = define_vector_base_fn(*fd, stride_vec, true); + stride_index = default_stride(); } NVF_CHECK( @@ -2773,22 +2773,21 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("index"), py::return_value_policy::reference); - // TODO: Add a specialization for this?! - // nvf_ops.def( - // "slice", - // slice_fn, - // py::arg("arg"), - // py::arg("start_indices"), - // py::arg("end_indices"), - // py::arg("strides") = py::none(), - // py::return_value_policy::reference); + nvf_ops.def( + "slice", + slice_fn, + py::arg("arg"), + py::arg("start_indices"), + py::arg("end_indices"), + py::arg("strides") = py::none(), + py::return_value_policy::reference); nvf_ops.def( "slice", slice_fn, py::arg("arg"), py::arg("start_indices"), py::arg("end_indices"), - py::arg("strides") = py::list(), + py::arg("strides") = py::none(), py::return_value_policy::reference); nvf_ops.def( "slice", @@ -2796,7 +2795,7 @@ void initNvFuserPythonBindings(PyObject* module) { py::arg("arg"), py::arg("start_indices"), py::arg("end_indices"), - py::arg("strides") = py::tuple(), + py::arg("strides") = py::none(), py::return_value_policy::reference); nvf_ops.def( "squeeze", From 6f011c62ef74f6d92e3b95730eb06991d3437c0f Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 10:41:53 -0700 Subject: [PATCH 12/42] typo --- csrc/python_frontend/python_bindings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index a38613b9336..5ba9b50316b 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -249,7 +249,7 @@ Tensor slice_fn( Vector new_end = ShapeAsVector(end, *fd); size_t stride_index = 0; - if (stride.has_value()) { + if (strides.has_value()) { Vector new_stride = ShapeAsVector(strides.value(), *fd); NVF_CHECK( new_start.size == new_stride.size, From ef56b1be0b58c95a8dbbc99e0a54f129adac04c0 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 13:42:17 -0700 Subject: [PATCH 13/42] fixing test/build --- csrc/python_frontend/python_bindings.cpp | 24 ++++++++++-------- tests/python/test_python_frontend.py | 31 ++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 5ba9b50316b..411836a3b7f 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -259,17 +259,21 @@ Tensor slice_fn( new_stride.size); stride_index = new_stride(); } else { - // TODO: should I kept it as none instead? - // set stride 1 with the proper size; + // set stride with default value; std::vector stride_vec; - Scalar out = fd->defineScalar(); - fd->defineRecord(new ScalarRecord( - {fd->recordingState(out())}, - 1, - DataType::Int, - /*inline_def=*/true)); - stride_vec.resize(new_start.size, out); - Vector default_stride = define_vector_base_fn(*fd, stride_vec, true); + // Note: we cannot re-use the same ScalarRecord, otherwise, serialized python program uses `define_vector`, which would create multiple ScalarRecord, causing a cache miss. + for (auto i : c10::irange(new_start.size)) { + (void)i; // Supress unused variable warning + Scalar out = fd->defineScalar(); + fd->defineRecord(new ScalarRecord( + {fd->recordingState(out())}, + 1, + DataType::Int, + /*inline_def=*/true)); + stride_vec.push_back(out); + } + // Cannot inline definition with `Vector` here, since `FusionDefinition.ops.slice` expects start/end/stride to have the same type. + Vector default_stride = define_vector_base_fn(*fd, stride_vec, std::is_same_v); stride_index = default_stride(); } diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 664b254c8c6..73bec5d255d 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -4324,3 +4324,34 @@ def fusion_func(fd: FusionDefinition) -> None: ] self.exec_nvfuser(fusion_func, inputs) + + def test_slice_api(self): + x = torch.randn((2, 5, 10), dtype=torch.float32, device="cuda:0") + + offset = (0, 1, 2) + + def fusion_func(fd: FusionDefinition) -> None: + T0 = fd.define_tensor( + shape=[-1, -1, -1], + contiguity=[True, True, True], + dtype=DataType.Float, + is_cpu=False, + stride_order=[2, 1, 0], + ) + T1 = fd.ops.slice( + T0, start_indices=offset, end_indices=(2, 5, 10), strides=(1, 1, 1) + ) + fd.add_output(T1) + V_start = fd.define_vector(offset) + V_end = T0.shape() + T2 = fd.ops.slice(T0, V_start, V_end) + fd.add_output(T2) + dynamic_start = fd.define_vector(3) + dynamic_end = fd.define_vector(3) + T3 = fd.ops.slice(T0, dynamic_start, dynamic_end) + fd.add_output(T3) + inputs = [x, *offset, *x.shape] + + nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) + for out in nvf_out: + self.assertTrue(out.allclose(x[:,1:,2:])) From 2be6bebe3c009bd91a7203621af67718d2fa5a35 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 13:45:08 -0700 Subject: [PATCH 14/42] fixing logic --- csrc/python_frontend/python_bindings.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 411836a3b7f..4e92e8e36b6 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -273,7 +273,7 @@ Tensor slice_fn( stride_vec.push_back(out); } // Cannot inline definition with `Vector` here, since `FusionDefinition.ops.slice` expects start/end/stride to have the same type. - Vector default_stride = define_vector_base_fn(*fd, stride_vec, std::is_same_v); + Vector default_stride = define_vector_base_fn(*fd, stride_vec, !std::is_same_v); stride_index = default_stride(); } From 69975f18ce8d50992bd70847dfa5d505a2a8b749 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 13:45:33 -0700 Subject: [PATCH 15/42] clangformat --- csrc/python_frontend/python_bindings.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 4e92e8e36b6..02a8c635b06 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -261,7 +261,9 @@ Tensor slice_fn( } else { // set stride with default value; std::vector stride_vec; - // Note: we cannot re-use the same ScalarRecord, otherwise, serialized python program uses `define_vector`, which would create multiple ScalarRecord, causing a cache miss. + // Note: we cannot re-use the same ScalarRecord, otherwise, serialized + // python program uses `define_vector`, which would create multiple + // ScalarRecord, causing a cache miss. for (auto i : c10::irange(new_start.size)) { (void)i; // Supress unused variable warning Scalar out = fd->defineScalar(); @@ -272,8 +274,11 @@ Tensor slice_fn( /*inline_def=*/true)); stride_vec.push_back(out); } - // Cannot inline definition with `Vector` here, since `FusionDefinition.ops.slice` expects start/end/stride to have the same type. - Vector default_stride = define_vector_base_fn(*fd, stride_vec, !std::is_same_v); + // Cannot inline definition with `Vector` here, since + // `FusionDefinition.ops.slice` expects start/end/stride to have the same + // type. + Vector default_stride = define_vector_base_fn( + *fd, stride_vec, !std::is_same_v); stride_index = default_stride(); } From 53ad605dd2250aa8b777ab7f665a21fb74489278 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 14:09:39 -0700 Subject: [PATCH 16/42] some error check/message --- csrc/python_frontend/fusion_record.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index a2bc704943e..216860787d7 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -392,7 +392,23 @@ struct SliceOpRecord : RecordFunctor { for (const auto idx : c10::irange(arg->nDims())) { // NOTE: there's an extra move, we can use emplace_back if we go write // some constructors for Slice. - vec_slice.push_back({start.at(idx), end.at(idx), stride.at(idx)}); + Val start_idx = start.at(idx); + Val end_idx = start.at(idx); + Val stride_idx = start.at(idx); + NVF_CHECK( + !start_idx.isConstInt() || start_idx->evaluate().as() >= 0, + "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", + start_idx->evaluate().as()); + NVF_CHECK( + !start_idx.isConstInt() || !end_idx.isConstInt() || end_idx->evaluate().as() >= start_idx->evaluate().as() >= 0, + "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", + start_idx->evaluate().as(), + " End Indices: ", + end_idx->evaluate().as()); + NVF_CHECK( + stride_idx.isConstInt() && stride_idx->evaluate().as == 1, + "nvFuser Limitation: All slice operation strides must be of const int with size 1"); + vec_slice.push_back({start_idx, end_idx, stride_idx}); } auto output = slice(arg, vec_slice); fd.setFusionState(outputs_.at(0).index, output); From 67ed3e41605e42deaeed1cadd26d832255af8a89 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 14:28:46 -0700 Subject: [PATCH 17/42] fixing build, avoiding check --- csrc/python_frontend/fusion_record.h | 12 ++++++------ csrc/python_frontend/python_bindings.cpp | 25 ++++++++++++------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 216860787d7..ddf742d4a87 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -392,21 +392,21 @@ struct SliceOpRecord : RecordFunctor { for (const auto idx : c10::irange(arg->nDims())) { // NOTE: there's an extra move, we can use emplace_back if we go write // some constructors for Slice. - Val start_idx = start.at(idx); - Val end_idx = start.at(idx); - Val stride_idx = start.at(idx); + Val* start_idx = start.at(idx); + Val* end_idx = end.at(idx); + Val* stride_idx = stride.at(idx); NVF_CHECK( - !start_idx.isConstInt() || start_idx->evaluate().as() >= 0, + !start_idx->isConstInt() || start_idx->evaluate().as() >= 0, "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", start_idx->evaluate().as()); NVF_CHECK( - !start_idx.isConstInt() || !end_idx.isConstInt() || end_idx->evaluate().as() >= start_idx->evaluate().as() >= 0, + !start_idx->isConstInt() || !end_idx->isConstInt() || end_idx->evaluate().as() >= start_idx->evaluate().as(), "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", start_idx->evaluate().as(), " End Indices: ", end_idx->evaluate().as()); NVF_CHECK( - stride_idx.isConstInt() && stride_idx->evaluate().as == 1, + stride_idx->isConstInt() && stride_idx->evaluate().as() == 1, "nvFuser Limitation: All slice operation strides must be of const int with size 1"); vec_slice.push_back({start_idx, end_idx, stride_idx}); } diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 02a8c635b06..982224f5c94 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -60,7 +60,8 @@ template Vector define_vector_fn( FusionDefinition& self, ITERABLE& values, - bool inline_def = false) { + bool inline_def, + bool shape_check) { FUSER_PERF_SCOPE("python_frontend::define_vector_fn"); std::vector args; size_t idx = 0; @@ -68,7 +69,7 @@ Vector define_vector_fn( if (py::isinstance(item)) { auto int_value = py::cast(item); NVF_CHECK( - int_value >= -1, + !shape_check || int_value >= -1, "The value ", int_value, " at index ", @@ -99,11 +100,11 @@ Vector define_vector_explicit_fn( FusionDefinition& self, ITERABLE& values, PrimDataType dtype = DataType::Int) { - return define_vector_fn(self, values, /*inline_def=*/false); + return define_vector_fn(self, values, /*inline_def=*/false, /*shape_check=*/true); } template -Vector ShapeAsVector(ShapeType shape, FusionDefinition& fd) { +Vector SequenceAsVector(ShapeType shape, FusionDefinition& fd, bool shape_check=true) { static_assert( std::is_same_v || std::is_same_v || @@ -121,7 +122,7 @@ Vector ShapeAsVector(ShapeType shape, FusionDefinition& fd) { // ``` // would not work because the compiler would try to instantiate // define_vector_fn and fail. - return define_vector_fn(fd, shape, /*inline_def=*/true); + return define_vector_fn(fd, shape, /*inline_def=*/true, /*shape_check=*/shape_check); } } @@ -134,7 +135,7 @@ Tensor broadcast_in_dim_fn( FUSER_PERF_SCOPE("Operators.broadcast_in_dim"); FusionDefinition* fd = op.fusion_definition; NVF_CHECK(op.validUse(), "Attempting to add to a completed definition!"); - Vector output_shape = ShapeAsVector(generic_output_shape, *fd); + Vector output_shape = SequenceAsVector(generic_output_shape, *fd); NVF_CHECK( output_shape.size >= broadcast_dims.size(), "broadcast_dims vector size is too big for output shape!"); @@ -156,7 +157,7 @@ Tensor full_op_fn( PrimDataType dtype) { NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); FusionDefinition* fd = self.fusion_definition; - Vector output_shape = ShapeAsVector(generic_output_shape, *fd); + Vector output_shape = SequenceAsVector(generic_output_shape, *fd); Tensor output = fd->defineTensor(output_shape.size); fd->defineRecord(new FullOpRecord( {fd->recordingState(output_shape()), fd->recordingState(fill_value())}, @@ -173,7 +174,7 @@ Tensor reshape_fn( NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); FusionDefinition* fd = self.fusion_definition; - Vector new_shape = ShapeAsVector(generic_new_shape, *fd); + Vector new_shape = SequenceAsVector(generic_new_shape, *fd); Tensor output = fd->defineTensor(new_shape.size); fd->defineRecord(new ReshapeOpRecord( @@ -200,7 +201,7 @@ Tensor random_dist_op_fn( "Random distributions only create floating point types! ", dtype); FusionDefinition* fd = self.fusion_definition; - Vector new_shape = ShapeAsVector(generic_new_shape, *fd); + Vector new_shape = SequenceAsVector(generic_new_shape, *fd); Tensor output = fd->defineTensor(new_shape.size); std::vector arg_states = { @@ -245,12 +246,12 @@ Tensor slice_fn( NVF_CHECK(self.validUse(), "Attempting to add to a completed definition!"); FusionDefinition* fd = self.fusion_definition; - Vector new_start = ShapeAsVector(start, *fd); - Vector new_end = ShapeAsVector(end, *fd); + Vector new_start = SequenceAsVector(start, *fd, /*shape_check=*/false); + Vector new_end = SequenceAsVector(end, *fd, /*shape_check=*/false); size_t stride_index = 0; if (strides.has_value()) { - Vector new_stride = ShapeAsVector(strides.value(), *fd); + Vector new_stride = SequenceAsVector(strides.value(), *fd, /*shape_check=*/false); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", From 4f157390850bc676ad426690131f46ebfe2c5ee9 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 14:32:49 -0700 Subject: [PATCH 18/42] fixing error message --- csrc/python_frontend/fusion_record.h | 2 +- tests/python/opinfo_input_generators.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index ddf742d4a87..24c8bf7baad 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -407,7 +407,7 @@ struct SliceOpRecord : RecordFunctor { end_idx->evaluate().as()); NVF_CHECK( stride_idx->isConstInt() && stride_idx->evaluate().as() == 1, - "nvFuser Limitation: All slice operation strides must be of const int with size 1"); + "nvFuser Limitation: All slice operation strides must be of const size 1"); vec_slice.push_back({start_idx, end_idx, stride_idx}); } auto output = slice(arg, vec_slice); diff --git a/tests/python/opinfo_input_generators.py b/tests/python/opinfo_input_generators.py index e369c03f9e7..d6cd18efb49 100644 --- a/tests/python/opinfo_input_generators.py +++ b/tests/python/opinfo_input_generators.py @@ -1163,7 +1163,7 @@ def slice_error_generator( check_strides = ErrorSample( {"start_indices": [0, 0], "end_indices": [5, 5], "strides": [5, 5]}, - "nvFuser Limitation: All slice operation strides must be of size 1.", + "nvFuser Limitation: All slice operation strides must be of const size 1.", ) check_tensor_dims = ErrorSample( From 804ec363ea06bda43623b2c2c930f7f9831b24f3 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 16:43:21 -0700 Subject: [PATCH 19/42] quick hack on exception --- csrc/python_frontend/fusion_cache.cpp | 10 ++++++++++ csrc/python_frontend/fusion_cache.h | 3 +++ csrc/python_frontend/fusion_definition.cpp | 18 +++++++++++++----- tests/python/opinfo_input_generators.py | 2 +- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index 8ac10145379..c6184e9862c 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -247,6 +247,16 @@ bool TrieNode::isTerminal() const { return (record.get()->recordType() == serde::RecordType::End); } +void TrieNode::markException(std::exception e) { + std::lock_guard guard(node->trie_node_lock); + exception = e; +} + +std::optional TrieNode::getException() const { + std::lock_guard guard(trie_node_lock); + return exception; +} + flatbuffers::Offset TrieNode::serialize( flatbuffers::FlatBufferBuilder& builder, const std::map& diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index c19e08a6b00..3f418090751 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,6 +104,8 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; + std::optional getException() const; + void markException(std::exception e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( flatbuffers::FlatBufferBuilder& builder, @@ -125,6 +127,7 @@ struct TrieNode { TrieNode* parent; //! For thread-Safe locking of a node std::mutex trie_node_lock; + std::optional exception; }; //! \class FusionCache diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index 4c94bb48ed8..592a38f2f4c 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -87,13 +87,18 @@ void FusionDefinition::finalizeDefinition() { } trie_node_ = fusionCache()->createChild(trie_node_, end_record_.get()); fusion_id_ = std::optional(trie_node_->fusion_id); - NVF_CHECK(id().has_value(), "Invalid fusion id!"); + try { + NVF_CHECK(id().has_value(), "Invalid fusion id!"); - if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { - print(debug()); - } + if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { + print(debug()); + } - buildFusionIr(preschedFusion()); + buildFusionIr(preschedFusion()); + } catch (const std::exception& e) { + trie_node_->markException(e); + throw e; + } if (isDebugDumpEnabled(DebugDumpOption::FusionIrOriginal)) { printIr(); @@ -102,6 +107,9 @@ void FusionDefinition::finalizeDefinition() { if (isDebugDumpEnabled(DebugDumpOption::PythonFrontendDebug)) { debug() << "\nFusionDefinition: Terminal Node found!\n"; } + if (std::optional e = trie_node_->getException()) { + throw e; + } trie_node_ = child_node.value(); fusion_id_ = std::optional(trie_node_->fusion_id); } diff --git a/tests/python/opinfo_input_generators.py b/tests/python/opinfo_input_generators.py index d6cd18efb49..c70572d7681 100644 --- a/tests/python/opinfo_input_generators.py +++ b/tests/python/opinfo_input_generators.py @@ -1163,7 +1163,7 @@ def slice_error_generator( check_strides = ErrorSample( {"start_indices": [0, 0], "end_indices": [5, 5], "strides": [5, 5]}, - "nvFuser Limitation: All slice operation strides must be of const size 1.", + "nvFuser Limitation: All slice operation strides must be of const size 1", ) check_tensor_dims = ErrorSample( From 691198d4567424a2d931387e1d1bb3e566d16439 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 16 Sep 2024 18:05:18 -0700 Subject: [PATCH 20/42] wip --- csrc/python_frontend/fusion_cache.cpp | 4 ++-- csrc/python_frontend/fusion_cache.h | 2 +- csrc/python_frontend/fusion_definition.cpp | 12 ++++++++---- nvfuser/__init__.py | 6 ++++++ 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index c6184e9862c..489e70df95b 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -248,11 +248,11 @@ bool TrieNode::isTerminal() const { } void TrieNode::markException(std::exception e) { - std::lock_guard guard(node->trie_node_lock); + std::lock_guard guard(trie_node_lock); exception = e; } -std::optional TrieNode::getException() const { +std::optional TrieNode::getException() { std::lock_guard guard(trie_node_lock); return exception; } diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index 3f418090751..c7501765b2d 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,7 +104,7 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; - std::optional getException() const; + std::optional getException(); void markException(std::exception e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index 592a38f2f4c..a3a492486a3 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -97,7 +97,9 @@ void FusionDefinition::finalizeDefinition() { buildFusionIr(preschedFusion()); } catch (const std::exception& e) { trie_node_->markException(e); - throw e; + fusion_id_ = std::nullopt; + std::cout << "error: " << e.what() << std::endl; + throw; } if (isDebugDumpEnabled(DebugDumpOption::FusionIrOriginal)) { @@ -107,10 +109,12 @@ void FusionDefinition::finalizeDefinition() { if (isDebugDumpEnabled(DebugDumpOption::PythonFrontendDebug)) { debug() << "\nFusionDefinition: Terminal Node found!\n"; } - if (std::optional e = trie_node_->getException()) { - throw e; - } trie_node_ = child_node.value(); + std::optional opt_e = trie_node_->getException(); + if (opt_e.has_value()) { + std::cout << "cached error: " << opt_e.value().what() << std::endl; + } + NVF_CHECK(!opt_e.has_value(), opt_e.value().what()); fusion_id_ = std::optional(trie_node_->fusion_id); } diff --git a/nvfuser/__init__.py b/nvfuser/__init__.py index e971868426b..43489113408 100644 --- a/nvfuser/__init__.py +++ b/nvfuser/__init__.py @@ -59,7 +59,9 @@ def __enter__(self): def __exit__(self, type, value, traceback): try: + print(f"definition0 {self.id()=}") self._finalize_definition() + print(f"finalized definition0 {self.id()=}") except Exception as err: logger.exception(self.getReproErrorString("defining")) raise @@ -197,10 +199,14 @@ def execute( device = device.index # if definition is not defined by a context manager, try a child class + print(" ------ xxx 0 ----") + print(f" {self.id()=}") if self.id() is None: self._setup_definition() self.definition() + print(" ------ xxx 1 ----") self._finalize_definition() + print(" ------ xxx 2 ----") # If schedule is defined by child class and schedule is not defined for # inputs, make a schedule. From e15be6fa71c7c32a8b1528341aa8b25fdf580a02 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 11:12:58 -0700 Subject: [PATCH 21/42] clangformat; quick fix on error message --- csrc/python_frontend/fusion_cache.cpp | 2 +- csrc/python_frontend/fusion_cache.h | 4 ++-- csrc/python_frontend/fusion_definition.cpp | 3 +-- csrc/python_frontend/fusion_record.h | 28 ++++++++++++---------- csrc/python_frontend/python_bindings.cpp | 14 +++++++---- 5 files changed, 29 insertions(+), 22 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index 489e70df95b..f021ce151ef 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -247,7 +247,7 @@ bool TrieNode::isTerminal() const { return (record.get()->recordType() == serde::RecordType::End); } -void TrieNode::markException(std::exception e) { +void TrieNode::setException(const char* e) { std::lock_guard guard(trie_node_lock); exception = e; } diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index c7501765b2d..8e2f20d3464 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -105,7 +105,7 @@ struct TrieNode { // a the end of Fusion entry in the cache. bool isTerminal() const; std::optional getException(); - void markException(std::exception e); + void setException(const char* e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( flatbuffers::FlatBufferBuilder& builder, @@ -127,7 +127,7 @@ struct TrieNode { TrieNode* parent; //! For thread-Safe locking of a node std::mutex trie_node_lock; - std::optional exception; + std::optional exception = std::nullopt; }; //! \class FusionCache diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index a3a492486a3..3d3ae2282ba 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -96,9 +96,8 @@ void FusionDefinition::finalizeDefinition() { buildFusionIr(preschedFusion()); } catch (const std::exception& e) { - trie_node_->markException(e); + trie_node_->setException(e.what()); fusion_id_ = std::nullopt; - std::cout << "error: " << e.what() << std::endl; throw; } diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 24c8bf7baad..8086a30db79 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -395,19 +395,21 @@ struct SliceOpRecord : RecordFunctor { Val* start_idx = start.at(idx); Val* end_idx = end.at(idx); Val* stride_idx = stride.at(idx); - NVF_CHECK( - !start_idx->isConstInt() || start_idx->evaluate().as() >= 0, - "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", - start_idx->evaluate().as()); - NVF_CHECK( - !start_idx->isConstInt() || !end_idx->isConstInt() || end_idx->evaluate().as() >= start_idx->evaluate().as(), - "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", - start_idx->evaluate().as(), - " End Indices: ", - end_idx->evaluate().as()); - NVF_CHECK( - stride_idx->isConstInt() && stride_idx->evaluate().as() == 1, - "nvFuser Limitation: All slice operation strides must be of const size 1"); + NVF_CHECK( + !start_idx->isConstInt() || start_idx->evaluate().as() >= 0, + "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", + start_idx->evaluate().as()); + NVF_CHECK( + !start_idx->isConstInt() || !end_idx->isConstInt() || + end_idx->evaluate().as() >= + start_idx->evaluate().as(), + "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", + start_idx->evaluate().as(), + " End Indices: ", + end_idx->evaluate().as()); + NVF_CHECK( + stride_idx->isConstInt() && stride_idx->evaluate().as() == 1, + "nvFuser Limitation: All slice operation strides must be of const size 1"); vec_slice.push_back({start_idx, end_idx, stride_idx}); } auto output = slice(arg, vec_slice); diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 982224f5c94..2607d14c6ca 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -100,11 +100,15 @@ Vector define_vector_explicit_fn( FusionDefinition& self, ITERABLE& values, PrimDataType dtype = DataType::Int) { - return define_vector_fn(self, values, /*inline_def=*/false, /*shape_check=*/true); + return define_vector_fn( + self, values, /*inline_def=*/false, /*shape_check=*/true); } template -Vector SequenceAsVector(ShapeType shape, FusionDefinition& fd, bool shape_check=true) { +Vector SequenceAsVector( + ShapeType shape, + FusionDefinition& fd, + bool shape_check = true) { static_assert( std::is_same_v || std::is_same_v || @@ -122,7 +126,8 @@ Vector SequenceAsVector(ShapeType shape, FusionDefinition& fd, bool shape_check= // ``` // would not work because the compiler would try to instantiate // define_vector_fn and fail. - return define_vector_fn(fd, shape, /*inline_def=*/true, /*shape_check=*/shape_check); + return define_vector_fn( + fd, shape, /*inline_def=*/true, /*shape_check=*/shape_check); } } @@ -251,7 +256,8 @@ Tensor slice_fn( size_t stride_index = 0; if (strides.has_value()) { - Vector new_stride = SequenceAsVector(strides.value(), *fd, /*shape_check=*/false); + Vector new_stride = + SequenceAsVector(strides.value(), *fd, /*shape_check=*/false); NVF_CHECK( new_start.size == new_stride.size, "Slice start_indices and strides don't match! Start Indices: ", From 128aee5df6ed3fd082ecfda006c04286fdec56d8 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 11:15:20 -0700 Subject: [PATCH 22/42] fixing build --- csrc/python_frontend/fusion_cache.cpp | 2 +- csrc/python_frontend/fusion_cache.h | 2 +- csrc/python_frontend/fusion_definition.cpp | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index f021ce151ef..b6aa7d244b6 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -252,7 +252,7 @@ void TrieNode::setException(const char* e) { exception = e; } -std::optional TrieNode::getException() { +std::optional TrieNode::getException() { std::lock_guard guard(trie_node_lock); return exception; } diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index 8e2f20d3464..2ef400f2816 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,7 +104,7 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; - std::optional getException(); + std::optional getException(); void setException(const char* e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index 3d3ae2282ba..af0dfe30eae 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -109,11 +109,8 @@ void FusionDefinition::finalizeDefinition() { debug() << "\nFusionDefinition: Terminal Node found!\n"; } trie_node_ = child_node.value(); - std::optional opt_e = trie_node_->getException(); - if (opt_e.has_value()) { - std::cout << "cached error: " << opt_e.value().what() << std::endl; - } - NVF_CHECK(!opt_e.has_value(), opt_e.value().what()); + std::optional opt_e = trie_node_->getException(); + NVF_CHECK(!opt_e.has_value(), opt_e); fusion_id_ = std::optional(trie_node_->fusion_id); } From a89031195baff914856e279864162b4a0628d8d9 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 11:20:19 -0700 Subject: [PATCH 23/42] fixing error message --- csrc/python_frontend/fusion_definition.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index af0dfe30eae..a6ef666ee8d 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -110,7 +110,7 @@ void FusionDefinition::finalizeDefinition() { } trie_node_ = child_node.value(); std::optional opt_e = trie_node_->getException(); - NVF_CHECK(!opt_e.has_value(), opt_e); + NVF_CHECK(!opt_e.has_value(), opt_e.value()); fusion_id_ = std::optional(trie_node_->fusion_id); } From e9da45be1b436ab0b9c07426982722239c92cfb5 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 11:22:39 -0700 Subject: [PATCH 24/42] removing print --- nvfuser/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/nvfuser/__init__.py b/nvfuser/__init__.py index aa527f70a95..d40613030e5 100644 --- a/nvfuser/__init__.py +++ b/nvfuser/__init__.py @@ -59,9 +59,7 @@ def __enter__(self): def __exit__(self, type, value, traceback): try: - print(f"definition0 {self.id()=}") self._finalize_definition() - print(f"finalized definition0 {self.id()=}") except Exception as err: logger.exception(self.getReproErrorString("defining")) raise @@ -199,14 +197,10 @@ def execute( device = device.index # if definition is not defined by a context manager, try a child class - print(" ------ xxx 0 ----") - print(f" {self.id()=}") if self.id() is None: self._setup_definition() self.definition() - print(" ------ xxx 1 ----") self._finalize_definition() - print(" ------ xxx 2 ----") # If schedule is defined by child class and schedule is not defined for # inputs, make a schedule. From 19aa2e0bbe7f2c0e75e65ad60a83d192b18e650f Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 11:38:21 -0700 Subject: [PATCH 25/42] allow fusion cache to cache error message --- csrc/python_frontend/fusion_cache.cpp | 10 ++++++++++ csrc/python_frontend/fusion_cache.h | 3 +++ csrc/python_frontend/fusion_definition.cpp | 18 +++++++++++++----- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index 8ac10145379..b6aa7d244b6 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -247,6 +247,16 @@ bool TrieNode::isTerminal() const { return (record.get()->recordType() == serde::RecordType::End); } +void TrieNode::setException(const char* e) { + std::lock_guard guard(trie_node_lock); + exception = e; +} + +std::optional TrieNode::getException() { + std::lock_guard guard(trie_node_lock); + return exception; +} + flatbuffers::Offset TrieNode::serialize( flatbuffers::FlatBufferBuilder& builder, const std::map& diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index c19e08a6b00..2ef400f2816 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,6 +104,8 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; + std::optional getException(); + void setException(const char* e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( flatbuffers::FlatBufferBuilder& builder, @@ -125,6 +127,7 @@ struct TrieNode { TrieNode* parent; //! For thread-Safe locking of a node std::mutex trie_node_lock; + std::optional exception = std::nullopt; }; //! \class FusionCache diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index 4c94bb48ed8..a6ef666ee8d 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -87,13 +87,19 @@ void FusionDefinition::finalizeDefinition() { } trie_node_ = fusionCache()->createChild(trie_node_, end_record_.get()); fusion_id_ = std::optional(trie_node_->fusion_id); - NVF_CHECK(id().has_value(), "Invalid fusion id!"); + try { + NVF_CHECK(id().has_value(), "Invalid fusion id!"); - if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { - print(debug()); - } + if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { + print(debug()); + } - buildFusionIr(preschedFusion()); + buildFusionIr(preschedFusion()); + } catch (const std::exception& e) { + trie_node_->setException(e.what()); + fusion_id_ = std::nullopt; + throw; + } if (isDebugDumpEnabled(DebugDumpOption::FusionIrOriginal)) { printIr(); @@ -103,6 +109,8 @@ void FusionDefinition::finalizeDefinition() { debug() << "\nFusionDefinition: Terminal Node found!\n"; } trie_node_ = child_node.value(); + std::optional opt_e = trie_node_->getException(); + NVF_CHECK(!opt_e.has_value(), opt_e.value()); fusion_id_ = std::optional(trie_node_->fusion_id); } From 88f81dd71f32a624fbb7e64c652c4a895271190f Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 14:28:57 -0700 Subject: [PATCH 26/42] test added --- tests/python/test_python_frontend.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 664b254c8c6..ea6dd244b91 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -4324,3 +4324,18 @@ def fusion_func(fd: FusionDefinition) -> None: ] self.exec_nvfuser(fusion_func, inputs) + + # testing that error thrown in finalizeDefinition is not accidentally cached as legit fusion. + def test_fusion_definition_error_cache(self): + def fusion_func(fd : FusionDefinition) -> None : + # NOTE: it's important that the exception is thrown inside FusionDefinition::finalizeDefinition() + # e.g. https://github.com/NVIDIA/Fuser/blob/adbbc75e58e6c53c606e90c8bc64f020b4b9df85/csrc/python_frontend/fusion_record.h#L1276 + T0 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True], dtype=DataType.Int, is_cpu=True, stride_order=[1, 0]) + + for i in range(2): + with pytest.raises( + Exception, + match="CPU non-scalar tensor is not supported!", + ): + with FusionDefinition() as fd: + fusion_func(fd) From d389d0a224f3212317e80ab79b3f8d8fcbe45ee6 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 14:51:23 -0700 Subject: [PATCH 27/42] black --- tests/python/test_python_frontend.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index ea6dd244b91..1bf5824c0bf 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -4327,10 +4327,16 @@ def fusion_func(fd: FusionDefinition) -> None: # testing that error thrown in finalizeDefinition is not accidentally cached as legit fusion. def test_fusion_definition_error_cache(self): - def fusion_func(fd : FusionDefinition) -> None : + def fusion_func(fd: FusionDefinition) -> None: # NOTE: it's important that the exception is thrown inside FusionDefinition::finalizeDefinition() # e.g. https://github.com/NVIDIA/Fuser/blob/adbbc75e58e6c53c606e90c8bc64f020b4b9df85/csrc/python_frontend/fusion_record.h#L1276 - T0 = fd.define_tensor(shape=[-1, -1], contiguity=[True, True], dtype=DataType.Int, is_cpu=True, stride_order=[1, 0]) + T0 = fd.define_tensor( + shape=[-1, -1], + contiguity=[True, True], + dtype=DataType.Int, + is_cpu=True, + stride_order=[1, 0], + ) for i in range(2): with pytest.raises( From 0e72307635e92e475ee081c8515e9efaf98e3735 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 15:08:39 -0700 Subject: [PATCH 28/42] revert changes in #2953 --- csrc/python_frontend/fusion_cache.cpp | 10 ---------- csrc/python_frontend/fusion_cache.h | 3 --- csrc/python_frontend/fusion_definition.cpp | 18 +++++------------- 3 files changed, 5 insertions(+), 26 deletions(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index b6aa7d244b6..8ac10145379 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -247,16 +247,6 @@ bool TrieNode::isTerminal() const { return (record.get()->recordType() == serde::RecordType::End); } -void TrieNode::setException(const char* e) { - std::lock_guard guard(trie_node_lock); - exception = e; -} - -std::optional TrieNode::getException() { - std::lock_guard guard(trie_node_lock); - return exception; -} - flatbuffers::Offset TrieNode::serialize( flatbuffers::FlatBufferBuilder& builder, const std::map& diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index 2ef400f2816..c19e08a6b00 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,8 +104,6 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; - std::optional getException(); - void setException(const char* e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( flatbuffers::FlatBufferBuilder& builder, @@ -127,7 +125,6 @@ struct TrieNode { TrieNode* parent; //! For thread-Safe locking of a node std::mutex trie_node_lock; - std::optional exception = std::nullopt; }; //! \class FusionCache diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index a6ef666ee8d..4c94bb48ed8 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -87,20 +87,14 @@ void FusionDefinition::finalizeDefinition() { } trie_node_ = fusionCache()->createChild(trie_node_, end_record_.get()); fusion_id_ = std::optional(trie_node_->fusion_id); - try { - NVF_CHECK(id().has_value(), "Invalid fusion id!"); + NVF_CHECK(id().has_value(), "Invalid fusion id!"); - if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { - print(debug()); - } - - buildFusionIr(preschedFusion()); - } catch (const std::exception& e) { - trie_node_->setException(e.what()); - fusion_id_ = std::nullopt; - throw; + if (isDebugDumpEnabled(DebugDumpOption::PythonDefinition)) { + print(debug()); } + buildFusionIr(preschedFusion()); + if (isDebugDumpEnabled(DebugDumpOption::FusionIrOriginal)) { printIr(); } @@ -109,8 +103,6 @@ void FusionDefinition::finalizeDefinition() { debug() << "\nFusionDefinition: Terminal Node found!\n"; } trie_node_ = child_node.value(); - std::optional opt_e = trie_node_->getException(); - NVF_CHECK(!opt_e.has_value(), opt_e.value()); fusion_id_ = std::optional(trie_node_->fusion_id); } From 175c82cfa4eb8bc32d7a9b19181e47977229e546 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 17 Sep 2024 15:12:05 -0700 Subject: [PATCH 29/42] black --- tests/python/test_python_frontend.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 207da64ce3e..422543a2bcc 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -4350,11 +4350,12 @@ def fusion_func(fd: FusionDefinition) -> None: dynamic_end = fd.define_vector(3) T3 = fd.ops.slice(T0, dynamic_start, dynamic_end) fd.add_output(T3) + inputs = [x, *offset, *x.shape] nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) for out in nvf_out: - self.assertTrue(out.allclose(x[:,1:,2:])) + self.assertTrue(out.allclose(x[:, 1:, 2:])) # testing that error thrown in finalizeDefinition is not accidentally cached as legit fusion. def test_fusion_definition_error_cache(self): From cc41d1d89a20a38eebab389f6cf7eabad244b0bf Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 10:43:52 -0700 Subject: [PATCH 30/42] comment message --- csrc/python_frontend/fusion_cache.h | 8 ++++++++ csrc/python_frontend/fusion_definition.cpp | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/csrc/python_frontend/fusion_cache.h b/csrc/python_frontend/fusion_cache.h index 2ef400f2816..36ef32c5a55 100644 --- a/csrc/python_frontend/fusion_cache.h +++ b/csrc/python_frontend/fusion_cache.h @@ -104,7 +104,13 @@ struct TrieNode { // Queries whether the entry denotes a leaf node which also represents // a the end of Fusion entry in the cache. bool isTerminal() const; + //! getException returns the cached Exception raise during construction of + //! Fusion. It returns std::nullopt if the no error thrown. This function is + //! called at the end of FusionDefinition::finalizeDefinition to avoid + //! silently using a bad FusionDefinition cached in FusionCache. std::optional getException(); + //! setException is called to record exception message thrown during + //! construction of Fusion. void setException(const char* e); //! Serialize TrieNode using flatbuffers NVF_API flatbuffers::Offset serialize( @@ -127,6 +133,8 @@ struct TrieNode { TrieNode* parent; //! For thread-Safe locking of a node std::mutex trie_node_lock; + //! exception is used to track if we failed to create a valid fusion for + //! FusionDefinition at this given TrieNode std::optional exception = std::nullopt; }; diff --git a/csrc/python_frontend/fusion_definition.cpp b/csrc/python_frontend/fusion_definition.cpp index 03710a9af13..eb0250f0bd5 100644 --- a/csrc/python_frontend/fusion_definition.cpp +++ b/csrc/python_frontend/fusion_definition.cpp @@ -95,6 +95,11 @@ void FusionDefinition::finalizeDefinition() { buildFusionIr(preschedFusion()); } catch (const std::exception& e) { + // Exception thrown after fusionCache()->createChild wouldn't be visible + // by fusion cache, if the exception is suppressed on the python side. We + // explicitly set the exception message on the terminal trie node, so + // we'll be able to throw the same exception again when user tries to + // create the same fusion entry. trie_node_->setException(e.what()); fusion_id_ = std::nullopt; throw; @@ -109,6 +114,8 @@ void FusionDefinition::finalizeDefinition() { } trie_node_ = child_node.value(); std::optional opt_e = trie_node_->getException(); + // rethrow the exception message if the cached FusionDefinition fails to + // build a proper fusion earlier. NVF_CHECK(!opt_e.has_value(), opt_e.value()); fusion_id_ = std::optional(trie_node_->fusion_id); } From ecc1316d145c6d071c669e2935ff24fc24e5b4fb Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 10:51:04 -0700 Subject: [PATCH 31/42] bumping version --- version.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.txt b/version.txt index 13dead7ebf1..d3b5ba4bfc3 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.10 +0.2.11 From b87219469672ee2b22462e5a0aaec402df570141 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 12:19:46 -0700 Subject: [PATCH 32/42] fixing deserialization with exception handling --- csrc/python_frontend/fusion_cache.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/csrc/python_frontend/fusion_cache.cpp b/csrc/python_frontend/fusion_cache.cpp index b6aa7d244b6..9770ca7987e 100644 --- a/csrc/python_frontend/fusion_cache.cpp +++ b/csrc/python_frontend/fusion_cache.cpp @@ -751,7 +751,13 @@ void FusionCache::deserialize(std::string filename) { "The fusion id for this TrieNode should already be set.") Fusion* fusion = queryFusionSchedules(fb_trie_node->fusion_id())->preschedFusion(); - state->buildFusionIr(fusion); + try { + // There could be bad fusion in the serialization. + state->buildFusionIr(fusion); + } catch (const std::exception& e) { + // catch exception and setException for the terminal node + trie_ptr->setException(e.what()); + } } // Table TrieNode => Field: children: [ulong] From 35d9fac215cfee927c995926251d50a7be67eb0d Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 13:49:35 -0700 Subject: [PATCH 33/42] fixing typo --- tests/python/test_multidevice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_multidevice.py b/tests/python/test_multidevice.py index 6291ba89b37..16c8fb1e42b 100644 --- a/tests/python/test_multidevice.py +++ b/tests/python/test_multidevice.py @@ -43,7 +43,7 @@ def test_pointwise(multidevice_test): # scalar inputs isn't supported; class MultiDeviceModel(FusionDefinition): def definition(self): - self.t0 = self.define_tensor((2, 4), (False, False), dtype=DataType.Float) + self.t0 = self.define_tensor((num_devices, 4), (False, False), dtype=DataType.Float) self.t1 = self.ops.relu(self.t0) self.t2 = self.ops.add(self.t1, self.t1) self.add_output(self.t2) From dc610d44489ee5694f418034ff5f86e919abf1a1 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 13:55:46 -0700 Subject: [PATCH 34/42] BLACK --- tests/python/test_multidevice.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/python/test_multidevice.py b/tests/python/test_multidevice.py index 16c8fb1e42b..7bb3f7af1ad 100644 --- a/tests/python/test_multidevice.py +++ b/tests/python/test_multidevice.py @@ -43,7 +43,9 @@ def test_pointwise(multidevice_test): # scalar inputs isn't supported; class MultiDeviceModel(FusionDefinition): def definition(self): - self.t0 = self.define_tensor((num_devices, 4), (False, False), dtype=DataType.Float) + self.t0 = self.define_tensor( + (num_devices, 4), (False, False), dtype=DataType.Float + ) self.t1 = self.ops.relu(self.t0) self.t2 = self.ops.add(self.t1, self.t1) self.add_output(self.t2) From 295b56c0bacc046f333666965c9ec41418eeda0e Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 14:11:15 -0700 Subject: [PATCH 35/42] review comments --- csrc/python_frontend/fusion_record.h | 4 ++-- csrc/python_frontend/python_bindings.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 8086a30db79..976e997c251 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -397,13 +397,13 @@ struct SliceOpRecord : RecordFunctor { Val* stride_idx = stride.at(idx); NVF_CHECK( !start_idx->isConstInt() || start_idx->evaluate().as() >= 0, - "Slice operation start_indices must be greater-than-or-equal-to 0. Start Indices: ", + "Slice operation start_indices must be greater than or equal to 0. Start Indices: ", start_idx->evaluate().as()); NVF_CHECK( !start_idx->isConstInt() || !end_idx->isConstInt() || end_idx->evaluate().as() >= start_idx->evaluate().as(), - "Slice operation end_indices must be greater-than-or-equal-to start_indices. Start Indices: ", + "Slice operation end_indices must be greater than or equal to start_indices. Start Indices: ", start_idx->evaluate().as(), " End Indices: ", end_idx->evaluate().as()); diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 2607d14c6ca..744dcdc0dfd 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -268,6 +268,7 @@ Tensor slice_fn( } else { // set stride with default value; std::vector stride_vec; + stride_vec.reserve(new_start.size); // Note: we cannot re-use the same ScalarRecord, otherwise, serialized // python program uses `define_vector`, which would create multiple // ScalarRecord, causing a cache miss. From cbc819de39db88012849225423e3c6e66b947bc1 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 18 Sep 2024 22:11:28 -0700 Subject: [PATCH 36/42] errr resolve conflicts --- tests/python/test_python_frontend.py | 45 +++++++++++++--------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 33f95925ab2..65d62dbeb99 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -4325,7 +4325,27 @@ def fusion_func(fd: FusionDefinition) -> None: self.exec_nvfuser(fusion_func, inputs) -<<<<<<< slice_python_api + # testing that error thrown in finalizeDefinition is not accidentally cached as legit fusion. + def test_fusion_definition_error_cache(self): + def fusion_func(fd: FusionDefinition) -> None: + # NOTE: it's important that the exception is thrown inside FusionDefinition::finalizeDefinition() + # e.g. https://github.com/NVIDIA/Fuser/blob/adbbc75e58e6c53c606e90c8bc64f020b4b9df85/csrc/python_frontend/fusion_record.h#L1276 + T0 = fd.define_tensor( + shape=[-1, -1], + contiguity=[True, True], + dtype=DataType.Int, + is_cpu=True, + stride_order=[1, 0], + ) + + for i in range(2): + with pytest.raises( + Exception, + match="CPU non-scalar tensor is not supported!", + ): + with FusionDefinition() as fd: + fusion_func(fd) + def test_slice_api(self): x = torch.randn((2, 5, 10), dtype=torch.float32, device="cuda:0") @@ -4357,26 +4377,3 @@ def fusion_func(fd: FusionDefinition) -> None: nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) for out in nvf_out: self.assertTrue(out.allclose(x[:, 1:, 2:])) - -======= ->>>>>>> main - # testing that error thrown in finalizeDefinition is not accidentally cached as legit fusion. - def test_fusion_definition_error_cache(self): - def fusion_func(fd: FusionDefinition) -> None: - # NOTE: it's important that the exception is thrown inside FusionDefinition::finalizeDefinition() - # e.g. https://github.com/NVIDIA/Fuser/blob/adbbc75e58e6c53c606e90c8bc64f020b4b9df85/csrc/python_frontend/fusion_record.h#L1276 - T0 = fd.define_tensor( - shape=[-1, -1], - contiguity=[True, True], - dtype=DataType.Int, - is_cpu=True, - stride_order=[1, 0], - ) - - for i in range(2): - with pytest.raises( - Exception, - match="CPU non-scalar tensor is not supported!", - ): - with FusionDefinition() as fd: - fusion_func(fd) From eecb0247c218952e12e1eb87f3384f37fdc6d234 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 09:19:01 -0700 Subject: [PATCH 37/42] fixing CI: 1. bump version; 2. update assert exception string --- tests/python/opinfo_input_generators.py | 4 ++-- version.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/python/opinfo_input_generators.py b/tests/python/opinfo_input_generators.py index c70572d7681..a90706f4df7 100644 --- a/tests/python/opinfo_input_generators.py +++ b/tests/python/opinfo_input_generators.py @@ -1153,12 +1153,12 @@ def slice_error_generator( check_start_indices = ErrorSample( {"start_indices": [-1, -2], "end_indices": [5, 5], "strides": [7, 7]}, - "Slice operation start_indices must be greater-than-or-equal-to 0.", + "Slice operation start_indices must be greater than or equal to 0.", ) check_end_indices = ErrorSample( {"start_indices": [3, 4], "end_indices": [1, 2], "strides": [1, 1]}, - "Slice operation end_indices must be greater-than-or-equal-to start_indices.", + "Slice operation end_indices must be greater than or equal to start_indices.", ) check_strides = ErrorSample( diff --git a/version.txt b/version.txt index d3b5ba4bfc3..f2722b13396 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.2.11 +0.2.12 From cd95c021b1e29d210124e157c1bbf51d2deda37e Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 10:07:14 -0700 Subject: [PATCH 38/42] fixing missing header --- csrc/exceptions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/csrc/exceptions.cpp b/csrc/exceptions.cpp index da87f01f0af..e9da7f75a44 100644 --- a/csrc/exceptions.cpp +++ b/csrc/exceptions.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include From 1e92624bf1219cf6a20132d3db4400f7b3d5ef51 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 16:43:30 -0700 Subject: [PATCH 39/42] because I'm a dumb dumb... --- tests/python/test_python_frontend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 65d62dbeb99..20ae4b5085f 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -2103,11 +2103,11 @@ def legal(fd: FusionDefinition, acts) -> None: checks = [ ( check_start_indices, - "Slice operation start_indices must be greater-than-or-equal-to 0. .*", + "Slice operation start_indices must be greater than or equal to 0. .*", ), ( check_end_indices, - "Slice operation end_indices must be greater-than-or-equal-to start_indices. .*", + "Slice operation end_indices must be greater than or equal to start_indices. .*", ), ( check_strides, From cb65c77409aeb0812d7829e5f2d0cbb26272de32 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 17:02:26 -0700 Subject: [PATCH 40/42] dumb dumb again --- tests/python/test_python_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index 20ae4b5085f..e57b521fdaa 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -2111,7 +2111,7 @@ def legal(fd: FusionDefinition, acts) -> None: ), ( check_strides, - "nvFuser Limitation: All slice operation strides must be of size 1. .*", + "nvFuser Limitation: All slice operation strides must be of const size 1. .*", ), ( check_tensor_dims, From 88ae47ff016d73db82c2043603211e5cf107c120 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 17:07:42 -0700 Subject: [PATCH 41/42] error message --- csrc/python_frontend/fusion_record.h | 2 +- tests/python/test_python_frontend.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/python_frontend/fusion_record.h b/csrc/python_frontend/fusion_record.h index 6293727e525..b1381941610 100644 --- a/csrc/python_frontend/fusion_record.h +++ b/csrc/python_frontend/fusion_record.h @@ -409,7 +409,7 @@ struct SliceOpRecord : RecordFunctor { end_idx->evaluate().as()); NVF_CHECK( stride_idx->isConstInt() && stride_idx->evaluate().as() == 1, - "nvFuser Limitation: All slice operation strides must be of const size 1"); + "nvFuser Limitation: All slice operation strides must be of const size 1."); vec_slice.push_back({start_idx, end_idx, stride_idx}); } auto output = slice(arg, vec_slice); diff --git a/tests/python/test_python_frontend.py b/tests/python/test_python_frontend.py index e57b521fdaa..f793428ac1c 100644 --- a/tests/python/test_python_frontend.py +++ b/tests/python/test_python_frontend.py @@ -2111,7 +2111,7 @@ def legal(fd: FusionDefinition, acts) -> None: ), ( check_strides, - "nvFuser Limitation: All slice operation strides must be of const size 1. .*", + "nvFuser Limitation: All slice operation strides must be of const size 1.*", ), ( check_tensor_dims, From 8ffa26d56e5e8673f36299ecf7d96accb61b7e65 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Thu, 19 Sep 2024 17:08:25 -0700 Subject: [PATCH 42/42] fixing test error message --- tests/python/opinfo_input_generators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/opinfo_input_generators.py b/tests/python/opinfo_input_generators.py index a90706f4df7..fb5a0ea1a20 100644 --- a/tests/python/opinfo_input_generators.py +++ b/tests/python/opinfo_input_generators.py @@ -1163,7 +1163,7 @@ def slice_error_generator( check_strides = ErrorSample( {"start_indices": [0, 0], "end_indices": [5, 5], "strides": [5, 5]}, - "nvFuser Limitation: All slice operation strides must be of const size 1", + "nvFuser Limitation: All slice operation strides must be of const size 1.", ) check_tensor_dims = ErrorSample(