This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Resolves issue #540 (#542)

Open
wants to merge 7 commits into master

2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,7 @@
# , "PyOpenGL-accelerate" # does not compile on 3.10
"dev": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", f"PytorchNvCodec @ file://{os.getcwd()}/src/PytorchNvCodec/"],
"samples": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", "tqdm", PytorchNvCodec],
"tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", PytorchNvCodec],
"tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "pycuda", "pydantic", PytorchNvCodec],
"torch": ["torch", "torchvision", PytorchNvCodec],
"tensorrt": ["torch", "torchvision", PytorchNvCodec],
},
63 changes: 48 additions & 15 deletions src/PyNvCodec/__init__.pyi
@@ -1,15 +1,18 @@
from typing import Any, ClassVar, Dict
from typing import Any, ClassVar, Dict, Tuple

from typing import overload
import numpy
ASYNC_ENCODE_SUPPORT: NV_ENC_CAPS
BGR: PixelFormat
BIT_DEPTH_MINUS_8: NV_DEC_CAPS
BIT_DEPTH_NOT_SUPPORTED: TaskExecInfo
BT_601: ColorSpace
BT_709: ColorSpace
DYNAMIC_QUERY_ENCODER_CAPACITY: NV_ENC_CAPS
END_OF_STREAM: TaskExecInfo
EXACT_FRAME: SeekMode
EXPOSED_COUNT: NV_ENC_CAPS
FAIL: TaskExecInfo
H264: CudaVideoCodec
HEIGHT_MAX: NV_ENC_CAPS
HEIGHT_MIN: NV_ENC_CAPS
@@ -28,6 +31,7 @@ MB_NUM_MAX: NV_ENC_CAPS
MB_PER_SEC_MAX: NV_ENC_CAPS
MIN_HEIGHT: NV_DEC_CAPS
MIN_WIDTH: NV_DEC_CAPS
MORE_DATA_NEEDED: TaskExecInfo
MPEG: ColorRange
NO_PTS: int
NUM_MAX_BFRAMES: NV_ENC_CAPS
@@ -44,6 +48,7 @@ RGB_32F: PixelFormat
RGB_32F_PLANAR: PixelFormat
RGB_PLANAR: PixelFormat
SEPARATE_COLOUR_PLANE: NV_ENC_CAPS
SUCCESS: TaskExecInfo
SUPPORTED_RATECONTROL_MODES: NV_ENC_CAPS
SUPPORT_10BIT_ENCODE: NV_ENC_CAPS
SUPPORT_ADAPTIVE_TRANSFORM: NV_ENC_CAPS
@@ -351,15 +356,18 @@ class PyFFmpegDemuxer:

class PyFfmpegDecoder:
def __init__(self, input: str, opts: Dict[str,str], gpu_id: int = ...) -> None: ...
def AvgFramerate(self) -> float: ...
def Codec(self) -> CudaVideoCodec: ...
def ColorRange(self) -> ColorRange: ...
def ColorSpace(self) -> ColorSpace: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
def DecodeSingleSurface(self, *args, **kwargs) -> Any: ...
def Format(self) -> PixelFormat: ...
def Framerate(self) -> float: ...
def GetMotionVectors(self) -> numpy.ndarray[MotionVector]: ...
def Height(self) -> int: ...
def Numframes(self) -> int: ...
def Timebase(self) -> float: ...
def Width(self) -> int: ...

class PyFrameUploader:
@@ -388,33 +396,33 @@ class PyNvDecoder:
def ColorRange(self) -> ColorRange: ...
def ColorSpace(self) -> ColorSpace: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
def DecodeSingleSurface(self, *args, **kwargs) -> Any: ...
def DecodeSurfaceFromPacket(self, enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData, bool_nvcv_check: bool) -> object: ...
@overload
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
def FlushSingleSurface(self, *args, **kwargs) -> Any: ...
def Format(self) -> PixelFormat: ...
def Framerate(self) -> float: ...
@@ -582,5 +590,30 @@ class SurfacePlane:
def Pitch(self) -> int: ...
def Width(self) -> int: ...

class TaskExecDetails:
info: TaskExecInfo
def __init__(self) -> None: ...

class TaskExecInfo:
__members__: ClassVar[dict] = ... # read-only
BIT_DEPTH_NOT_SUPPORTED: ClassVar[TaskExecInfo] = ...
END_OF_STREAM: ClassVar[TaskExecInfo] = ...
FAIL: ClassVar[TaskExecInfo] = ...
MORE_DATA_NEEDED: ClassVar[TaskExecInfo] = ...
SUCCESS: ClassVar[TaskExecInfo] = ...
__entries: ClassVar[dict] = ...
def __init__(self, value: int) -> None: ...
def __eq__(self, other: object) -> bool: ...
def __getstate__(self) -> int: ...
def __hash__(self) -> int: ...
def __index__(self) -> int: ...
def __int__(self) -> int: ...
def __ne__(self, other: object) -> bool: ...
def __setstate__(self, state: int) -> None: ...
@property
def name(self) -> str: ...
@property
def value(self) -> int: ...

def GetNumGpus() -> int: ...
def GetNvencParams() -> Dict[str,str]: ...
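
Reviewer note: a minimal usage sketch of the updated stubs, not part of the diff. It assumes a hypothetical local file `input.mp4` and GPU 0; the `PyNvDecoder` constructor and the numpy frame buffer are the existing API, and only the `(bool, TaskExecInfo)` unpacking reflects what this PR changes.

```python
import numpy as np
import PyNvCodec as nvc

gpu_id = 0
dec = nvc.PyNvDecoder("input.mp4", gpu_id)  # hypothetical input path

frame = np.ndarray(shape=(0,), dtype=np.uint8)
while True:
    # DecodeSingleFrame now returns (success, TaskExecInfo) instead of a bare bool.
    success, info = dec.DecodeSingleFrame(frame)
    if success:
        continue
    # On failure the second element tells the caller why decoding stopped.
    if info == nvc.TaskExecInfo.END_OF_STREAM:
        break
    raise RuntimeError(f"decode failed: {info.name}")
```
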
27 changes: 15 additions & 12 deletions src/PyNvCodec/inc/PyNvCodec.hpp
@@ -287,14 +287,17 @@ class PyFfmpegDecoder {
const std::map<std::string, std::string> &ffmpeg_options,
uint32_t gpuID);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame);
std::shared_ptr<Surface> DecodeSingleSurface();
bool DecodeSingleFrame(py::array_t<uint8_t> &frame, TaskExecDetails& details);
std::shared_ptr<Surface> DecodeSingleSurface(TaskExecDetails& details);

py::array_t<MotionVector> GetMotionVectors();

uint32_t Width() const;
uint32_t Height() const;
double Framerate() const;
double AvgFramerate() const;
double Timebase() const;
uint32_t Numframes() const;
ColorSpace Color_Space() const;
ColorRange Color_Range() const;
cudaVideoCodec Codec() const;
@@ -343,12 +346,11 @@ class PyNvDecoder {
const std::map<std::string, std::string> &ffmpeg_options):
PyNvDecoder(pathToFile, (CUcontext)ctx, (CUstream)str, ffmpeg_options){}

static Buffer *getElementaryVideo(DemuxFrame *demuxer,
SeekContext *seek_ctx, bool needSEI);
Buffer* getElementaryVideo(SeekContext* seek_ctx, TaskExecDetails& details,
bool needSEI);

static Surface *getDecodedSurface(NvdecDecodeFrame *decoder,
DemuxFrame *demuxer,
SeekContext *seek_ctx, bool needSEI);
Surface* getDecodedSurface(SeekContext* seek_ctx, TaskExecDetails& details,
bool needSEI);

uint32_t Width() const;

@@ -374,13 +376,14 @@

Pixel_Format GetPixelFormat() const;

bool DecodeSurface(class DecodeContext &ctx);
bool DecodeSurface(class DecodeContext& ctx, TaskExecDetails& details);

bool DecodeFrame(class DecodeContext &ctx, py::array_t<uint8_t>& frame);
bool DecodeFrame(class DecodeContext& ctx, TaskExecDetails& details,
py::array_t<uint8_t>& frame);

Surface *getDecodedSurfaceFromPacket(const py::array_t<uint8_t> *pPacket,
const PacketData *p_packet_data = nullptr,
bool no_eos = false);
Surface* getDecodedSurfaceFromPacket(
const py::array_t<uint8_t>* pPacket, TaskExecDetails& details,
const PacketData* p_packet_data = nullptr, bool no_eos = false);

void DownloaderLazyInit();

82 changes: 66 additions & 16 deletions src/PyNvCodec/src/PyFFMpegDecoder.cpp
@@ -34,33 +34,38 @@ PyFfmpegDecoder::PyFfmpegDecoder(const string& pathToFile,
upDecoder.reset(FfmpegDecodeFrame::Make(pathToFile.c_str(), cli_iface));
}

bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t<uint8_t>& frame)
bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t<uint8_t>& frame,
TaskExecDetails& details)
{
UpdateState();

if (TASK_EXEC_SUCCESS == upDecoder->Execute()) {
auto ret = upDecoder->Execute();
details = upDecoder->GetLastExecDetails();

if (TASK_EXEC_SUCCESS == ret) {
auto pRawFrame = (Buffer*)upDecoder->GetOutput(0U);
if (pRawFrame) {
auto const frame_size = pRawFrame->GetRawMemSize();
if (frame_size != frame.size()) {
frame.resize({frame_size}, false);
}

memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size);
memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size);
return true;
}
}

return false;
}

std::shared_ptr<Surface> PyFfmpegDecoder::DecodeSingleSurface()
std::shared_ptr<Surface>
PyFfmpegDecoder::DecodeSingleSurface(TaskExecDetails& details)
{
py::array_t<uint8_t> frame;
std::shared_ptr<Surface> p_surf = nullptr;

UploaderLazyInit();
if (DecodeSingleFrame(frame)) {
if (DecodeSingleFrame(frame, details)) {
p_surf = upUploader->UploadSingleFrame(frame);
}

@@ -188,6 +193,27 @@ cudaVideoCodec PyFfmpegDecoder::Codec() const
return params.videoContext.codec;
};

double PyFfmpegDecoder::AvgFramerate() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.avgFrameRate;
};

double PyFfmpegDecoder::Timebase() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.timeBase;
};

uint32_t PyFfmpegDecoder::Numframes() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.num_frames;
};

Pixel_Format PyFfmpegDecoder::PixelFormat() const
{
MuxingParams params;
@@ -197,7 +223,7 @@ Pixel_Format PyFfmpegDecoder::PixelFormat() const

void Init_PyFFMpegDecoder(py::module& m)
{
py::class_<PyFfmpegDecoder>(m, "PyFfmpegDecoder")
py::class_<PyFfmpegDecoder, shared_ptr<PyFfmpegDecoder>>(m, "PyFfmpegDecoder")
.def(py::init<const string&, const map<string, string>&, uint32_t>(),
py::arg("input"), py::arg("opts"), py::arg("gpu_id") = 0,
R"pbdoc(
@@ -206,22 +232,33 @@ void Init_PyFFMpegDecoder(py::module& m)
:param input: path to input file
:param opts: AVDictionary options that will be passed to AVFormat context.
)pbdoc")
.def("DecodeSingleFrame", &PyFfmpegDecoder::DecodeSingleFrame,
py::arg("frame"), py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
.def(
"DecodeSingleFrame",
[](shared_ptr<PyFfmpegDecoder> self, py::array_t<uint8_t>& frame) {
TaskExecDetails details;
auto res = self->DecodeSingleFrame(frame, details);
return std::make_tuple(res, details.info);
},
py::arg("frame"), py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
Decode single video frame from input file.

:param frame: decoded video frame
:return: True in case of success, False otherwise
        :return: tuple, first element is True in case of success, False otherwise. Second element is TaskExecInfo.
)pbdoc")
.def("DecodeSingleSurface", &PyFfmpegDecoder::DecodeSingleSurface,
py::return_value_policy::take_ownership,
py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
.def(
"DecodeSingleSurface",
[](shared_ptr<PyFfmpegDecoder> self) {
TaskExecDetails details;
auto res = self->DecodeSingleSurface(details);
return std::make_tuple(res, details.info);
},
py::return_value_policy::take_ownership,
py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
Decode single video frame from input file and upload to GPU memory.

:return: Surface allocated in GPU memory. It's Empty() in case of failure,
non-empty otherwise.
:return: tuple, first element is the surface, second is TaskExecInfo.
)pbdoc")
.def("GetMotionVectors", &PyFfmpegDecoder::GetMotionVectors,
py::return_value_policy::move,
@@ -246,6 +283,19 @@
.def("Framerate", &PyFfmpegDecoder::Framerate,
R"pbdoc(
Return encoded video file framerate.
)pbdoc")
.def("AvgFramerate", &PyFfmpegDecoder::AvgFramerate,
R"pbdoc(
Return encoded video file average framerate.
)pbdoc")
.def("Timebase", &PyFfmpegDecoder::Timebase,
R"pbdoc(
Return encoded video file time base.
)pbdoc")
.def("Numframes", &PyFfmpegDecoder::Numframes,
R"pbdoc(
Return number of video frames in encoded video file.
        Please note that some video containers don't store this information.
)pbdoc")
.def("ColorSpace", &PyFfmpegDecoder::Color_Space,
R"pbdoc(
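
Reviewer note: a short sketch of the PyFfmpegDecoder additions from this file, assuming a hypothetical `input.mp4`. The metadata accessors (`AvgFramerate`, `Timebase`, `Numframes`) and the tuple returned by `DecodeSingleFrame` are the new pieces; everything else is the existing binding.

```python
import numpy as np
import PyNvCodec as nvc

ffdec = nvc.PyFfmpegDecoder("input.mp4", {})  # opts dict is forwarded to the AVFormat context as before

print("framerate:     ", ffdec.Framerate())
print("avg framerate: ", ffdec.AvgFramerate())  # new
print("time base:     ", ffdec.Timebase())      # new
print("frame count:   ", ffdec.Numframes())     # new; may be 0 if the container omits it

frame = np.ndarray(shape=(0,), dtype=np.uint8)
decoded = 0
while True:
    success, info = ffdec.DecodeSingleFrame(frame)  # now a (bool, TaskExecInfo) tuple
    if not success:
        break
    decoded += 1
print(f"decoded {decoded} frames, last status: {info.name}")
```
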
12 changes: 12 additions & 0 deletions src/PyNvCodec/src/PyNvCodec.cpp
@@ -237,6 +237,14 @@ PYBIND11_MODULE(_PyNvCodec, m)
.value("P12", Pixel_Format::P12)
.export_values();

py::enum_<TaskExecInfo>(m, "TaskExecInfo")
.value("FAIL", TaskExecInfo::FAIL)
.value("SUCCESS", TaskExecInfo::SUCCESS)
.value("END_OF_STREAM", TaskExecInfo::END_OF_STREAM)
.value("MORE_DATA_NEEDED", TaskExecInfo::MORE_DATA_NEEDED)
.value("BIT_DEPTH_NOT_SUPPORTED", TaskExecInfo::BIT_DEPTH_NOT_SUPPORTED)
.export_values();

py::enum_<ColorSpace>(m, "ColorSpace")
.value("BT_601", ColorSpace::BT_601)
.value("BT_709", ColorSpace::BT_709)
@@ -328,6 +336,10 @@ PYBIND11_MODULE(_PyNvCodec, m)
return ss.str();
});

py::class_<TaskExecDetails, shared_ptr<TaskExecDetails>>(m, "TaskExecDetails")
.def(py::init<>())
.def_readwrite("info", &TaskExecDetails::info);

py::class_<ColorspaceConversionContext,
shared_ptr<ColorspaceConversionContext>>(
m, "ColorspaceConversionContext")
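
Reviewer note: a small sketch of what these bindings expose on the Python side, using only names registered in this file. `TaskExecInfo` behaves like a regular pybind11 enum (with `.export_values()` also placing the constants at module level), and `TaskExecDetails` is a default-constructible holder whose `info` field carries the status.

```python
import PyNvCodec as nvc

# Enum members registered above, also mirrored in __init__.pyi.
print(list(nvc.TaskExecInfo.__members__))
# ['FAIL', 'SUCCESS', 'END_OF_STREAM', 'MORE_DATA_NEEDED', 'BIT_DEPTH_NOT_SUPPORTED']

# export_values() makes the constants available directly on the module as well.
assert nvc.SUCCESS == nvc.TaskExecInfo.SUCCESS

# TaskExecDetails is default-constructible and exposes a read/write 'info' field.
details = nvc.TaskExecDetails()
details.info = nvc.TaskExecInfo.MORE_DATA_NEEDED
print(details.info.name, int(details.info))
```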