This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

Resolves issue #540 (#542)

Open
wants to merge 7 commits into master

2 changes: 1 addition & 1 deletion setup.py
@@ -29,7 +29,7 @@
# , "PyOpenGL-accelerate" # does not compile on 3.10
"dev": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", f"PytorchNvCodec @ file://{os.getcwd()}/src/PytorchNvCodec/"],
"samples": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "onnx", "tensorrt", "tqdm", PytorchNvCodec],
"tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", PytorchNvCodec],
"tests": ["pycuda", "pyopengl", "torch", "torchvision", "opencv-python", "pycuda", "pydantic", PytorchNvCodec],
"torch": ["torch", "torchvision", PytorchNvCodec],
"tensorrt": ["torch", "torchvision", PytorchNvCodec],
},
63 changes: 48 additions & 15 deletions src/PyNvCodec/__init__.pyi
@@ -1,15 +1,18 @@
from typing import Any, ClassVar, Dict
from typing import Any, ClassVar, Dict, Tuple

from typing import overload
import numpy
ASYNC_ENCODE_SUPPORT: NV_ENC_CAPS
BGR: PixelFormat
BIT_DEPTH_MINUS_8: NV_DEC_CAPS
BIT_DEPTH_NOT_SUPPORTED: TaskExecInfo
BT_601: ColorSpace
BT_709: ColorSpace
DYNAMIC_QUERY_ENCODER_CAPACITY: NV_ENC_CAPS
END_OF_STREAM: TaskExecInfo
EXACT_FRAME: SeekMode
EXPOSED_COUNT: NV_ENC_CAPS
FAIL: TaskExecInfo
H264: CudaVideoCodec
HEIGHT_MAX: NV_ENC_CAPS
HEIGHT_MIN: NV_ENC_CAPS
@@ -28,6 +31,7 @@ MB_NUM_MAX: NV_ENC_CAPS
MB_PER_SEC_MAX: NV_ENC_CAPS
MIN_HEIGHT: NV_DEC_CAPS
MIN_WIDTH: NV_DEC_CAPS
MORE_DATA_NEEDED: TaskExecInfo
MPEG: ColorRange
NO_PTS: int
NUM_MAX_BFRAMES: NV_ENC_CAPS
@@ -44,6 +48,7 @@ RGB_32F: PixelFormat
RGB_32F_PLANAR: PixelFormat
RGB_PLANAR: PixelFormat
SEPARATE_COLOUR_PLANE: NV_ENC_CAPS
SUCCESS: TaskExecInfo
SUPPORTED_RATECONTROL_MODES: NV_ENC_CAPS
SUPPORT_10BIT_ENCODE: NV_ENC_CAPS
SUPPORT_ADAPTIVE_TRANSFORM: NV_ENC_CAPS
@@ -351,15 +356,18 @@ class PyFFmpegDemuxer:

class PyFfmpegDecoder:
def __init__(self, input: str, opts: Dict[str,str], gpu_id: int = ...) -> None: ...
def AvgFramerate(self) -> float: ...
def Codec(self) -> CudaVideoCodec: ...
def ColorRange(self) -> ColorRange: ...
def ColorSpace(self) -> ColorSpace: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
def DecodeSingleSurface(self, *args, **kwargs) -> Any: ...
def Format(self) -> PixelFormat: ...
def Framerate(self) -> float: ...
def GetMotionVectors(self) -> numpy.ndarray[MotionVector]: ...
def Height(self) -> int: ...
def Numframes(self) -> int: ...
def Timebase(self) -> float: ...
def Width(self) -> int: ...

class PyFrameUploader:
@@ -388,33 +396,33 @@ class PyNvDecoder:
def ColorRange(self) -> ColorRange: ...
def ColorSpace(self) -> ColorSpace: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeFrameFromPacket(self, frame: numpy.ndarray[numpy.uint8], packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], sei: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext) -> Tuple[bool,TaskExecInfo]: ...
@overload
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> bool: ...
def DecodeSingleFrame(self, frame: numpy.ndarray[numpy.uint8], seek_context: SeekContext, pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
def DecodeSingleSurface(self, *args, **kwargs) -> Any: ...
def DecodeSurfaceFromPacket(self, enc_packet_data: PacketData, packet: numpy.ndarray[numpy.uint8], pkt_data: PacketData, bool_nvcv_check: bool) -> object: ...
@overload
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> bool: ...
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8]) -> Tuple[bool,TaskExecInfo]: ...
@overload
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> bool: ...
def FlushSingleFrame(self, frame: numpy.ndarray[numpy.uint8], pkt_data: PacketData) -> Tuple[bool,TaskExecInfo]: ...
def FlushSingleSurface(self, *args, **kwargs) -> Any: ...
def Format(self) -> PixelFormat: ...
def Framerate(self) -> float: ...
@@ -582,5 +590,30 @@ class SurfacePlane:
def Pitch(self) -> int: ...
def Width(self) -> int: ...

class TaskExecDetails:
info: TaskExecInfo
def __init__(self) -> None: ...

class TaskExecInfo:
__members__: ClassVar[dict] = ... # read-only
BIT_DEPTH_NOT_SUPPORTED: ClassVar[TaskExecInfo] = ...
END_OF_STREAM: ClassVar[TaskExecInfo] = ...
FAIL: ClassVar[TaskExecInfo] = ...
MORE_DATA_NEEDED: ClassVar[TaskExecInfo] = ...
SUCCESS: ClassVar[TaskExecInfo] = ...
__entries: ClassVar[dict] = ...
def __init__(self, value: int) -> None: ...
def __eq__(self, other: object) -> bool: ...
def __getstate__(self) -> int: ...
def __hash__(self) -> int: ...
def __index__(self) -> int: ...
def __int__(self) -> int: ...
def __ne__(self, other: object) -> bool: ...
def __setstate__(self, state: int) -> None: ...
@property
def name(self) -> str: ...
@property
def value(self) -> int: ...

def GetNumGpus() -> int: ...
def GetNvencParams() -> Dict[str,str]: ...
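
Reviewer note: a minimal usage sketch of the updated stubs, not part of the diff. It assumes a hypothetical local file `input.mp4` and GPU 0; the `PyNvDecoder` constructor and the numpy frame buffer are the existing API, and only the `(bool, TaskExecInfo)` unpacking reflects what this PR changes.

```python
import numpy as np
import PyNvCodec as nvc

gpu_id = 0
dec = nvc.PyNvDecoder("input.mp4", gpu_id)  # hypothetical input path

frame = np.ndarray(shape=(0,), dtype=np.uint8)
while True:
    # DecodeSingleFrame now returns (success, TaskExecInfo) instead of a bare bool.
    success, info = dec.DecodeSingleFrame(frame)
    if success:
        continue
    # On failure the second element tells the caller why decoding stopped.
    if info == nvc.TaskExecInfo.END_OF_STREAM:
        break
    raise RuntimeError(f"decode failed: {info.name}")
```
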
27 changes: 15 additions & 12 deletions src/PyNvCodec/inc/PyNvCodec.hpp
@@ -287,14 +287,17 @@ class PyFfmpegDecoder {
const std::map<std::string, std::string> &ffmpeg_options,
uint32_t gpuID);

bool DecodeSingleFrame(py::array_t<uint8_t> &frame);
std::shared_ptr<Surface> DecodeSingleSurface();
bool DecodeSingleFrame(py::array_t<uint8_t> &frame, TaskExecDetails& details);
std::shared_ptr<Surface> DecodeSingleSurface(TaskExecDetails& details);

py::array_t<MotionVector> GetMotionVectors();

uint32_t Width() const;
uint32_t Height() const;
double Framerate() const;
double AvgFramerate() const;
double Timebase() const;
uint32_t Numframes() const;
ColorSpace Color_Space() const;
ColorRange Color_Range() const;
cudaVideoCodec Codec() const;
@@ -343,12 +346,11 @@ class PyNvDecoder {
const std::map<std::string, std::string> &ffmpeg_options):
PyNvDecoder(pathToFile, (CUcontext)ctx, (CUstream)str, ffmpeg_options){}

static Buffer *getElementaryVideo(DemuxFrame *demuxer,
SeekContext *seek_ctx, bool needSEI);
Buffer* getElementaryVideo(SeekContext* seek_ctx, TaskExecDetails& details,
bool needSEI);

static Surface *getDecodedSurface(NvdecDecodeFrame *decoder,
DemuxFrame *demuxer,
SeekContext *seek_ctx, bool needSEI);
Surface* getDecodedSurface(SeekContext* seek_ctx, TaskExecDetails& details,
bool needSEI);

uint32_t Width() const;

@@ -374,13 +376,14 @@

Pixel_Format GetPixelFormat() const;

bool DecodeSurface(class DecodeContext &ctx);
bool DecodeSurface(class DecodeContext& ctx, TaskExecDetails& details);

bool DecodeFrame(class DecodeContext &ctx, py::array_t<uint8_t>& frame);
bool DecodeFrame(class DecodeContext& ctx, TaskExecDetails& details,
py::array_t<uint8_t>& frame);

Surface *getDecodedSurfaceFromPacket(const py::array_t<uint8_t> *pPacket,
const PacketData *p_packet_data = nullptr,
bool no_eos = false);
Surface* getDecodedSurfaceFromPacket(
const py::array_t<uint8_t>* pPacket, TaskExecDetails& details,
const PacketData* p_packet_data = nullptr, bool no_eos = false);

void DownloaderLazyInit();

82 changes: 66 additions & 16 deletions src/PyNvCodec/src/PyFFMpegDecoder.cpp
@@ -34,33 +34,38 @@ PyFfmpegDecoder::PyFfmpegDecoder(const string& pathToFile,
upDecoder.reset(FfmpegDecodeFrame::Make(pathToFile.c_str(), cli_iface));
}

bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t<uint8_t>& frame)
bool PyFfmpegDecoder::DecodeSingleFrame(py::array_t<uint8_t>& frame,
TaskExecDetails& details)
{
UpdateState();

if (TASK_EXEC_SUCCESS == upDecoder->Execute()) {
auto ret = upDecoder->Execute();
details = upDecoder->GetLastExecDetails();

if (TASK_EXEC_SUCCESS == ret) {
auto pRawFrame = (Buffer*)upDecoder->GetOutput(0U);
if (pRawFrame) {
auto const frame_size = pRawFrame->GetRawMemSize();
if (frame_size != frame.size()) {
frame.resize({frame_size}, false);
}

memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size);
memcpy(frame.mutable_data(), pRawFrame->GetRawMemPtr(), frame_size);
return true;
}
}

return false;
}

std::shared_ptr<Surface> PyFfmpegDecoder::DecodeSingleSurface()
std::shared_ptr<Surface>
PyFfmpegDecoder::DecodeSingleSurface(TaskExecDetails& details)
{
py::array_t<uint8_t> frame;
std::shared_ptr<Surface> p_surf = nullptr;

UploaderLazyInit();
if (DecodeSingleFrame(frame)) {
if (DecodeSingleFrame(frame, details)) {
p_surf = upUploader->UploadSingleFrame(frame);
}

@@ -188,6 +193,27 @@ cudaVideoCodec PyFfmpegDecoder::Codec() const
return params.videoContext.codec;
};

double PyFfmpegDecoder::AvgFramerate() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.avgFrameRate;
};

double PyFfmpegDecoder::Timebase() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.timeBase;
};

uint32_t PyFfmpegDecoder::Numframes() const
{
MuxingParams params;
upDecoder->GetParams(params);
return params.videoContext.num_frames;
};

Pixel_Format PyFfmpegDecoder::PixelFormat() const
{
MuxingParams params;
@@ -197,7 +223,7 @@ Pixel_Format PyFfmpegDecoder::PixelFormat() const

void Init_PyFFMpegDecoder(py::module& m)
{
py::class_<PyFfmpegDecoder>(m, "PyFfmpegDecoder")
py::class_<PyFfmpegDecoder, shared_ptr<PyFfmpegDecoder>>(m, "PyFfmpegDecoder")
.def(py::init<const string&, const map<string, string>&, uint32_t>(),
py::arg("input"), py::arg("opts"), py::arg("gpu_id") = 0,
R"pbdoc(
@@ -206,22 +232,33 @@ void Init_PyFFMpegDecoder(py::module& m)
:param input: path to input file
:param opts: AVDictionary options that will be passed to AVFormat context.
)pbdoc")
.def("DecodeSingleFrame", &PyFfmpegDecoder::DecodeSingleFrame,
py::arg("frame"), py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
.def(
"DecodeSingleFrame",
[](shared_ptr<PyFfmpegDecoder> self, py::array_t<uint8_t>& frame) {
TaskExecDetails details;
auto res = self->DecodeSingleFrame(frame, details);
return std::make_tuple(res, details.info);
},
py::arg("frame"), py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
Decode single video frame from input file.

:param frame: decoded video frame
:return: True in case of success, False otherwise
        :return: tuple, first element is True in case of success, False otherwise. Second element is TaskExecInfo.
)pbdoc")
.def("DecodeSingleSurface", &PyFfmpegDecoder::DecodeSingleSurface,
py::return_value_policy::take_ownership,
py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
.def(
"DecodeSingleSurface",
[](shared_ptr<PyFfmpegDecoder> self) {
TaskExecDetails details;
auto res = self->DecodeSingleSurface(details);
return std::make_tuple(res, details.info);
},
py::return_value_policy::take_ownership,
py::call_guard<py::gil_scoped_release>(),
R"pbdoc(
Decode single video frame from input file and upload to GPU memory.

:return: Surface allocated in GPU memory. It's Empty() in case of failure,
non-empty otherwise.
:return: tuple, first element is the surface, second is TaskExecInfo.
)pbdoc")
.def("GetMotionVectors", &PyFfmpegDecoder::GetMotionVectors,
py::return_value_policy::move,
@@ -246,6 +283,19 @@
.def("Framerate", &PyFfmpegDecoder::Framerate,
R"pbdoc(
Return encoded video file framerate.
)pbdoc")
.def("AvgFramerate", &PyFfmpegDecoder::AvgFramerate,
R"pbdoc(
Return encoded video file average framerate.
)pbdoc")
.def("Timebase", &PyFfmpegDecoder::Timebase,
R"pbdoc(
Return encoded video file time base.
)pbdoc")
.def("Numframes", &PyFfmpegDecoder::Numframes,
R"pbdoc(
Return number of video frames in encoded video file.
        Please note that some video containers don't store this information.
)pbdoc")
.def("ColorSpace", &PyFfmpegDecoder::Color_Space,
R"pbdoc(
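
Reviewer note: a short sketch of the PyFfmpegDecoder additions from this file, assuming a hypothetical `input.mp4`. The metadata accessors (`AvgFramerate`, `Timebase`, `Numframes`) and the tuple returned by `DecodeSingleFrame` are the new pieces; everything else is the existing binding.

```python
import numpy as np
import PyNvCodec as nvc

ffdec = nvc.PyFfmpegDecoder("input.mp4", {})  # opts dict is forwarded to the AVFormat context as before

print("framerate:     ", ffdec.Framerate())
print("avg framerate: ", ffdec.AvgFramerate())  # new
print("time base:     ", ffdec.Timebase())      # new
print("frame count:   ", ffdec.Numframes())     # new; may be 0 if the container omits it

frame = np.ndarray(shape=(0,), dtype=np.uint8)
decoded = 0
while True:
    success, info = ffdec.DecodeSingleFrame(frame)  # now a (bool, TaskExecInfo) tuple
    if not success:
        break
    decoded += 1
print(f"decoded {decoded} frames, last status: {info.name}")
```
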
12 changes: 12 additions & 0 deletions src/PyNvCodec/src/PyNvCodec.cpp
@@ -237,6 +237,14 @@ PYBIND11_MODULE(_PyNvCodec, m)
.value("P12", Pixel_Format::P12)
.export_values();

py::enum_<TaskExecInfo>(m, "TaskExecInfo")
.value("FAIL", TaskExecInfo::FAIL)
.value("SUCCESS", TaskExecInfo::SUCCESS)
.value("END_OF_STREAM", TaskExecInfo::END_OF_STREAM)
.value("MORE_DATA_NEEDED", TaskExecInfo::MORE_DATA_NEEDED)
.value("BIT_DEPTH_NOT_SUPPORTED", TaskExecInfo::BIT_DEPTH_NOT_SUPPORTED)
.export_values();

py::enum_<ColorSpace>(m, "ColorSpace")
.value("BT_601", ColorSpace::BT_601)
.value("BT_709", ColorSpace::BT_709)
@@ -328,6 +336,10 @@ PYBIND11_MODULE(_PyNvCodec, m)
return ss.str();
});

py::class_<TaskExecDetails, shared_ptr<TaskExecDetails>>(m, "TaskExecDetails")
.def(py::init<>())
.def_readwrite("info", &TaskExecDetails::info);

py::class_<ColorspaceConversionContext,
shared_ptr<ColorspaceConversionContext>>(
m, "ColorspaceConversionContext")
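
Reviewer note: a small sketch of what these bindings expose on the Python side, using only names registered in this file. `TaskExecInfo` behaves like a regular pybind11 enum (with `.export_values()` also placing the constants at module level), and `TaskExecDetails` is a default-constructible holder whose `info` field carries the status.

```python
import PyNvCodec as nvc

# Enum members registered above, also mirrored in __init__.pyi.
print(list(nvc.TaskExecInfo.__members__))
# ['FAIL', 'SUCCESS', 'END_OF_STREAM', 'MORE_DATA_NEEDED', 'BIT_DEPTH_NOT_SUPPORTED']

# export_values() makes the constants available directly on the module as well.
assert nvc.SUCCESS == nvc.TaskExecInfo.SUCCESS

# TaskExecDetails is default-constructible and exposes a read/write 'info' field.
details = nvc.TaskExecDetails()
details.info = nvc.TaskExecInfo.MORE_DATA_NEEDED
print(details.info.name, int(details.info))
```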