-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor seeking to only store pts as int64 timestamp #262
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -647,26 +647,25 @@ bool VideoDecoder::canWeAvoidSeekingForStream( | |
// AVFormatContext if it is needed. We can skip seeking in certain cases. See | ||
// the comment of canWeAvoidSeeking() for details. | ||
void VideoDecoder::maybeSeekToBeforeDesiredPts() { | ||
TORCH_CHECK( | ||
hasDesiredPts_, | ||
"maybeSeekToBeforeDesiredPts() called when hasDesiredPts_ is false"); | ||
if (activeStreamIndices_.size() == 0) { | ||
return; | ||
} | ||
for (int streamIndex : activeStreamIndices_) { | ||
StreamInfo& streamInfo = streams_[streamIndex]; | ||
streamInfo.discardFramesBeforePts = | ||
*maybeDesiredPts_ * streamInfo.timeBase.den; | ||
} | ||
|
||
decodeStats_.numSeeksAttempted++; | ||
// See comment for canWeAvoidSeeking() for details on why this optimization | ||
// works. | ||
bool mustSeek = false; | ||
for (int streamIndex : activeStreamIndices_) { | ||
StreamInfo& streamInfo = streams_[streamIndex]; | ||
int64_t desiredPtsForStream = *maybeDesiredPts_ * streamInfo.timeBase.den; | ||
if (!canWeAvoidSeekingForStream( | ||
streamInfo, streamInfo.currentPts, desiredPtsForStream)) { | ||
streamInfo, | ||
streamInfo.currentPts, | ||
*streamInfo.discardFramesBeforePts)) { | ||
VLOG(5) << "Seeking is needed for streamIndex=" << streamIndex | ||
<< " desiredPts=" << desiredPtsForStream | ||
<< " desiredPts=" << *streamInfo.discardFramesBeforePts | ||
<< " currentPts=" << streamInfo.currentPts; | ||
mustSeek = true; | ||
break; | ||
|
@@ -678,7 +677,7 @@ void VideoDecoder::maybeSeekToBeforeDesiredPts() { | |
} | ||
int firstActiveStreamIndex = *activeStreamIndices_.begin(); | ||
const auto& firstStreamInfo = streams_[firstActiveStreamIndex]; | ||
int64_t desiredPts = *maybeDesiredPts_ * firstStreamInfo.timeBase.den; | ||
int64_t desiredPts = *firstStreamInfo.discardFramesBeforePts; | ||
|
||
// For some encodings like H265, FFMPEG sometimes seeks past the point we | ||
// set as the max_ts. So we use our own index to give it the exact pts of | ||
|
@@ -718,10 +717,10 @@ VideoDecoder::RawDecodedOutput VideoDecoder::getDecodedOutputWithFilter( | |
} | ||
VLOG(9) << "Starting getDecodedOutputWithFilter()"; | ||
resetDecodeStats(); | ||
if (maybeDesiredPts_.has_value()) { | ||
VLOG(9) << "maybeDesiredPts_=" << *maybeDesiredPts_; | ||
if (hasDesiredPts_) { | ||
maybeSeekToBeforeDesiredPts(); | ||
maybeDesiredPts_ = std::nullopt; | ||
hasDesiredPts_ = false; | ||
// FIXME: should we also reset each stream info's discardFramesBeforePts? | ||
VLOG(9) << "seeking done"; | ||
} | ||
auto seekDone = std::chrono::high_resolution_clock::now(); | ||
|
@@ -988,7 +987,7 @@ VideoDecoder::DecodedOutput VideoDecoder::getFrameAtIndex( | |
validateFrameIndex(stream, frameIndex); | ||
|
||
int64_t pts = stream.allFrames[frameIndex].pts; | ||
setCursorPtsInSeconds(ptsToSeconds(pts, stream.timeBase)); | ||
setCursorPts(pts); | ||
return getNextDecodedOutputNoDemux(); | ||
} | ||
|
||
|
@@ -1010,7 +1009,7 @@ VideoDecoder::BatchDecodedOutput VideoDecoder::getFramesAtIndices( | |
"Invalid frame index=" + std::to_string(frameIndex)); | ||
} | ||
int64_t pts = stream.allFrames[frameIndex].pts; | ||
setCursorPtsInSeconds(ptsToSeconds(pts, stream.timeBase)); | ||
setCursorPts(pts); | ||
auto rawSingleOutput = getNextRawDecodedOutputNoDemux(); | ||
if (stream.colorConversionLibrary == ColorConversionLibrary::SWSCALE) { | ||
// We are using sws_scale to convert the frame to tensor. sws_scale can | ||
|
@@ -1179,7 +1178,18 @@ VideoDecoder::DecodedOutput VideoDecoder::getNextDecodedOutputNoDemux() { | |
} | ||
|
||
void VideoDecoder::setCursorPtsInSeconds(double seconds) { | ||
maybeDesiredPts_ = seconds; | ||
for (int streamIndex : activeStreamIndices_) { | ||
StreamInfo& streamInfo = streams_[streamIndex]; | ||
streamInfo.discardFramesBeforePts = seconds * streamInfo.timeBase.den; | ||
} | ||
hasDesiredPts_ = true; | ||
} | ||
|
||
void VideoDecoder::setCursorPts(int64_t pts) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know that this function makes sense without all streams having a common timebase There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since it's internal, and we always know a stream index when calling it, I think it's fine if this takes a stream index. |
||
for (int streamIndex : activeStreamIndices_) { | ||
streams_[streamIndex].discardFramesBeforePts = pts; | ||
} | ||
hasDesiredPts_ = true; | ||
} | ||
|
||
VideoDecoder::DecodeStats VideoDecoder::getDecodeStats() const { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -367,6 +367,7 @@ class VideoDecoder { | |
void convertAVFrameToDecodedOutputOnCPU( | ||
RawDecodedOutput& rawOutput, | ||
DecodedOutput& output); | ||
void setCursorPts(int64_t pts); | ||
|
||
DecoderOptions options_; | ||
ContainerMetadata containerMetadata_; | ||
|
@@ -375,9 +376,9 @@ class VideoDecoder { | |
// Stores the stream indices of the active streams, i.e. the streams we are | ||
// decoding and returning to the user. | ||
std::set<int> activeStreamIndices_; | ||
// Set when the user wants to seek and stores the desired pts that the user | ||
// wants to seek to. | ||
std::optional<double> maybeDesiredPts_; | ||
// True when the user wants to seek. The actual pts values to seek to are | ||
// stored in the per-stream metadata in discardFramesBeforePts. | ||
bool hasDesiredPts_ = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seeking is hard to implement correctly and I am not confident about this change. I think at least for debugging purposes we should keep the original double value around and when returning a frame after a seek we should ensure it's >= the value passed in here |
||
|
||
// Stores various internal decoding stats. | ||
DecodeStats decodeStats_; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like the original change because its lazier -- when the user seeks a bunch of times only the last seek value should affect anything. The new change sets things eagerly
More importantly, if the user adds a stream after calling seek it doesn't seek that stream, so it's less robust too