Skip to content

Commit d98b2b0

Browse files
authored
Merge branch 'main' into fix-include
2 parents 2517f6b + 08fcb94 commit d98b2b0

File tree

5 files changed

+40
-20
lines changed

5 files changed

+40
-20
lines changed

libkineto/sample_programs/build.sh

+2
Original file line number | Diff line number | Diff line change
@@ -18,5 +18,7 @@ g++ \
1818
-lpthread \
1919
-lcuda \
2020
-lcudart \
21+
-lcupti \
22+
-lnvperf_host \
2123
/usr/local/lib/libkineto.a \
2224
kplay_cu.o

libkineto/sample_programs/kineto_playground.cpp

+17
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9+
#include <cuda_runtime.h>
910
#include <iostream>
1011
#include <string>
1112

@@ -14,6 +15,20 @@
1415
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
1516
#include "kineto_playground.cuh"
1617

18+
// CHECK_CUDA(call): evaluates `call` once and stores its cudaError_t result.
// If the result is not cudaSuccess, prints the source file, line number and
// the cudaGetErrorString() text to stderr, then terminates via exit(1).
// The do { ... } while (0) wrapper lets the macro be used like an ordinary
// statement followed by a semicolon.
#define CHECK_CUDA(call) \
19+
do { \
20+
cudaError_t status = call; \
21+
if (status != cudaSuccess) { \
22+
fprintf( \
23+
stderr, \
24+
"CUDA Error at %s:%d: %s\n", \
25+
__FILE__, \
26+
__LINE__, \
27+
cudaGetErrorString(status)); \
28+
exit(1); \
29+
} \
30+
} while (0)
31+
1732
using namespace kineto;
1833

1934
static const std::string kFileName = "/tmp/kineto_playground_trace.json";
@@ -23,6 +38,7 @@ int main() {
2338
warmup();
2439

2540
// Kineto config
41+
libkineto_init(false, true);
2642

2743
// Empty types set defaults to all types
2844
std::set<libkineto::ActivityType> types;
@@ -38,6 +54,7 @@ int main() {
3854
profiler.startTrace();
3955
std::cout << "Start playground" << std::endl;
4056
playground();
57+
CHECK_CUDA(cudaDeviceSynchronize());
4158

4259
std::cout << "Stop Trace" << std::endl;
4360
auto trace = profiler.stopTrace();

libkineto/sample_programs/kineto_playground.cu

+1-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ __global__ void square(float* A, int N) {
7676

7777
void playground(void) {
7878
// Add your experimental CUDA implementation here.
79-
basicMemcpyFromDevice();
79+
basicMemcpyToDevice();
8080
compute();
8181
basicMemcpyFromDevice();
8282
}

libkineto/src/CuptiActivityProfiler.cpp

+10-12
Original file line number | Diff line number | Diff line change
@@ -459,12 +459,9 @@ void CuptiActivityProfiler::processCpuTrace(
459459
const std::unique_ptr<GenericTraceActivity>>::value,
460460
"handleActivity is unsafe and relies on the caller to maintain not "
461461
"only lifetime but also address stability.");
462-
if (act->type() == ActivityType::USER_ANNOTATION &&
463-
act->duration() <= 0) {
462+
if (act->duration() <= 0) {
464463
act->endTime = captureWindowEndTime_;
465464
act->addMetadata("finished", "false");
466-
} else {
467-
act->addMetadata("finished", "true");
468465
}
469466
logger.handleActivity(*act);
470467
}
@@ -581,8 +578,8 @@ inline static bool isBlockListedRuntimeCbid(CUpti_CallbackId cbid) {
581578
if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetDevice_v3020 ||
582579
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaSetDevice_v3020 ||
583580
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaGetLastError_v3020 ||
584-
// Support cudaEventRecord and cudaEventSynchronize, revisit if others are
585-
// needed
581+
// Support cudaEventRecord and cudaEventSynchronize, revisit if others
582+
// are needed
586583
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreate_v3020 ||
587584
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventCreateWithFlags_v3020 ||
588585
cbid == CUPTI_RUNTIME_TRACE_CBID_cudaEventDestroy_v3020) {
@@ -1339,20 +1336,21 @@ const time_point<system_clock> CuptiActivityProfiler::performRunLoopStep(
13391336
|| cupti_.stopCollection
13401337
#endif // HAS_CUPTI || HAS_ROCTRACER
13411338
) {
1342-
// Update runloop state first to prevent further updates to shared state
1339+
// Update runloop state first to prevent further updates to shared
1340+
// state
13431341
LOG(INFO) << "Tracing complete.";
13441342
VLOG_IF(1, currentIter >= 0)
13451343
<< "This state change was invoked by application's step() call";
13461344

13471345
// currentIter >= 0 means this is called from the step() api of
1348-
// the profile in pytorch main thread, it should be executed in another
1349-
// thread in case pytorch main thread is blocked
1346+
// the profile in pytorch main thread, it should be executed in
1347+
// another thread in case pytorch main thread is blocked
13501348
if (currentIter >= 0) {
13511349
// if collectTraceThread_ is already running, there's no need to
13521350
// execute collectTrace twice.
1353-
// Do not call collectTrace when profilerThread_ is collecting Trace.
1354-
// Otherwise, libkineto::api().client()->stop will be called twice,
1355-
// which leads to an unrecoverable ::c10:Error at
1351+
// Do not call collectTrace when profilerThread_ is collecting
1352+
// Trace. Otherwise, libkineto::api().client()->stop will be called
1353+
// twice, which leads to an unrecoverable ::c10::Error at
13561354
// disableProfiler
13571355
if (!collectTraceThread_ && !getCollectTraceState()) {
13581356
std::lock_guard<std::recursive_mutex> guard(mutex_);

libkineto/src/output_json.cpp

+10-7
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,13 @@ void ChromeTraceLogger::handleTraceStart(
131131
device_properties);
132132

133133
metadataToJSON(metadata);
134+
135+
traceOf_ << fmt::format(
136+
R"JSON(
137+
"displayTimeUnit": "ms",
138+
"baseTimeNanoseconds": {},)JSON",
139+
ChromeTraceBaseTime::singleton().get());
140+
134141
traceOf_ << R"JSON(
135142
"traceEvents": [)JSON";
136143
}
@@ -681,13 +688,9 @@ void ChromeTraceLogger::finalizeTrace(
681688
#endif // !USE_GOOGLE_LOG
682689

683690
// Putting this here because the last entry MUST not end with a comma.
684-
685-
traceOf_ << fmt::format(R"JSON(
686-
"traceName": "{}",
687-
"displayTimeUnit": "ms",
688-
"baseTimeNanoseconds": {}
689-
}})JSON", fileName_, ChromeTraceBaseTime::singleton().get());
690-
// clang-format on
691+
traceOf_ << fmt::format(R"JSON(
692+
"traceName": "{}"
693+
}})JSON", fileName_);
691694

692695
traceOf_.close();
693696
// On some systems, rename() fails if the destination file exists.

0 commit comments

Comments
 (0)