-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
#2387: Gather hashed trace user events at the end of run #2395
base: develop
Are you sure you want to change the base?
Conversation
Pipelines results
PR tests (gcc-12, ubuntu, mpich, verbose, kokkos) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (intel icpx, ubuntu, mpich, verbose) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-9, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-12, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (gcc-9, ubuntu, mpich, zoltan) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-11, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (gcc-10, ubuntu, openmpi, no LB) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-13, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-14, ubuntu, mpich, verbose) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-10, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (nvidia cuda 11.2, gcc-9, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (intel icpc, ubuntu, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (gcc-11, ubuntu, mpich, trace runtime, coverage) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (nvidia cuda 12.2.0, gcc-9, ubuntu, mpich, verbose) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
PR tests (clang-13, alpine, mpich) Build for 56b67a9 (2025-02-14 16:19:23 UTC)
|
1742430
to
7ee3b85
Compare
ef7edca
to
9e40158
Compare
067d4a3
to
27b7d4c
Compare
This PR does not compile in
Is it possible to use |
No, because in |
8665c22
to
444d54e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For what it's worth, I can reproduce the segfault easily:
Thread 1 "hello_world" received signal SIGSEGV, Segmentation fault.
vt::objgroup::ObjGroupManager::makeCollectiveImpl (this=0x0, label="Trace", base=std::unique_ptr<vt::objgroup::holder::HolderBase> = {...}, obj_ptr=0x3096ed0)
at /home/cz4rs/code/vt/src/vt/objgroup/manager.cc:89
89 auto const id = cur_obj_id_++;
(gdb) bt
#0 vt::objgroup::ObjGroupManager::makeCollectiveImpl (this=0x0, label="Trace", base=std::unique_ptr<vt::objgroup::holder::HolderBase> = {...}, obj_ptr=0x3096ed0)
at /home/cz4rs/code/vt/src/vt/objgroup/manager.cc:89
#1 0x0000000001c2dcf6 in vt::objgroup::ObjGroupManager::makeCollectiveObj<vt::trace::Trace> (this=0x0, label="Trace", obj=0x3096ed0,
holder=std::unique_ptr<vt::objgroup::holder::HolderBase> = {...}) at /home/cz4rs/code/vt/src/vt/objgroup/manager.impl.h:107
#2 0x0000000001c2a022 in vt::objgroup::ObjGroupManager::makeCollective<vt::trace::Trace> (this=0x0, obj=0x3096ed0, label="Trace")
at /home/cz4rs/code/vt/src/vt/objgroup/manager.impl.h:82
#3 0x0000000001bb309f in vt::trace::Trace::construct (in_prog_name="hello_world") at /home/cz4rs/code/vt/src/vt/trace/trace.cc:133
#4 0x00000000016a6f30 in vt::runtime::component::ComponentConstructor<vt::trace::Trace, void, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>::apply<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&> (args="hello_world")
at /home/cz4rs/code/vt/src/vt/runtime/component/component.h:73
#5 0x00000000016a6ee0 in vt::runtime::component::Component<vt::trace::Trace>::staticInit<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&> (args="hello_world") at /home/cz4rs/code/vt/src/vt/runtime/component/component.h:123
#6 0x0000000001672470 in vt::runtime::component::(anonymous namespace)::tupleConsImpl<vt::trace::Trace, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>, 0ul> (tup=..., seq=...) at /home/cz4rs/code/vt/src/vt/runtime/component/component_pack.impl.h:57
#7 0x0000000001672430 in vt::runtime::component::(anonymous namespace)::tupleCons<vt::trace::Trace, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&> > (tup=...) at /home/cz4rs/code/vt/src/vt/runtime/component/component_pack.impl.h:64
#8 0x00000000016a6aec in vt::runtime::component::ComponentPack::registerComponent<vt::trace::Trace, vt::ctx::Context, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(vt::trace::Trace**, vt::runtime::component::BaseComponent::DepsPack<vt::ctx::Context>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::{lambda()#1}::operator()() (this=0x3075488) at /home/cz4rs/code/vt/src/vt/runtime/component/component_pack.impl.h:93
#9 0x00000000016a6ab9 in vt::runtime::component::MovableFnTyped<vt::runtime::component::ComponentPack::registerComponent<vt::trace::Trace, vt::ctx::Context, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(vt::trace::Trace**, vt::runtime::component::BaseComponent::DepsPack<vt::ctx::Context>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)::{lambda()#1}>::invoke() (this=0x3075480)
at /home/cz4rs/code/vt/src/vt/runtime/component/movable_fn.h:78
#10 0x000000000179d6af in vt::runtime::component::ComponentPack::construct (this=0x3067d20) at /home/cz4rs/code/vt/src/vt/runtime/component/component_pack.cc:63
#11 0x000000000165facb in vt::runtime::Runtime::initializeComponents (this=0x2f1f510) at /home/cz4rs/code/vt/src/vt/runtime/runtime.cc:963
#12 0x000000000165eca8 in vt::runtime::Runtime::initialize (this=0x2f1f510, force_now=true) at /home/cz4rs/code/vt/src/vt/runtime/runtime.cc:453
#13 0x000000000165ec5e in vt::runtime::Runtime::tryInitialize (this=0x2f1f510) at /home/cz4rs/code/vt/src/vt/runtime/runtime.cc:402
#14 0x000000000165ee4a in vt::runtime::Runtime::initialize (this=0x2f1f510, force_now=false) at /home/cz4rs/code/vt/src/vt/runtime/runtime.cc:491
#15 0x00000000016586f6 in vt::CollectiveAnyOps<(vt::runtime::eRuntimeInstance)0>::initialize (argc=@0x7fffffffc988: 1, argv=@0x7fffffffc980: 0x307dea0, is_interop=false,
comm=0x0, appConfig=0x0) at /home/cz4rs/code/vt/src/vt/collective/collective_ops.cc:238
#16 0x000000000165a35d in vt::initialize (argc=@0x7fffffffc988: 1, argv=@0x7fffffffc980: 0x307dea0, comm=0x0, appConfig=0x0)
at /home/cz4rs/code/vt/src/vt/collective/startup.cc:78
#17 0x0000000001609692 in main (argc=1, argv=0x307dea0) at /home/cz4rs/code/vt/examples/hello_world/hello_world.cc:52
@cwschilly Let me know if you need help debugging the failures. I'll try to do a proper review soon-ish.
b55a195
to
9d4a193
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ready to merge when it passes!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…tempt allreduce at end of run
9d4a193
to
a7343f4
Compare
diff --git a/src/vt/pipe/callback/cb_union/cb_raw_base.fwd.h b/src/vt/pipe/callback/cb_union/cb_raw_base.fwd.h
index 416fb3403..974ac0695 100644
--- a/src/vt/pipe/callback/cb_union/cb_raw_base.fwd.h
+++ b/src/vt/pipe/callback/cb_union/cb_raw_base.fwd.h
@@ -44,9 +44,11 @@
#if !defined INCLUDED_VT_PIPE_CALLBACK_CB_UNION_CB_RAW_BASE_FWD_H
#define INCLUDED_VT_PIPE_CALLBACK_CB_UNION_CB_RAW_BASE_FWD_H
-namespace vt { namespace pipe { namespace callback { namespace cbunion {
+namespace vt {
+namespace pipe { namespace callback { namespace cbunion {
-template <typename... Args> struct CallbackTyped;
+template <typename... Args>
+struct CallbackTyped;
struct CallbackRawBaseSingle;
diff --git a/src/vt/runtime/component/component_pack.cc b/src/vt/runtime/component/component_pack.cc
index 6775c83bb..00a0f8941 100644
--- a/src/vt/runtime/component/component_pack.cc
+++ b/src/vt/runtime/component/component_pack.cc
@@ -143,7 +143,9 @@ std::list<int> ComponentPack::topoSort() {
return order;
}
-void ComponentPack::topoSortImpl(int v, std::list<int>& order, bool* visited, bool* visiting) {
+void ComponentPack::topoSortImpl(
+ int v, std::list<int>& order, bool* visited, bool* visiting
+) {
//fmt::print("impl v={}\n",v);
vtAbortIf(visiting[v] == true, "Already visiting this node, cycle detected");
visiting[v] = true;
diff --git a/src/vt/runtime/component/component_pack.h b/src/vt/runtime/component/component_pack.h
index 2b2fa8e41..32a49bef9 100644
--- a/src/vt/runtime/component/component_pack.h
+++ b/src/vt/runtime/component/component_pack.h
@@ -162,7 +162,8 @@ private:
* \param[in] visited array of visited vertices
* \param[in] visiting array of currently visiting vertices
*/
- void topoSortImpl(int v, std::list<int>& order, bool* visited, bool* visiting);
+ void
+ topoSortImpl(int v, std::list<int>& order, bool* visited, bool* visiting);
/**
* \internal \brief Detect cycles in the dependence graph
diff --git a/src/vt/runtime/runtime.cc b/src/vt/runtime/runtime.cc
index 083468bc9..cb330adee 100644
--- a/src/vt/runtime/runtime.cc
+++ b/src/vt/runtime/runtime.cc
@@ -749,10 +749,11 @@ void Runtime::initializeComponents() {
# if vt_check_enabled(trace_enabled)
// The Trace and Scheduler components have a co-dependency. However,
// the lifetime of theTrace should be longer than that of theSched.
- p_->registerComponent<trace::Trace>(&theTrace, Deps<
- ctx::Context, // Everything depends on theContext
- objgroup::ObjGroupManager
- >{},
+ p_->registerComponent<trace::Trace>(
+ &theTrace,
+ Deps<
+ ctx::Context, // Everything depends on theContext
+ objgroup::ObjGroupManager>{},
prog_name
);
# endif
@@ -768,12 +769,12 @@ void Runtime::initializeComponents() {
#endif
p_->registerComponent<objgroup::ObjGroupManager>(
- &theObjGroup, Deps<
- ctx::Context // Everything depends on theContext
+ &theObjGroup,
+ Deps<ctx::Context // Everything depends on theContext
- // Break this dependency for startup ordering
- // messaging::ActiveMessenger // Depends on active messenger to send
- >{}
+ // Break this dependency for startup ordering
+ // messaging::ActiveMessenger // Depends on active messenger to send
+ >{}
);
p_->registerComponent<messaging::ActiveMessenger>(
diff --git a/src/vt/trace/trace.cc b/src/vt/trace/trace.cc
index e98bae813..675c68e32 100644
--- a/src/vt/trace/trace.cc
+++ b/src/vt/trace/trace.cc
@@ -127,14 +127,13 @@ void Trace::setProxy(objgroup::proxy::Proxy<Trace> in_proxy) {
}
#endif
-/*static*/ std::unique_ptr<Trace> Trace::construct(std::string const& in_prog_name) {
+/*static*/ std::unique_ptr<Trace>
+Trace::construct(std::string const& in_prog_name) {
auto ptr = std::make_unique<Trace>(in_prog_name);
- #if !vt_check_enabled(trace_only)
- auto proxy = theObjGroup()->makeCollective<Trace>(
- ptr.get(), "Trace"
- );
+#if !vt_check_enabled(trace_only)
+ auto proxy = theObjGroup()->makeCollective<Trace>(ptr.get(), "Trace");
proxy.get()->setProxy(proxy);
- #endif
+#endif
return ptr;
}
@@ -219,9 +218,9 @@ void Trace::setUserEvents(const UserEventRegistry& events) {
}
void Trace::gatherUserEvents() {
- #if !vt_check_enabled(trace_only)
+#if !vt_check_enabled(trace_only)
proxy_.reduce<&reducedEventsHan, vt::collective::PlusOp>(0, user_event_);
- #endif
+#endif
}
UserEventIDType Trace::registerUserEventRoot(std::string const& name) {
@@ -241,10 +240,12 @@ void Trace::registerUserEventManual(
void reducedEventsHan(
[[maybe_unused]] const UserEventRegistry& gathered_user_events
) {
- #if vt_check_enabled(trace_enabled)
- vtAssert(theContext()->getNode() == 0, "User events must be gathered on node 0");
+#if vt_check_enabled(trace_enabled)
+ vtAssert(
+ theContext()->getNode() == 0, "User events must be gathered on node 0"
+ );
theTrace()->setUserEvents(gathered_user_events);
- #endif
+#endif
}
void insertNewUserEvent(
diff --git a/src/vt/trace/trace.h b/src/vt/trace/trace.h
index 88fc1ef3a..393cb1a97 100644
--- a/src/vt/trace/trace.h
+++ b/src/vt/trace/trace.h
@@ -135,9 +135,9 @@ struct Trace : runtime::component::Component<Trace>, TraceLite {
void startup() override;
void finalize() override;
- #if !vt_check_enabled(trace_only)
+#if !vt_check_enabled(trace_only)
void setProxy(objgroup::proxy::Proxy<Trace> in_proxy);
- #endif
+#endif
/**
* \brief Initiate a paired processing event.
@@ -407,31 +407,15 @@ struct Trace : runtime::component::Component<Trace>, TraceLite {
template <typename SerializerT>
void serialize(SerializerT& s) {
- s | incremental_flush_mode_
- | traces_
- | open_events_
- | event_holds_
- | cur_event_
- | enabled_
- | idle_begun_
- | start_time_
- | user_event_
- | prog_name_
- | trace_name_
- | full_trace_name_
- | full_sts_name_
- | full_dir_name_
- | wrote_sts_file_
- | trace_write_count_
- | spec_proxy_
- #if !vt_check_enabled(trace_only)
+ s | incremental_flush_mode_ | traces_ | open_events_ | event_holds_ |
+ cur_event_ | enabled_ | idle_begun_ | start_time_ | user_event_ |
+ prog_name_ | trace_name_ | full_trace_name_ | full_sts_name_ |
+ full_dir_name_ | wrote_sts_file_ | trace_write_count_ | spec_proxy_
+#if !vt_check_enabled(trace_only)
| proxy_
- #endif
- | trace_enabled_cur_phase_
- | flush_event_
- | between_sched_event_type_
- | between_sched_event_
- | inside_invoke_context_;
+#endif
+ | trace_enabled_cur_phase_ | flush_event_ | between_sched_event_type_ |
+ between_sched_event_ | inside_invoke_context_;
s.skip(log_file_); // definition unavailable
}
@@ -445,10 +429,10 @@ private:
ObjGroupProxyType spec_proxy_ = vt::no_obj_group;
- #if !vt_check_enabled(trace_only)
+#if !vt_check_enabled(trace_only)
// Objgroup proxy
objgroup::proxy::Proxy<Trace> proxy_;
- #endif
+#endif
// Processing event between top-level loops.
TraceEntryIDType between_sched_event_type_ = no_trace_entry_id;
diff --git a/src/vt/trace/trace_user_event.cc b/src/vt/trace/trace_user_event.cc
index e0a13179d..269794027 100644
--- a/src/vt/trace/trace_user_event.cc
+++ b/src/vt/trace/trace_user_event.cc
@@ -126,15 +126,13 @@ bool UserEventRegistry::insertEvent(
std::forward_as_tuple(name)
);
return true;
- } else if (user_event_[event] != name){
+ } else if (user_event_[event] != name) {
user_event_[event] += " COLLISION " + name;
}
return false;
}
-UserEventRegistry operator+(
- UserEventRegistry r1, UserEventRegistry const& r2
-) {
+UserEventRegistry operator+(UserEventRegistry r1, UserEventRegistry const& r2) {
for (auto& [hash, event_str] : r2.getEvents()) {
r1.insertEvent(hash, event_str);
}
diff --git a/src/vt/trace/trace_user_event.h b/src/vt/trace/trace_user_event.h
index de4f0bad5..07f10553e 100644
--- a/src/vt/trace/trace_user_event.h
+++ b/src/vt/trace/trace_user_event.h
@@ -123,7 +123,8 @@ struct UserEventRegistry {
friend void insertNewUserEvent(UserEventIDType event, std::string const& name);
- friend UserEventRegistry operator+(UserEventRegistry r1, UserEventRegistry const& r2);
+ friend UserEventRegistry
+ operator+(UserEventRegistry r1, UserEventRegistry const& r2);
template <typename Serializer>
void serialize(Serializer& s) { |
Fixes #2387