Skip to content

Commit

Permalink
#2183: ldms: write out LB stats instead of data, add freq check
Browse files Browse the repository at this point in the history
  • Loading branch information
lifflander committed Nov 9, 2023
1 parent 546f53c commit 1c98fba
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,12 @@ void LBManager::commitPhaseStatistics(PhaseType phase) {
auto writer = static_cast<JSONAppender*>(statistics_writer_.get());
writer->stageObject(j);
writer->commitStaged();

#if vt_check_enabled(ldms)
j["ts"] = MPI_Wtime();

theNodeLBData()->writeJSONToLDMS(j);
#endif
}

balance::LoadData reduceVec(
Expand Down
17 changes: 16 additions & 1 deletion src/vt/vrt/collection/balance/node_lb_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ void NodeLBData::initialize() {
#endif

#if vt_check_enabled(ldms)
if (auto ldms_freq = getenv("VT_LDMS_MILLI_FREQ")) {
ldms_milli_freq_ = atoi(ldms_freq);
}
const auto xPtr = getenv("VT_LDMS_XPTR");
const auto auth = getenv("VT_LDMS_AUTH");
ldms_ = ldms_xprt_new_with_auth(xPtr, auth, NULL);
Expand Down Expand Up @@ -305,11 +308,23 @@ void NodeLBData::outputLBDataForPhase(PhaseType phase) {
auto j = lb_data_->toJson(phase);
auto writer = static_cast<JSONAppender*>(lb_data_writer_.get());
writer->addElm(*j);
}

void NodeLBData::writeJSONToLDMS(nlohman::json& j) {
#if vt_check_enabled(ldms)
if (ldms_prev_submission_ == 0) {
ldms_prev_submission_ = MPI_Wtime();
} else if (
(MPI_Wtime() - ldms_prev_submission_) * 1000.0 < (double)ldms_milli_freq_
) {
return;
} else {
ldms_prev_submission_ = MPI_Wtime();
}

auto jsonStr = j->dump();
const auto returnVal = ldmsd_stream_publish(
ldms_, "LB_data", LDMSD_STREAM_JSON, jsonStr.c_str(), jsonStr.length() + 1
ldms_, "vtLBStats", LDMSD_STREAM_JSON, jsonStr.c_str(), jsonStr.length() + 1
);
vtWarnIf(returnVal == 0, fmt::format("ldmsd_stream_publish returned {}!\n", returnVal));
#endif
Expand Down
9 changes: 9 additions & 0 deletions src/vt/vrt/collection/balance/node_lb_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,13 @@ struct NodeLBData : runtime::component::Component<NodeLBData> {
*/
LBDataHolder* getLBData() { return lb_data_.get(); }

/**
* \brief Write some JSON to the LDMS stream
*
* \param[in] j the JSON to write
*/
void writeJSONToLDMS(nlohman::json& j);

template <typename SerializerT>
void serialize(SerializerT& s) {
s | proxy_
Expand Down Expand Up @@ -304,6 +311,8 @@ struct NodeLBData : runtime::component::Component<NodeLBData> {
std::unique_ptr<LBDataHolder> lb_data_ = nullptr;
#if vt_check_enabled(ldms)
ldms_xprt* ldms_ = nullptr;
int ldms_milli_freq_ = 10;
double ldms_prev_submission_ = 0;
#endif
};

Expand Down

0 comments on commit 1c98fba

Please sign in to comment.