Skip to content

Commit f1bd727

Browse files
tabikatibaldurk
authored and committed
Add support for ARM counters
Adding support for ARM counters via a third-party lib. The main target platform is Android.
1 parent 6a2415f commit f1bd727

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+10086
-1
lines changed

qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ enum class CounterFamily
5151
Intel,
5252
NVIDIA,
5353
VulkanExtended,
54+
ARM,
5455
};
5556

5657
CounterFamily GetCounterFamily(GPUCounter counter)
@@ -71,6 +72,10 @@ CounterFamily GetCounterFamily(GPUCounter counter)
7172
{
7273
return CounterFamily::VulkanExtended;
7374
}
75+
else if(IsARMCounter(counter))
76+
{
77+
return CounterFamily::ARM;
78+
}
7479

7580
return CounterFamily::Generic;
7681
}
@@ -84,6 +89,7 @@ QString ToString(CounterFamily family)
8489
case CounterFamily::Intel: return lit("Intel");
8590
case CounterFamily::NVIDIA: return lit("NVIDIA");
8691
case CounterFamily::VulkanExtended: return lit("Vulkan Extended");
92+
case CounterFamily::ARM: return lit("ARM");
8793
case CounterFamily::Unknown: return lit("Unknown");
8894
}
8995

qrenderdoc/Windows/PerformanceCounterViewer.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ QTableWidgetItem *PerformanceCounterViewer::MakeCounterResultItem(const CounterR
177177

178178
case CounterUnit::Absolute:
179179
case CounterUnit::Ratio: break;
180+
181+
case CounterUnit::Hertz: returnValue += lit(" Hz"); break;
182+
case CounterUnit::Volt: returnValue += lit(" V"); break;
183+
case CounterUnit::Celsius: returnValue += lit(" °C"); break;
180184
}
181185

182186
return new CustomSortedTableItem(returnValue, SortValue(result, description));

renderdoc.sln

+11
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NV", "renderdoc\driver\ihv\
8383
EndProject
8484
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Intel", "renderdoc\driver\ihv\intel\Intel.vcxproj", "{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}"
8585
EndProject
86+
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ARM", "renderdoc\driver\ihv\arm\ARM.vcxproj", "{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}"
87+
EndProject
8688
Global
8789
GlobalSection(SolutionConfigurationPlatforms) = preSolution
8890
Development|x64 = Development|x64
@@ -283,6 +285,14 @@ Global
283285
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x64.Build.0 = Release|x64
284286
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x86.ActiveCfg = Release|Win32
285287
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x86.Build.0 = Release|Win32
288+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x64.ActiveCfg = Development|x64
289+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x64.Build.0 = Development|x64
290+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x86.ActiveCfg = Development|Win32
291+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x86.Build.0 = Development|Win32
292+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x64.ActiveCfg = Release|x64
293+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x64.Build.0 = Release|x64
294+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x86.ActiveCfg = Release|Win32
295+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x86.Build.0 = Release|Win32
286296
EndGlobalSection
287297
GlobalSection(SolutionProperties) = preSolution
288298
HideSolutionNode = FALSE
@@ -318,5 +328,6 @@ Global
318328
{37955C79-D91D-423F-8C6C-8F5BCF4F28D4} = {B5A783D9-AEB9-420D-8E77-D4D930F8D88C}
319329
{40349AD9-5558-4DF4-84E2-11934DE90A11} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
320330
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
331+
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
321332
EndGlobalSection
322333
EndGlobal

renderdoc/CMakeLists.txt

+6
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,12 @@ if(ENABLE_GL OR ENABLE_GLES)
502502
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_intel>)
503503
endif()
504504

505+
# pull in the ARM counter sources (driver/ihv/arm) and add the rdoc_arm objects for GPU performance counter queries
506+
if(ENABLE_GL OR ENABLE_GLES)
507+
add_subdirectory(driver/ihv/arm)
508+
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_arm>)
509+
endif()
510+
505511
add_library(rdoc OBJECT ${sources})
506512
target_compile_definitions(rdoc ${RDOC_DEFINITIONS})
507513
target_include_directories(rdoc ${RDOC_INCLUDES})

renderdoc/android/android.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -558,11 +558,22 @@ struct AndroidRemoteServer : public RemoteServer
558558
{
559559
ResetAndroidSettings();
560560

561+
// enable profiling to measure hardware counters
562+
Android::adbExecCommand(m_deviceID, "shell setprop security.perf_harden 0");
563+
561564
LazilyStartLogcatThread();
562565

563566
return RemoteServer::OpenCapture(proxyid, filename, opts, progress);
564567
}
565568

569+
// Closes a capture on the Android device.
//
// Sets the security.perf_harden property back to 1 through adb (disabling profiling),
// then forwards to RemoteServer::CloseCapture to close the capture itself.
//
// rend: the replay controller to close, passed through unchanged to the base class.
virtual void CloseCapture(IReplayController *rend) override
{
  // disable profiling again before handing off to the base implementation
  const char *disableProfiling = "shell setprop security.perf_harden 1";
  Android::adbExecCommand(m_deviceID, disableProfiling);

  RemoteServer::CloseCapture(rend);
}
576+
566577
// This remote server always reports an empty string as its home folder.
virtual rdcstr GetHomeFolder() override
{
  return "";
}
567578
virtual rdcarray<PathEntry> ListFolder(const char *path) override
568579
{

renderdoc/api/replay/replay_enums.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -3229,7 +3229,10 @@ enum class GPUCounter : uint32_t
32293229
FirstVulkanExtended = 4000000,
32303230
LastNvidia = FirstVulkanExtended - 1,
32313231

3232-
LastVulkanExtended = 5000000,
3232+
FirstARM = 5000000,
3233+
LastVulkanExtended = FirstARM - 1,
3234+
3235+
LastARM = 6000000,
32333236
};
32343237

32353238
ITERABLE_OPERATORS(GPUCounter);
@@ -3290,6 +3293,17 @@ inline constexpr bool IsVulkanExtendedCounter(GPUCounter c)
32903293
return c >= GPUCounter::FirstVulkanExtended && c <= GPUCounter::LastVulkanExtended;
32913294
}
32923295

3296+
DOCUMENT(R"(Check whether or not this is an ARM private counter.
3297+
3298+
:param GPUCounter c: The counter.
3299+
:return: ``True`` if it is an ARM private counter, ``False`` if it's not.
3300+
:rtype: ``bool``
3301+
)");
3302+
inline constexpr bool IsARMCounter(GPUCounter c)
3303+
{
3304+
return c >= GPUCounter::FirstARM && c <= GPUCounter::LastARM;
3305+
}
3306+
32933307
DOCUMENT(R"(The unit that GPU counter data is returned in.
32943308
32953309
.. data:: Absolute
@@ -3324,6 +3338,9 @@ enum class CounterUnit : uint32_t
33243338
Ratio,
33253339
Bytes,
33263340
Cycles,
3341+
Hertz,
3342+
Volt,
3343+
Celsius
33273344
};
33283345

33293346
DECLARE_REFLECTION_ENUM(CounterUnit);

renderdoc/driver/gl/gl_counters.cpp

+99
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <algorithm>
2626
#include <iterator>
2727
#include "driver/ihv/amd/amd_counters.h"
28+
#include "driver/ihv/arm/arm_counters.h"
2829
#include "driver/ihv/intel/intel_gl_counters.h"
2930
#include "gl_driver.h"
3031
#include "gl_replay.h"
@@ -65,6 +66,11 @@ rdcarray<GPUCounter> GLReplay::EnumerateCounters()
6566
ret.append(m_pIntelCounters->GetPublicCounterIds());
6667
}
6768

69+
if(m_pARMCounters)
70+
{
71+
ret.append(m_pARMCounters->GetPublicCounterIds());
72+
}
73+
6874
return ret;
6975
}
7076

@@ -96,6 +102,11 @@ CounterDescription GLReplay::DescribeCounter(GPUCounter counterID)
96102
}
97103
}
98104

105+
if(IsARMCounter(counterID) && m_pARMCounters)
106+
{
107+
return m_pARMCounters->GetCounterDescription(counterID);
108+
}
109+
99110
// FFBA5548-FBF8-405D-BA18-F0329DA370A0
100111
desc.uuid.words[0] = 0xFFBA5548;
101112
desc.uuid.words[1] = 0xFBF8405D;
@@ -461,6 +472,84 @@ rdcarray<CounterResult> GLReplay::FetchCountersIntel(const rdcarray<GPUCounter>
461472
return ret;
462473
}
463474

475+
// Recursively walks the children of `drawnode` and takes one ARM counter sample for every
// child draw that has events.
//
// eventStartID: in/out - event ID the ReplayLog calls start from; after each sampled draw it
//               is advanced to one past that draw's event ID.
// sampleIndex:  in/out - incremented once for every sampled draw.
// eventIDs:     out    - the event ID of every sampled draw is appended here. Nested draws are
//               visited before their parent, so they are appended before it.
// drawnode:     the node whose children are processed; drawnode itself is not sampled here.
void GLReplay::FillTimersARM(uint32_t *eventStartID, uint32_t *sampleIndex,
                             rdcarray<uint32_t> *eventIDs, const DrawcallDescription &drawnode)
{
  for(const DrawcallDescription &child : drawnode.children)
  {
    // depth-first: handle everything nested under this child before the child itself
    FillTimersARM(eventStartID, sampleIndex, eventIDs, child);

    if(!child.events.empty())
    {
      const uint32_t eid = child.eventId;

      eventIDs->push_back(eid);

      // replay up to this draw before the sample starts, so that only the
      // eReplay_OnlyDraw replay happens between BeginSample and EndSample
      m_pDriver->ReplayLog(*eventStartID, eid, eReplay_WithoutDraw);

      m_pARMCounters->BeginSample(eid);

      m_pDriver->ReplayLog(*eventStartID, eid, eReplay_OnlyDraw);

      // block until the GPU has processed all submitted commands, so the sample is
      // only ended once the draw's work has completed
      GLsync gpuDone = GL.glFenceSync(eGL_SYNC_GPU_COMMANDS_COMPLETE, 0);
      GL.glClientWaitSync(gpuDone, eGL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);

      m_pARMCounters->EndSample();

      GL.glDeleteSync(gpuDone);

      // the next replay continues from the event following this draw
      *eventStartID = eid + 1;
      ++(*sampleIndex);
    }
  }
}
510+
511+
// Samples the requested ARM counters and returns their results.
//
// All counters are disabled first and then only the requested ones are enabled. For each
// pass reported by the ARM counter object, the draw tree is walked from the root draw via
// FillTimersARM, sampling each draw. ReplayMarkers(false) is called before the passes and
// ReplayMarkers(true) after them.
//
// counters: the counters to fetch; every entry is expected to satisfy IsARMCounter (asserted).
// Returns whatever ARMCounters::GetCounterData produces for the sampled event IDs and the
// requested counters.
rdcarray<CounterResult> GLReplay::FetchCountersARM(const rdcarray<GPUCounter> &counters)
{
  // start with nothing enabled, then switch on exactly what was asked for
  m_pARMCounters->DisableAllCounters();

  for(size_t i = 0; i < counters.size(); i++)
  {
    // This function is only called internally, so a failure here means the caller
    // passed in a counter that does not belong to the ARM range
    RDCASSERT(IsARMCounter(counters[i]));
    m_pARMCounters->EnableCounter(counters[i]);
  }

  // queried after enabling, as in the original ordering
  const uint32_t numPasses = m_pARMCounters->GetPassCount();

  rdcarray<uint32_t> sampledEvents;
  uint32_t sampleCount = 0;

  m_pDriver->ReplayMarkers(false);

  for(uint32_t pass = 0; pass < numPasses; ++pass)
  {
    m_pARMCounters->BeginPass(pass);

    // every pass walks the draw tree again from the first event, so the per-pass
    // bookkeeping is reset each time
    uint32_t firstEvent = 0;
    sampleCount = 0;
    sampledEvents.clear();

    FillTimersARM(&firstEvent, &sampleCount, &sampledEvents, m_pDriver->GetRootDraw());

    m_pARMCounters->EndPass();
  }

  m_pDriver->ReplayMarkers(true);

  return m_pARMCounters->GetCounterData(sampledEvents, counters);
}
552+
464553
rdcarray<CounterResult> GLReplay::FetchCounters(const rdcarray<GPUCounter> &allCounters)
465554
{
466555
rdcarray<CounterResult> ret;
@@ -503,6 +592,16 @@ rdcarray<CounterResult> GLReplay::FetchCounters(const rdcarray<GPUCounter> &allC
503592
}
504593
}
505594

595+
if(m_pARMCounters)
596+
{
597+
rdcarray<GPUCounter> armCounters;
598+
std::copy_if(allCounters.begin(), allCounters.end(), std::back_inserter(armCounters),
599+
[](const GPUCounter &c) { return IsARMCounter(c); });
600+
601+
if(!armCounters.empty())
602+
ret = FetchCountersARM(armCounters);
603+
}
604+
506605
if(counters.empty())
507606
{
508607
return ret;

renderdoc/driver/gl/gl_replay.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "gl_replay.h"
2727
#include "core/settings.h"
2828
#include "driver/ihv/amd/amd_counters.h"
29+
#include "driver/ihv/arm/arm_counters.h"
2930
#include "driver/ihv/intel/intel_gl_counters.h"
3031
#include "maths/matrix.h"
3132
#include "serialise/rdcfile.h"
@@ -67,6 +68,7 @@ void GLReplay::Shutdown()
6768
{
6869
SAFE_DELETE(m_pAMDCounters);
6970
SAFE_DELETE(m_pIntelCounters);
71+
SAFE_DELETE(m_pARMCounters);
7072

7173
DeleteDebugData();
7274

@@ -235,6 +237,7 @@ void GLReplay::SetReplayData(GLWindowingData data)
235237
{
236238
AMDCounters *countersAMD = NULL;
237239
IntelGlCounters *countersIntel = NULL;
240+
ARMCounters *countersARM = NULL;
238241

239242
bool isMesa = false;
240243

@@ -283,6 +286,11 @@ void GLReplay::SetReplayData(GLWindowingData data)
283286
RDCLOG("AMD GPU detected - trying to initialise AMD counters");
284287
countersAMD = new AMDCounters();
285288
}
289+
else if(m_DriverInfo.vendor == GPUVendor::ARM)
290+
{
291+
RDCLOG("ARM Mali GPU detected - trying to initialise ARM counters");
292+
countersARM = new ARMCounters();
293+
}
286294
else
287295
{
288296
RDCLOG("%s GPU detected - no counters available", ToStr(m_DriverInfo.vendor).c_str());
@@ -308,6 +316,16 @@ void GLReplay::SetReplayData(GLWindowingData data)
308316
delete countersIntel;
309317
m_pIntelCounters = NULL;
310318
}
319+
320+
if(countersARM && countersARM->Init())
321+
{
322+
m_pARMCounters = countersARM;
323+
}
324+
else
325+
{
326+
delete countersARM;
327+
m_pARMCounters = NULL;
328+
}
311329
}
312330
}
313331

renderdoc/driver/gl/gl_replay.h

+9
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "gl_common.h"
3232

3333
class AMDCounters;
34+
class ARMCounters;
3435
class IntelGlCounters;
3536
class WrappedOpenGL;
3637
struct GLCounterContext;
@@ -465,4 +466,12 @@ class GLReplay : public IReplayDriver
465466
const DrawcallDescription &drawnode);
466467

467468
rdcarray<CounterResult> FetchCountersIntel(const rdcarray<GPUCounter> &counters);
469+
470+
// ARM counter instance
471+
ARMCounters *m_pARMCounters = NULL;
472+
473+
void FillTimersARM(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray<uint32_t> *eventIDs,
474+
const DrawcallDescription &drawnode);
475+
476+
rdcarray<CounterResult> FetchCountersARM(const rdcarray<GPUCounter> &counters);
468477
};

0 commit comments

Comments
 (0)