Skip to content

Commit

Permalink
address self-review comment
Browse files (browse the repository at this point in the history)
  • Loading branch information
gargnitingoogle committed Aug 21, 2024
1 parent 7e043e1 commit 066952e
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 9 deletions.
10 changes: 4 additions & 6 deletions perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
sys.path.append("../")
from utils.utils import get_memory, get_cpu, standard_timestamp, is_mash_installed

_LOCAL_LOGS_LOCATION = "../../bin/dlio-logs"
_LOCAL_LOGS_LOCATION = "../../bin/dlio-logs/logs"

record = {
"pod_name": "",
Expand Down Expand Up @@ -56,7 +56,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
"-r",
"--no-user-output-enabled", # do not print names of files being copied
f"gs://{dlioWorkload.bucket}/logs/{instanceId}",
_LOCAL_LOGS_LOCATION + "/logs",
_LOCAL_LOGS_LOCATION,
],
capture_output=False,
text=True,
Expand Down Expand Up @@ -100,7 +100,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
args = parser.parse_args()

try:
os.makedirs(_LOCAL_LOGS_LOCATION + "/logs")
os.makedirs(_LOCAL_LOGS_LOCATION)
except FileExistsError:
pass

Expand All @@ -125,9 +125,7 @@ def downloadDlioOutputs(dlioWorkloads: set, instanceId: str):
if not mash_installed:
print("Mash is not installed, will skip parsing CPU and memory usage.")

for root, _, files in os.walk(
_LOCAL_LOGS_LOCATION + "/logs/" + args.instance_id
):
for root, _, files in os.walk(_LOCAL_LOGS_LOCATION + "/" + args.instance_id):
if files:
print(f"Parsing directory {root} ...")
per_epoch_stats_file = root + "/per_epoch_stats.json"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def run_command(command: str):
print(result.stderr)


def createHelmInstallCommands(dlioWorkloads: list, instanceId: str):
def createHelmInstallCommands(dlioWorkloads: set, instanceId: str):
"""Create helm install commands for the given set of dlioWorkload objects."""
helm_commands = []
for dlioWorkload in dlioWorkloads:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ spec:
sleep 300
{{ end }}
outputDir=/logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}
echo "Testing {{ .Values.scenario }}"
mpirun -np 8 dlio_benchmark workload=unet3d_a100 \
++workload.train.epochs=4 \
Expand All @@ -84,11 +86,11 @@ spec:
++workload.reader.batch_size={{ .Values.dlio.batchSize }} \
++workload.dataset.record_length={{ .Values.dlio.recordLength }} \
++workload.reader.read_threads={{ .Values.dlio.readThreads }} \
++workload.output.folder=/logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}
++workload.output.folder=${outputDir}
# dump the gcsfuse-mount-configuration to a file in output-directory.
{{ if eq .Values.scenario "gcsfuse-generic"}}
echo "{{ .Values.gcsfuse.mountOptions }}" > /logs/{{ .Values.instanceId }}/{{ .Values.dlio.numFilesTrain }}-{{ .Values.dlio.recordLength }}-{{ .Values.dlio.batchSize }}/{{ .Values.scenario }}/gcsfuse_mount_options
echo "{{ .Values.gcsfuse.mountOptions }}" > ${outputDir}/gcsfuse_mount_options
{{ end }}
gsutil -m cp -R /logs/{{ .Values.instanceId }} gs://{{ .Values.bucketName }}/logs/{{ .Values.instanceId }}/$(date +"%Y-%m-%d-%H-%M")
Expand Down

0 comments on commit 066952e

Please sign in to comment.