Skip to content

Commit 337fae2

Browse files
committed
Merge: [GPUNet/PyTorch] Better resume logging functionality
2 parents 10060de + 0c5e2ab commit 337fae2

1 file changed

Lines changed: 10 additions & 9 deletions

File tree

PyTorch/Classification/GPUNet/train.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@
3232
import argparse
3333
import logging
3434
import os
35+
import glob
36+
import re
37+
from pathlib import Path
3538
import time
3639
from collections import OrderedDict
3740
from contextlib import suppress
3841
from datetime import datetime
39-
import itertools
4042
import dllogger
4143

4244
import torch
@@ -1008,6 +1010,11 @@ def _parse_args():
10081010
args_text = yaml.safe_dump(args.__dict__, default_flow_style=False)
10091011
return args, args_text
10101012

1013+
def unique_log_fpath(fpath):
    """Return a unique log filename for every separate run.

    Scans the directory for existing files named ``<fpath>.<N>`` and
    returns ``<fpath>.<max(N) + 1>`` (``<fpath>.1`` when none exist),
    so each run writes to a fresh, numbered log file.

    Args:
        fpath: base log file path, e.g. ``"out/log.json"``.

    Returns:
        str: ``f"{fpath}.{N}"`` where N is one greater than the highest
        numeric suffix already present on disk.
    """
    log_num = 0
    for existing in glob.glob(f"{fpath}.*"):
        # The glob also matches non-numeric suffixes (e.g. "log.json.bak");
        # re.search then returns None, so guard instead of calling
        # .group(1) unconditionally and crashing with AttributeError.
        match = re.search(r"\.(\d+)", Path(existing).suffix)
        if match:
            log_num = max(log_num, int(match.group(1)))
    return f"{fpath}.{log_num + 1}"
10111018

10121019
def main():
10131020

@@ -1101,16 +1108,10 @@ def main():
11011108
if dllogger_dir and not os.path.exists(dllogger_dir):
11021109
os.makedirs(dllogger_dir, exist_ok=True)
11031110
log_path = args.dllogger_name
1104-
original_log_path = log_path
1105-
if os.path.exists(log_path):
1106-
for i in itertools.count():
1107-
s_fname = original_log_path.split('.')
1108-
log_path = '.'.join(s_fname[:-1]) + f'_{i}.' + s_fname[-1]
1109-
if not os.path.exists(log_path):
1110-
break
11111111
dllogger.init(
11121112
backends=[
1113-
dllogger.JSONStreamBackend(verbosity=1, filename=log_path),
1113+
dllogger.JSONStreamBackend(verbosity=1, filename=log_path, append=True),
1114+
dllogger.JSONStreamBackend(verbosity=1, filename=unique_log_fpath(log_path)),
11141115
dllogger.StdOutBackend(verbosity=0),
11151116
]
11161117
)

0 commit comments

Comments (0)