|
16 | 16 | import glob |
17 | 17 | import os |
18 | 18 | import re |
19 | | -import numpy as np |
| 19 | +from pathlib import Path |
20 | 20 |
|
| 21 | +import numpy as np |
21 | 22 | import torch |
22 | 23 | from torch.utils.tensorboard import SummaryWriter |
23 | 24 |
|
@@ -69,17 +70,11 @@ def log_grads(self, step, model): |
69 | 70 | stat=stat) |
70 | 71 |
|
71 | 72 |
|
def unique_log_fpath(fpath):
    """Have a unique log filename for every separate run.

    Scans for existing sibling files named ``<fpath>.<N>`` and returns
    ``<fpath>.<max(N) + 1>`` (``<fpath>.1`` when none exist), so old
    logs are never overwritten.

    :param fpath: base log file path (e.g. ``out/nvlog.json``)
    :return: ``fpath`` with the next unused numeric suffix appended
    """
    # Raw string avoids the invalid-escape DeprecationWarning on `\d`.
    # Anchor at end and skip files whose suffix is not purely numeric
    # (e.g. `nvlog.json.bak` matches the glob but would previously make
    # `.group(1)` blow up on a None match.
    matches = (re.search(r"\.(\d+)$", Path(f).suffix)
               for f in glob.glob(f"{fpath}.*"))
    log_num = max([0] + [int(m.group(1)) for m in matches if m])
    return f"{fpath}.{log_num + 1}"
83 | 78 |
|
84 | 79 |
|
85 | 80 | def stdout_step_format(step): |
@@ -108,11 +103,12 @@ def init_log(args): |
108 | 103 | enabled = (args.local_rank == 0) |
109 | 104 | if enabled: |
110 | 105 | fpath = args.log_file or os.path.join(args.output_dir, 'nvlog.json') |
111 | | - backends = [JSONStreamBackend(Verbosity.DEFAULT, |
112 | | - unique_log_fpath(fpath)), |
113 | | - StdOutBackend(Verbosity.VERBOSE, |
114 | | - step_format=stdout_step_format, |
115 | | - metric_format=stdout_metric_format)] |
| 106 | + backends = [ |
| 107 | + JSONStreamBackend(Verbosity.DEFAULT, fpath, append=True), |
| 108 | + JSONStreamBackend(Verbosity.DEFAULT, unique_log_fpath(fpath)), |
| 109 | + StdOutBackend(Verbosity.VERBOSE, step_format=stdout_step_format, |
| 110 | + metric_format=stdout_metric_format) |
| 111 | + ] |
116 | 112 | else: |
117 | 113 | backends = [] |
118 | 114 |
|
|
0 commit comments