Skip to content

Commit 8800eee

Browse files
wchargin authored and nfelt committed
uploader: export experiments to subdirectories (#3307)
Summary: We now emit an experiment’s scalar data to `experiment_123/scalars.json` rather than `scalars_123.json`. This generalizes more cleanly to writing multiple files under an experiment’s directory.

Test Plan: Unit tests updated. One previous test case (`test_rejects_existing_file`) can no longer be hit and has thus been removed.

wchargin-branch: uploader-export-subdirs
1 parent 9aa5f0a commit 8800eee

File tree

2 files changed

+31
-38
lines changed

2 files changed

+31
-38
lines changed

tensorboard/uploader/exporter.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
# Maximum value of a signed 64-bit integer.
4545
_MAX_INT64 = 2 ** 63 - 1
4646

47+
# Output filename for scalar data within an experiment directory.
48+
_FILENAME_SCALARS = "scalars.json"
49+
4750

4851
class TensorBoardExporter(object):
4952
"""Exports all of the user's experiment data from TensorBoard.dev.
@@ -113,9 +116,12 @@ def export(self, read_time=None):
113116
if read_time is None:
114117
read_time = time.time()
115118
for experiment_id in self._request_experiment_ids(read_time):
116-
filepath = _scalars_filepath(self._outdir, experiment_id)
119+
experiment_dir = _experiment_directory(self._outdir, experiment_id)
120+
os.mkdir(experiment_dir)
121+
122+
scalars_filepath = os.path.join(experiment_dir, _FILENAME_SCALARS)
117123
try:
118-
with _open_excl(filepath) as outfile:
124+
with _open_excl(scalars_filepath) as outfile:
119125
data = self._request_scalar_data(experiment_id, read_time)
120126
for block in data:
121127
json.dump(block, outfile, sort_keys=True)
@@ -221,8 +227,7 @@ def __init__(self, experiment_id):
221227
self.experiment_id = experiment_id
222228

223229

224-
def _scalars_filepath(base_dir, experiment_id):
225-
"""Gets file path in which to store scalars for the given experiment."""
230+
def _experiment_directory(base_dir, experiment_id):
226231
# Experiment IDs from the server should be filename-safe; verify
227232
# this before creating any files.
228233
bad_chars = frozenset(experiment_id) - _FILENAME_SAFE_CHARS
@@ -232,7 +237,7 @@ def _scalars_filepath(base_dir, experiment_id):
232237
bad_chars=sorted(bad_chars), eid=experiment_id
233238
)
234239
)
235-
return os.path.join(base_dir, "scalars_%s.json" % experiment_id)
240+
return os.path.join(base_dir, "experiment_%s" % experiment_id)
236241

237242

238243
def _mkdir_p(path):

tensorboard/uploader/exporter_test.py

Lines changed: 21 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,27 @@ def stream_experiment_data(request, **kwargs):
9999
start_time = 1571084846.25
100100
start_time_pb = test_util.timestamp_pb(1571084846250000000)
101101

102+
def outdir_files():
103+
# Recursively list `outdir`.
104+
result = []
105+
for (dirpath, dirnames, filenames) in os.walk(outdir):
106+
for filename in filenames:
107+
fullpath = os.path.join(dirpath, filename)
108+
result.append(os.path.relpath(fullpath, outdir))
109+
return result
110+
102111
generator = exporter.export(read_time=start_time)
103112
expected_files = []
104113
self.assertTrue(os.path.isdir(outdir))
105-
self.assertCountEqual(expected_files, os.listdir(outdir))
114+
self.assertCountEqual(expected_files, outdir_files())
106115
mock_api_client.StreamExperiments.assert_not_called()
107116
mock_api_client.StreamExperimentData.assert_not_called()
108117

109118
# The first iteration should request the list of experiments and
110119
# data for one of them.
111120
self.assertEqual(next(generator), "123")
112-
expected_files.append("scalars_123.json")
113-
self.assertCountEqual(expected_files, os.listdir(outdir))
121+
expected_files.append(os.path.join("experiment_123", "scalars.json"))
122+
self.assertCountEqual(expected_files, outdir_files())
114123

115124
expected_eids_request = export_service_pb2.StreamExperimentsRequest()
116125
expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
@@ -131,8 +140,8 @@ def stream_experiment_data(request, **kwargs):
131140
mock_api_client.StreamExperimentData.reset_mock()
132141
self.assertEqual(next(generator), "456")
133142

134-
expected_files.append("scalars_456.json")
135-
self.assertCountEqual(expected_files, os.listdir(outdir))
143+
expected_files.append(os.path.join("experiment_456", "scalars.json"))
144+
self.assertCountEqual(expected_files, outdir_files())
136145
mock_api_client.StreamExperiments.assert_not_called()
137146
expected_data_request.experiment_id = "456"
138147
mock_api_client.StreamExperimentData.assert_called_once_with(
@@ -141,12 +150,12 @@ def stream_experiment_data(request, **kwargs):
141150

142151
# Again, request data for the next experiment; this experiment ID
143152
# was in the second response batch in the list of IDs.
144-
expected_files.append("scalars_789.json")
153+
expected_files.append(os.path.join("experiment_789", "scalars.json"))
145154
mock_api_client.StreamExperiments.reset_mock()
146155
mock_api_client.StreamExperimentData.reset_mock()
147156
self.assertEqual(next(generator), "789")
148157

149-
self.assertCountEqual(expected_files, os.listdir(outdir))
158+
self.assertCountEqual(expected_files, outdir_files())
150159
mock_api_client.StreamExperiments.assert_not_called()
151160
expected_data_request.experiment_id = "789"
152161
mock_api_client.StreamExperimentData.assert_called_once_with(
@@ -158,12 +167,14 @@ def stream_experiment_data(request, **kwargs):
158167
mock_api_client.StreamExperimentData.reset_mock()
159168
self.assertEqual(list(generator), [])
160169

161-
self.assertCountEqual(expected_files, os.listdir(outdir))
170+
self.assertCountEqual(expected_files, outdir_files())
162171
mock_api_client.StreamExperiments.assert_not_called()
163172
mock_api_client.StreamExperimentData.assert_not_called()
164173

165-
# Spot-check one of the files.
166-
with open(os.path.join(outdir, "scalars_456.json")) as infile:
174+
# Spot-check one of the scalar data files.
175+
with open(
176+
os.path.join(outdir, "experiment_456", "scalars.json")
177+
) as infile:
167178
jsons = [json.loads(line) for line in infile]
168179
self.assertLen(jsons, 4)
169180
datum = jsons[2]
@@ -309,29 +320,6 @@ def test_rejects_existing_directory(self):
309320
mock_api_client.StreamExperiments.assert_not_called()
310321
mock_api_client.StreamExperimentData.assert_not_called()
311322

312-
def test_rejects_existing_file(self):
313-
mock_api_client = self._create_mock_api_client()
314-
315-
def stream_experiments(request, **kwargs):
316-
del request # unused
317-
yield export_service_pb2.StreamExperimentsResponse(
318-
experiment_ids=["123"]
319-
)
320-
321-
mock_api_client.StreamExperiments = stream_experiments
322-
323-
outdir = os.path.join(self.get_temp_dir(), "outdir")
324-
exporter = exporter_lib.TensorBoardExporter(mock_api_client, outdir)
325-
generator = exporter.export()
326-
327-
with open(os.path.join(outdir, "scalars_123.json"), "w"):
328-
pass
329-
330-
with self.assertRaises(exporter_lib.OutputFileExistsError):
331-
next(generator)
332-
333-
mock_api_client.StreamExperimentData.assert_not_called()
334-
335323
def test_propagates_mkdir_errors(self):
336324
mock_api_client = self._create_mock_api_client()
337325
outdir = os.path.join(self.get_temp_dir(), "some_file", "outdir")

0 commit comments

Comments
 (0)