Skip to content

Commit 995b88b

Browse files
wchargin authored and nfelt committed
uploader: export experiment metadata (#3308)
Summary:
When running `tensorboard dev export`, experiment names and descriptions (set with the `update-metadata` subcommand or the `--name` and `--description` flags to `upload`), as well as experiment creation and modification times, are now emitted to a new `metadata.json` file in the experiment directory.

Test Plan:
Unit tests included. Also end-to-end tested against a live server, exporting some experiments with name/description and some without.

wchargin-branch: uploader-export-metadata
1 parent 909bf85 commit 995b88b

File tree

5 files changed

+105
-4
lines changed

5 files changed

+105
-4
lines changed

tensorboard/uploader/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ py_test(
3131
deps = [
3232
":exporter_lib",
3333
":test_util",
34+
":util",
3435
"//tensorboard:expect_grpc_installed",
3536
"//tensorboard:expect_grpc_testing_installed",
3637
"//tensorboard:test",

tensorboard/uploader/exporter.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
# Maximum value of a signed 64-bit integer.
4646
_MAX_INT64 = 2 ** 63 - 1
4747

48+
# Output filename for experiment metadata (creation time, description,
49+
# etc.) within an experiment directory.
50+
_FILENAME_METADATA = "metadata.json"
4851
# Output filename for scalar data within an experiment directory.
4952
_FILENAME_SCALARS = "scalars.json"
5053

@@ -118,11 +121,32 @@ def export(self, read_time=None):
118121
"""
119122
if read_time is None:
120123
read_time = time.time()
121-
for experiment in list_experiments(self._api, read_time=read_time):
124+
experiment_metadata_mask = experiment_pb2.ExperimentMask(
125+
create_time=True, update_time=True, name=True, description=True,
126+
)
127+
experiments = list_experiments(
128+
self._api, fieldmask=experiment_metadata_mask, read_time=read_time
129+
)
130+
for experiment in experiments:
122131
experiment_id = experiment.experiment_id
132+
experiment_metadata = {
133+
"name": experiment.name,
134+
"description": experiment.description,
135+
"create_time": util.format_time_absolute(
136+
experiment.create_time
137+
),
138+
"update_time": util.format_time_absolute(
139+
experiment.update_time
140+
),
141+
}
123142
experiment_dir = _experiment_directory(self._outdir, experiment_id)
124143
os.mkdir(experiment_dir)
125144

145+
metadata_filepath = os.path.join(experiment_dir, _FILENAME_METADATA)
146+
with _open_excl(metadata_filepath) as outfile:
147+
json.dump(experiment_metadata, outfile, sort_keys=True)
148+
outfile.write("\n")
149+
126150
scalars_filepath = os.path.join(experiment_dir, _FILENAME_SCALARS)
127151
try:
128152
with _open_excl(scalars_filepath) as outfile:

tensorboard/uploader/exporter_test.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from tensorboard.uploader.proto import export_service_pb2_grpc
3939
from tensorboard.uploader import exporter as exporter_lib
4040
from tensorboard.uploader import test_util
41+
from tensorboard.uploader import util
4142
from tensorboard.util import grpc_util
4243
from tensorboard import test as tb_test
4344
from tensorboard.compat.proto import summary_pb2
@@ -64,8 +65,20 @@ def test_e2e_success_case(self):
6465
def stream_experiments(request, **kwargs):
6566
del request # unused
6667
self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
67-
yield _make_experiments_response(["123", "456"])
68-
yield _make_experiments_response(["789"])
68+
69+
response = export_service_pb2.StreamExperimentsResponse()
70+
response.experiments.add(experiment_id="123")
71+
response.experiments.add(experiment_id="456")
72+
yield response
73+
74+
response = export_service_pb2.StreamExperimentsResponse()
75+
experiment = response.experiments.add()
76+
experiment.experiment_id = "789"
77+
experiment.name = "bert"
78+
experiment.description = "ernie"
79+
util.set_timestamp(experiment.create_time, 981173106)
80+
util.set_timestamp(experiment.update_time, 1015218367)
81+
yield response
6982

7083
def stream_experiment_data(request, **kwargs):
7184
self.assertEqual(kwargs["metadata"], grpc_util.version_metadata())
@@ -115,12 +128,17 @@ def outdir_files():
115128
# The first iteration should request the list of experiments and
116129
# data for one of them.
117130
self.assertEqual(next(generator), "123")
131+
expected_files.append(os.path.join("experiment_123", "metadata.json"))
118132
expected_files.append(os.path.join("experiment_123", "scalars.json"))
119133
self.assertCountEqual(expected_files, outdir_files())
120134

121135
expected_eids_request = export_service_pb2.StreamExperimentsRequest()
122136
expected_eids_request.read_timestamp.CopyFrom(start_time_pb)
123137
expected_eids_request.limit = 2 ** 63 - 1
138+
expected_eids_request.experiments_mask.create_time = True
139+
expected_eids_request.experiments_mask.update_time = True
140+
expected_eids_request.experiments_mask.name = True
141+
expected_eids_request.experiments_mask.description = True
124142
mock_api_client.StreamExperiments.assert_called_once_with(
125143
expected_eids_request, metadata=grpc_util.version_metadata()
126144
)
@@ -137,6 +155,7 @@ def outdir_files():
137155
mock_api_client.StreamExperimentData.reset_mock()
138156
self.assertEqual(next(generator), "456")
139157

158+
expected_files.append(os.path.join("experiment_456", "metadata.json"))
140159
expected_files.append(os.path.join("experiment_456", "scalars.json"))
141160
self.assertCountEqual(expected_files, outdir_files())
142161
mock_api_client.StreamExperiments.assert_not_called()
@@ -147,6 +166,7 @@ def outdir_files():
147166

148167
# Again, request data for the next experiment; this experiment ID
149168
# was in the second response batch in the list of IDs.
169+
expected_files.append(os.path.join("experiment_789", "metadata.json"))
150170
expected_files.append(os.path.join("experiment_789", "scalars.json"))
151171
mock_api_client.StreamExperiments.reset_mock()
152172
mock_api_client.StreamExperimentData.reset_mock()
@@ -192,6 +212,21 @@ def outdir_files():
192212
self.assertEqual(points, {})
193213
self.assertEqual(datum, {})
194214

215+
# Spot-check one of the metadata files.
216+
with open(
217+
os.path.join(outdir, "experiment_789", "metadata.json")
218+
) as infile:
219+
metadata = json.load(infile)
220+
self.assertEqual(
221+
metadata,
222+
{
223+
"name": "bert",
224+
"description": "ernie",
225+
"create_time": "2001-02-03T04:05:06Z",
226+
"update_time": "2002-03-04T05:06:07Z",
227+
},
228+
)
229+
195230
def test_rejects_dangerous_experiment_ids(self):
196231
mock_api_client = self._create_mock_api_client()
197232

tensorboard/uploader/util.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ def format_time(timestamp_pb, now=None):
123123
"""Converts a `timestamp_pb2.Timestamp` to human-readable string.
124124
125125
This always includes the absolute date and time, and for recent dates
126-
may include a relative time like "(just now)" or "(2 hours ago)".
126+
may include a relative time like "(just now)" or "(2 hours ago)". It
127+
should thus be used for ephemeral values. Use `format_time_absolute`
128+
if the output will be persisted.
127129
128130
Args:
129131
timestamp_pb: A `google.protobuf.timestamp_pb2.Timestamp` value to
@@ -163,5 +165,21 @@ def ago_text(n, singular, plural):
163165
return str(dt) + relative_part
164166

165167

168+
def format_time_absolute(timestamp_pb):
    """Converts a `timestamp_pb2.Timestamp` to UTC time string.

    This will always be of the form "2001-02-03T04:05:06Z". Only the
    whole-second field (`timestamp_pb.seconds`) is read, so any
    sub-second precision on the input is dropped. The result does not
    depend on the process's local time zone.

    Args:
      timestamp_pb: A `google.protobuf.timestamp_pb2.Timestamp` value to
        convert to string. The input will not be modified.

    Returns:
      An RFC 3339 date-time string.
    """
    # Offset from the Unix epoch with plain arithmetic instead of calling
    # `datetime.utcfromtimestamp`: that helper is deprecated as of Python
    # 3.12 and is bounded by the platform's C `gmtime`, which on some
    # systems rejects timestamps before 1970.
    epoch = datetime.datetime(1970, 1, 1)
    dt = epoch + datetime.timedelta(seconds=timestamp_pb.seconds)
    return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
182+
183+
166184
def _ngettext(n, singular, plural):
167185
return "%d %s ago" % (n, singular if n == 1 else plural)

tensorboard/uploader/util_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,5 +253,28 @@ def test_long_ago(self):
253253
self.assertEqual(actual, "2019-01-02 03:04:05")
254254

255255

256+
class FormatTimeAbsoluteTest(tb_test.TestCase):
    """Tests for `util.format_time_absolute`."""

    # Seconds since the epoch for 2001-02-03T04:05:06Z.
    _SECONDS = 981173106
    _EXPECTED = "2001-02-03T04:05:06Z"

    def _run(self, t=None, tz=None):
        """Formats `t` while the process time zone is overridden to `tz`.

        Args:
          t: Time to format, passed through to `util.set_timestamp`.
          tz: Value for the `TZ` environment variable during the call.

        Returns:
          The string produced by `util.format_time_absolute`.
        """
        stamp = timestamp_pb2.Timestamp()
        util.set_timestamp(stamp, t)
        try:
            with mock.patch.dict(os.environ, {"TZ": tz}):
                # Make the C library pick up the patched `TZ` value.
                time.tzset()
                return util.format_time_absolute(stamp)
        finally:
            # The environment has been restored by now; re-sync the C
            # library so later tests see the original time zone.
            time.tzset()

    def test_in_tz_utc(self):
        actual = self._run(self._SECONDS, tz="UTC")
        self.assertEqual(actual, self._EXPECTED)

    def test_in_tz_nonutc(self):
        # Shouldn't be affected by timezone.
        actual = self._run(self._SECONDS, tz="America/Los_Angeles")
        self.assertEqual(actual, self._EXPECTED)
277+
278+
256279
if __name__ == "__main__":
257280
tb_test.main()

0 commit comments

Comments
 (0)