data: add tensor support to data provider interface (#2979)

wchargin · web-flow · commit 1f245b2ddcb5 · 2019-12-02T19:38:08.000-08:00
Summary:
This commit specifies the `list_tensors` and `read_tensors` methods on
the data provider interface. These methods are optional for now (i.e.,
not decorated with `abc.abstractmethod`) for compatibility reasons, but
we’ll make them required soon.

Test Plan:
Unit tests included.

wchargin-branch: data-tensors-interface
diff --git a/tensorboard/data/BUILD b/tensorboard/data/BUILD
@@ -12,6 +12,7 @@ py_library(
     srcs = ["provider.py"],
     srcs_version = "PY2AND3",
     deps = [
+        "//tensorboard:expect_numpy_installed",
         "@org_pythonhosted_six",
     ],
 )
@@ -24,6 +25,7 @@ py_test(
     tags = ["support_notf"],
     deps = [
         ":provider",
+        "//tensorboard:expect_numpy_installed",
         "//tensorboard:test",
         "@org_pythonhosted_six",
     ],
diff --git a/tensorboard/data/provider.py b/tensorboard/data/provider.py
@@ -22,6 +22,7 @@
 import collections
 
 import six
+import numpy as np
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -120,12 +121,56 @@ def read_scalars(
     """
     pass
 
-  def list_tensors(self):
-    """Not yet specified."""
+  def list_tensors(self, experiment_id, plugin_name, run_tag_filter=None):
+    """List metadata about tensor time series.
+
+    This method is not marked abstract; the implementation here is a
+    stub that does nothing and returns `None`.
+
+    Args:
+      experiment_id: ID of enclosing experiment.
+      plugin_name: String name of the TensorBoard plugin that created
+        the data to be queried. Required.
+      run_tag_filter: Optional `RunTagFilter` value. If omitted, all
+        runs and tags will be included.
+
+    The result will only contain keys for run-tag combinations that
+    actually exist, which may not include all entries in the
+    `run_tag_filter`.
+
+    Returns:
+      A nested map `d` such that `d[run][tag]` is a `TensorTimeSeries`
+      value.
+
+    Raises:
+      tensorboard.errors.PublicError: See `DataProvider` class docstring.
+    """
+    # Stub body: presumably concrete providers override this method.
     pass
 
-  def read_tensors(self):
-    """Not yet specified."""
+  def read_tensors(
+      self, experiment_id, plugin_name, downsample=None, run_tag_filter=None
+  ):
+    """Read values from tensor time series.
+
+    This method is not marked abstract; the implementation here is a
+    stub that does nothing and returns `None`.
+
+    Args:
+      experiment_id: ID of enclosing experiment.
+      plugin_name: String name of the TensorBoard plugin that created
+        the data to be queried. Required.
+      downsample: Integer number of steps to which to downsample the
+        results (e.g., `1000`). Required.
+      run_tag_filter: Optional `RunTagFilter` value. If provided, a time
+        series will only be included in the result if its run and tag
+        both pass this filter. If `None`, all time series will be
+        included.
+
+    The result will only contain keys for run-tag combinations that
+    actually exist, which may not include all entries in the
+    `run_tag_filter`.
+
+    Returns:
+      A nested map `d` such that `d[run][tag]` is a list of
+      `TensorDatum` values sorted by step.
+
+    Raises:
+      tensorboard.errors.PublicError: See `DataProvider` class docstring.
+    """
+    # NOTE(review): `downsample` is documented as required yet defaults to
+    # `None` in the signature; presumably the default only allows passing
+    # it by keyword. Confirm how implementations should treat `None`.
     pass
 
   def list_blob_sequences(
@@ -392,6 +437,110 @@ def __repr__(self):
     ))
 
 
+class TensorTimeSeries(_TimeSeries):
+  """Metadata about a tensor time series for a particular run and tag.
+
+  Attributes:
+    max_step: The largest step value of any datum in this tensor time series; a
+      nonnegative integer.
+    max_wall_time: The largest wall time of any datum in this time series, as
+      `float` seconds since epoch.
+    plugin_content: A bytestring of arbitrary plugin-specific metadata for this
+      time series, as provided to `tf.summary.write` in the
+      `plugin_data.content` field of the `metadata` argument.
+    description: An optional long-form Markdown description, as a `str` that is
+      empty if no description was specified.
+    display_name: An optional display name for this time series, as a `str`
+      that is empty if no display name was specified. Deprecated; may be
+      removed soon.
+  """
+
+  # NOTE(review): the underscore-prefixed fields read below are not assigned
+  # in this class; presumably `_TimeSeries.__init__` sets them. Confirm.
+
+  def __eq__(self, other):
+    """Returns `True` iff `other` is a `TensorTimeSeries` with equal fields."""
+    # Objects that are not `TensorTimeSeries` instances never compare equal,
+    # whatever fields they carry.
+    if not isinstance(other, TensorTimeSeries):
+      return False
+    if self._max_step != other._max_step:
+      return False
+    if self._max_wall_time != other._max_wall_time:
+      return False
+    if self._plugin_content != other._plugin_content:
+      return False
+    if self._description != other._description:
+      return False
+    if self._display_name != other._display_name:
+      return False
+    return True
+
+  def __hash__(self):
+    """Hashes exactly the fields that `__eq__` compares."""
+    return hash((
+        self._max_step,
+        self._max_wall_time,
+        self._plugin_content,
+        self._description,
+        self._display_name,
+    ))
+
+  def __repr__(self):
+    """Returns a `TensorTimeSeries(field=value, ...)` debugging string."""
+    return "TensorTimeSeries(%s)" % ", ".join((
+        "max_step=%r" % (self._max_step,),
+        "max_wall_time=%r" % (self._max_wall_time,),
+        "plugin_content=%r" % (self._plugin_content,),
+        "description=%r" % (self._description,),
+        "display_name=%r" % (self._display_name,),
+    ))
+
+
+class TensorDatum(object):
+  """A single datum in a tensor time series for a run and tag.
+
+  Two data compare equal when their steps and wall times are equal and
+  their arrays satisfy `np.array_equal`. Instances are deliberately
+  unhashable, because the wrapped array is mutable.
+
+  Attributes:
+    step: The global step at which this datum occurred; an integer. This
+      is a unique key among data of this time series.
+    wall_time: The real-world time at which this datum occurred, as
+      `float` seconds since epoch.
+    numpy: The `numpy.ndarray` value with the tensor contents of this
+      datum.
+  """
+
+  __slots__ = ("_step", "_wall_time", "_numpy")
+
+  def __init__(self, step, wall_time, numpy):
+    """Stores the given values as-is, without copying or validating them.
+
+    Args:
+      step: See the `step` attribute.
+      wall_time: See the `wall_time` attribute.
+      numpy: See the `numpy` attribute. The array is held by reference.
+    """
+    self._step = step
+    self._wall_time = wall_time
+    self._numpy = numpy
+
+  @property
+  def step(self):
+    return self._step
+
+  @property
+  def wall_time(self):
+    return self._wall_time
+
+  @property
+  def numpy(self):
+    return self._numpy
+
+  def __eq__(self, other):
+    """Returns `True` iff `other` is a `TensorDatum` with equal contents."""
+    if not isinstance(other, TensorDatum):
+      return False
+    if self._step != other._step:
+      return False
+    if self._wall_time != other._wall_time:
+      return False
+    # `==` on arrays is elementwise and has no single truth value in
+    # general, so compare whole arrays with `np.array_equal` instead.
+    if not np.array_equal(self._numpy, other._numpy):
+      return False
+    return True
+
+  def __ne__(self, other):
+    """Returns the negation of `__eq__`.
+
+    Python 2 does not derive `!=` from `__eq__`; without this method, two
+    equal data would also compare unequal there. On Python 3 this matches
+    the default behavior.
+    """
+    return not self == other
+
+  # Unhashable type: numpy arrays are mutable.
+  __hash__ = None
+
+  def __repr__(self):
+    """Returns a `TensorDatum(field=value, ...)` debugging string."""
+    return "TensorDatum(%s)" % ", ".join((
+        "step=%r" % (self._step,),
+        "wall_time=%r" % (self._wall_time,),
+        "numpy=%r" % (self._numpy,),
+    ))
+
+
 class BlobSequenceTimeSeries(_TimeSeries):
   """Metadata about a blob sequence time series for a particular run and tag.
 
diff --git a/tensorboard/data/provider_test.py b/tensorboard/data/provider_test.py
@@ -18,6 +18,7 @@
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
 import six
 
 from tensorboard import test as tb_test
@@ -109,6 +110,72 @@ def test_hash(self):
     self.assertNotEqual(hash(x1), hash(x3))
 
 
+class TensorTimeSeriesTest(tb_test.TestCase):
+  """Checks `repr`, equality, and hashing of `provider.TensorTimeSeries`."""
+
+  def test_repr(self):
+    series = provider.TensorTimeSeries(
+        max_step=77,
+        max_wall_time=1234.5,
+        plugin_content=b"AB\xCD\xEF!\x00",
+        description="test test",
+        display_name="one two",
+    )
+    rendered = repr(series)
+    # Every public field should show up, in `repr` form, in the output.
+    fields = (
+        series.max_step,
+        series.max_wall_time,
+        series.plugin_content,
+        series.description,
+        series.display_name,
+    )
+    for field in fields:
+      self.assertIn(repr(field), rendered)
+
+  def test_eq(self):
+    shared_args = (77, 1234.5, b"\x12", "one", "two")
+    first = provider.TensorTimeSeries(*shared_args)
+    twin = provider.TensorTimeSeries(*shared_args)
+    different = provider.TensorTimeSeries(66, 4321.0, b"\x7F", "hmm", "hum")
+    self.assertEqual(first, twin)
+    self.assertNotEqual(first, different)
+    self.assertNotEqual(first, object())
+
+  def test_hash(self):
+    shared_args = (77, 1234.5, b"\x12", "one", "two")
+    first = provider.TensorTimeSeries(*shared_args)
+    twin = provider.TensorTimeSeries(*shared_args)
+    different = provider.TensorTimeSeries(66, 4321.0, b"\x7F", "hmm", "hum")
+    self.assertEqual(hash(first), hash(twin))
+    # Unequal values are allowed to collide under the `__hash__` contract,
+    # so this is a sanity check only; a failure here would still deserve
+    # a closer look.
+    self.assertNotEqual(hash(first), hash(different))
+
+
+class TensorDatumTest(tb_test.TestCase):
+  """Checks `repr`, equality, and unhashability of `provider.TensorDatum`."""
+
+  def test_repr(self):
+    x = provider.TensorDatum(step=123, wall_time=234.5, numpy=np.array(-0.25))
+    repr_ = repr(x)
+    self.assertIn(repr(x.step), repr_)
+    self.assertIn(repr(x.wall_time), repr_)
+    self.assertIn(repr(x.numpy), repr_)
+
+  def test_eq(self):
+    nd = np.array
+    x1 = provider.TensorDatum(step=12, wall_time=0.25, numpy=nd([1.0, 2.0]))
+    x2 = provider.TensorDatum(step=12, wall_time=0.25, numpy=nd([1.0, 2.0]))
+    x3 = provider.TensorDatum(step=23, wall_time=3.25, numpy=nd([-0.5, -2.5]))
+    self.assertEqual(x1, x2)
+    self.assertNotEqual(x1, x3)
+    self.assertNotEqual(x1, object())
+
+  def test_eq_with_rank0_tensor(self):
+    # `np.array(1.25)` (no brackets) is a true rank-0 array with shape `()`.
+    # `np.array([1.25])` would be rank 1 and merely repeat `test_eq`.
+    x1 = provider.TensorDatum(step=12, wall_time=0.25, numpy=np.array(1.25))
+    x2 = provider.TensorDatum(step=12, wall_time=0.25, numpy=np.array(1.25))
+    x3 = provider.TensorDatum(step=23, wall_time=3.25, numpy=np.array(1.25))
+    # Same step and wall time as `x1`; only the scalar value differs.
+    x4 = provider.TensorDatum(step=12, wall_time=0.25, numpy=np.array(2.5))
+    self.assertEqual(x1, x2)
+    self.assertNotEqual(x1, x3)
+    self.assertNotEqual(x1, x4)
+    self.assertNotEqual(x1, object())
+
+  def test_hash(self):
+    x = provider.TensorDatum(step=12, wall_time=0.25, numpy=np.array([1.25]))
+    with six.assertRaisesRegex(self, TypeError, "unhashable type"):
+      hash(x)
+
+
 class BlobSequenceTimeSeriesTest(tb_test.TestCase):
 
   def test_repr(self):