Revert "text: remove legacy PluginAsset support (#2417)"

wchargin · wchargin · commit 30dd2e27229c · 2019-09-12T13:56:40.000-07:00
Summary: Reverting because this is a breaking change that should be deferred until TensorBoard 2.0. This reverts commit 2149119. Test Plan: See the commit message of the commit being reverted. Follow that test plan, but verify that all data shows up instead of only the new data. wchargin-branch: reinstate-text-plugin-assets
diff --git a/tensorboard/plugins/text/BUILD b/tensorboard/plugins/text/BUILD
@@ -19,7 +19,9 @@ py_library(
         ":metadata",
         "//tensorboard:plugin_util",
         "//tensorboard/backend:http_util",
+        "//tensorboard/compat:tensorflow",
         "//tensorboard/plugins:base_plugin",
+        "//tensorboard/util:tb_logging",
         "//tensorboard/util:tensor_util",
         "@org_mozilla_bleach",
         "@org_pocoo_werkzeug",
diff --git a/tensorboard/plugins/text/text_plugin.py b/tensorboard/plugins/text/text_plugin.py
@@ -20,6 +20,8 @@
 
 import json
 import textwrap
+import threading
+import time
 
 # pylint: disable=g-bad-import-order
 # Necessary for an internal test with special behavior for numpy.
@@ -31,10 +33,14 @@
 
 from tensorboard import plugin_util
 from tensorboard.backend import http_util
+from tensorboard.compat import tf
 from tensorboard.plugins import base_plugin
 from tensorboard.plugins.text import metadata
+from tensorboard.util import tb_logging
 from tensorboard.util import tensor_util
 
+logger = tb_logging.get_logger()
+
 # HTTP routes
 TAGS_ROUTE = '/tags'
 TEXT_ROUTE = '/text'
@@ -202,6 +208,19 @@ def __init__(self, context):
     """
     self._multiplexer = context.multiplexer
 
+    # Cache the last result of index_impl() so that methods that depend on it
+    # can return without blocking (while kicking off a background thread to
+    # recompute the current index).
+    self._index_cached = None
+
+    # Lock that ensures that only one thread attempts to compute index_impl()
+    # at a given time, since it's expensive.
+    self._index_impl_lock = threading.Lock()
+
+    # Pointer to the current thread computing index_impl(), if any.  This is
+    # stored on TextPlugin only to facilitate testing.
+    self._index_impl_thread = None
+
   def is_active(self):
     """Determines whether this plugin is active.
 
@@ -212,23 +231,104 @@ def is_active(self):
     """
     if not self._multiplexer:
       return False
-    return bool(self._multiplexer.PluginRunToTagToContent(metadata.PLUGIN_NAME))
+
+    if self._index_cached is not None:
+      # If we already have computed the index, use it to determine whether
+      # the plugin should be active, and if so, return immediately.
+      if any(self._index_cached.values()):
+        return True
+
+    if self._multiplexer.PluginRunToTagToContent(metadata.PLUGIN_NAME):
+      # Text data is present in the multiplexer. No need to further check for
+      # data stored via the outdated plugin assets method.
+      return True
+
+    # We haven't conclusively determined if the plugin should be active. Launch
+    # a thread to compute index_impl() and return False to avoid blocking.
+    self._maybe_launch_index_impl_thread()
+
+    return False
 
   def frontend_metadata(self):
+    """Returns FrontendMetadata whose element_name is 'tf-text-dashboard'."""
     return base_plugin.FrontendMetadata(element_name='tf-text-dashboard')
 
+  def _maybe_launch_index_impl_thread(self):
+    """Starts a background index_impl() computation unless one is running.
+
+    The index lock doubles as the "computation in progress" flag: it is
+    acquired here without blocking and is released by the worker thread once
+    that thread is done. If the lock is already held, this call is a no-op.
+    """
+    if not self._index_impl_lock.acquire(False):
+      # Another computation already holds the lock; let it finish.
+      return
+
+    # Publish the worker on the instance before starting it so that callers
+    # (notably tests) can get hold of it.
+    worker = threading.Thread(
+        target=self._async_index_impl,
+        name='TextPluginIndexImplThread')
+    self._index_impl_thread = worker
+    worker.start()
+
+  def _async_index_impl(self):
+    """Computes index_impl() on a worker thread and caches the result.
+
+    Meant to be the target of the thread started by
+    _maybe_launch_index_impl_thread(), which acquires _index_impl_lock before
+    starting it. The lock is released and the thread pointer cleared even if
+    index_impl() raises; otherwise one failure would leave the lock held
+    forever and no later refresh could be launched. The exception itself
+    still propagates, and the previously cached index is left untouched.
+    """
+    start = time.time()
+    logger.info('TextPlugin computing index_impl() in a new thread')
+    try:
+      self._index_cached = self.index_impl()
+    finally:
+      # Always hand the lock back so that a later call can retry.
+      self._index_impl_thread = None
+      self._index_impl_lock.release()
+    elapsed = time.time() - start
+    logger.info(
+        'TextPlugin index_impl() thread ending after %0.3f sec', elapsed)
+
   def index_impl(self):
-    mapping = self._multiplexer.PluginRunToTagToContent(metadata.PLUGIN_NAME)
+    """Returns a dict mapping each run name to its text series.
+
+    Runs with text data in the multiplexer come first. Runs that appear only
+    in the 'tensorboard_text' plugin assets are then added: with the decoded
+    contents of their tensors.json (presumably a JSON list of tag names --
+    TODO confirm), or with an empty list when that file is absent.
+    """
+    run_to_series = self._fetch_run_to_series_from_multiplexer()
+
+    # A previous system of collecting and serving text summaries involved
+    # storing the tags of text summaries within tensors.json files. See if we
+    # are currently using that system. We do not want to drop support for that
+    # use case.
+    name = 'tensorboard_text'
+    run_to_assets = self._multiplexer.PluginAssets(name)
+    for run, assets in run_to_assets.items():
+      if run in run_to_series:
+        # When runs conflict, the summaries created via the new method override.
+        continue
+
+      if 'tensors.json' in assets:
+        tensors_json = self._multiplexer.RetrievePluginAsset(
+            run, name, 'tensors.json')
+        tensors = json.loads(tensors_json)
+        run_to_series[run] = tensors
+      else:
+        # The mapping should contain all runs among its keys.
+        run_to_series[run] = []
+
+    return run_to_series
+
+  def _fetch_run_to_series_from_multiplexer(self):
+    """Returns a dict mapping each run to a list of its text plugin tags.
+
+    Only the multiplexer's PluginRunToTagToContent() result for this plugin
+    is consulted; plugin assets are not read here.
+    """
+    # TensorBoard is obtaining summaries related to the text plugin based on
+    # SummaryMetadata stored within Value protos.
+    mapping = self._multiplexer.PluginRunToTagToContent(
+        metadata.PLUGIN_NAME)
     return {
-        run: list(tag_to_content)
+        run: list(tag_to_content.keys())
         for (run, tag_to_content)
         in six.iteritems(mapping)
     }
 
+  def tags_impl(self):
+    """Returns the run-to-tags mapping without waiting for a recompute.
+
+    Every call requests a background refresh of the index. The cached index
+    is returned when it is non-empty; otherwise the mapping is built from the
+    multiplexer alone, which requires no disk read.
+    """
+    # Request the refresh first; it runs on a separate thread, so this does
+    # not block.
+    self._maybe_launch_index_impl_thread()
+
+    cached_index = self._index_cached
+    if not cached_index:
+      # Nothing usable is cached yet: answer from the multiplexer data only.
+      return self._fetch_run_to_series_from_multiplexer()
+    return cached_index
+
   @wrappers.Request.application
   def tags_route(self, request):
-    index = self.index_impl()
-    return http_util.Respond(request, index, 'application/json')
+    """Responds to `request` with the tags_impl() mapping as JSON."""
+    response = self.tags_impl()
+    return http_util.Respond(request, response, 'application/json')
 
   def text_impl(self, run, tag):
     try:
diff --git a/tensorboard/plugins/text/text_plugin_test.py b/tensorboard/plugins/text/text_plugin_test.py
@@ -20,6 +20,7 @@
 from __future__ import print_function
 
 import collections
+import json
 import os
 import textwrap
 import numpy as np
@@ -328,12 +329,41 @@ def test_text_array_to_html(self):
       </table>""")
     self.assertEqual(convert(d3), d3_expected)
 
+  def assertIsActive(self, plugin, expected_finally_is_active):
+    """Helper to simulate threading for asserting on is_active().
+
+    threading.Thread.start is patched for the rest of the test, so the index
+    thread is never really started; its run() method is invoked directly
+    here instead.
+
+    Args:
+      plugin: The plugin whose is_active() is being checked. It must report
+        inactive on its first call.
+      expected_finally_is_active: Whether is_active() is expected to return
+        True once the index thread has run.
+    """
+    patcher = tf.compat.v1.test.mock.patch('threading.Thread.start', autospec=True)
+    mock = patcher.start()
+    self.addCleanup(patcher.stop)
+
+    # Initial response from is_active() is always False.
+    self.assertFalse(plugin.is_active())
+    thread = plugin._index_impl_thread
+    mock.assert_called_once_with(thread)
+
+    # The thread hasn't run yet, so is_active() should still be False, and we
+    # should not have tried to launch a second thread.
+    self.assertFalse(plugin.is_active())
+    mock.assert_called_once_with(thread)
+
+    # Run the thread; it should clean up after itself.
+    thread.run()
+    self.assertIsNone(plugin._index_impl_thread)
+
+    if expected_finally_is_active:
+      self.assertTrue(plugin.is_active())
+      # The call above shouldn't have launched a new thread.
+      mock.assert_called_once_with(thread)
+    else:
+      self.assertFalse(plugin.is_active())
+      # The call above should have launched a second thread to check again.
+      self.assertEqual(2, mock.call_count)
+
   def testPluginIsActiveWhenNoRuns(self):
     """The plugin should be inactive when there are no runs."""
     multiplexer = event_multiplexer.EventMultiplexer()
     context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer)
     plugin = text_plugin.TextPlugin(context)
-    self.assertFalse(plugin.is_active())
+    # is_active() launches a background index thread when it finds no data,
+    # so check it through the helper that runs that thread by hand.
+    self.assertIsActive(plugin, False)
 
   def testPluginIsActiveWhenTextRuns(self):
     """The plugin should be active when there are runs with text."""
@@ -342,7 +372,15 @@ def testPluginIsActiveWhenTextRuns(self):
     plugin = text_plugin.TextPlugin(context)
     multiplexer.AddRunsFromDirectory(self.logdir)
     multiplexer.Reload()
-    self.assertTrue(plugin.is_active())
+
+    patcher = tf.compat.v1.test.mock.patch('threading.Thread.start', autospec=True)
+    mock = patcher.start()
+    self.addCleanup(patcher.stop)
+    self.assertTrue(plugin.is_active(), True)
+
+    # Data is available within the multiplexer. No thread should have started
+    # for checking plugin assets data.
+    self.assertFalse(mock.called)
 
   def testPluginIsActiveWhenRunsButNoText(self):
     """The plugin should be inactive when there are runs but none has text."""
@@ -353,13 +391,99 @@ def testPluginIsActiveWhenRunsButNoText(self):
     self.generate_testdata(include_text=False, logdir=logdir)
     multiplexer.AddRunsFromDirectory(logdir)
     multiplexer.Reload()
-    self.assertFalse(plugin.is_active())
+    self.assertIsActive(plugin, False)
+
+  def testPluginTagsImpl(self):
+    """tags_impl() serves multiplexer data until the index thread has run."""
+    patcher = tf.compat.v1.test.mock.patch('threading.Thread.start', autospec=True)
+    mock = patcher.start()
+    self.addCleanup(patcher.stop)
 
-  def testPluginIndexImpl(self):
-    run_to_tags = self.plugin.index_impl()
+    # Initially, the thread for checking for plugin assets data has not run.
+    # Hence, the mapping should only have data from the multiplexer.
+    run_to_tags = self.plugin.tags_impl()
     self.assertItemsEqual(['fry', 'leela'], run_to_tags.keys())
     self.assertItemsEqual(['message', 'vector'], run_to_tags['fry'])
     self.assertItemsEqual(['message', 'vector'], run_to_tags['leela'])
+    thread = self.plugin._index_impl_thread
+    mock.assert_called_once_with(thread)
+
+    # The thread hasn't run yet, so no change in response, and we should not
+    # have tried to launch a second thread.
+    run_to_tags = self.plugin.tags_impl()
+    self.assertItemsEqual(['fry', 'leela'], run_to_tags.keys())
+    self.assertItemsEqual(['message', 'vector'], run_to_tags['fry'])
+    self.assertItemsEqual(['message', 'vector'], run_to_tags['leela'])
+    mock.assert_called_once_with(thread)
+
+    # Run the thread; it should clean up after itself.
+    thread.run()
+    self.assertIsNone(self.plugin._index_impl_thread)
+
+    # Expect response to be identical to calling index_impl() directly.
+    self.assertEqual(self.plugin.index_impl(), self.plugin.tags_impl())
+    # The call above should have launched a second thread to check again.
+    self.assertEqual(2, mock.call_count)
+
+
+class TextPluginBackwardsCompatibilityTest(tf.test.TestCase):
+  """Checks that text recorded via legacy plugin assets is still served."""
+
+  def setUp(self):
+    self.logdir = self.get_temp_dir()
+    self.generate_testdata()
+    multiplexer = event_multiplexer.EventMultiplexer()
+    multiplexer.AddRunsFromDirectory(self.logdir)
+    multiplexer.Reload()
+    context = base_plugin.TBContext(logdir=self.logdir, multiplexer=multiplexer)
+    self.plugin = text_plugin.TextPlugin(context)
+
+  def generate_testdata(self):
+    """Writes run 'fry' with one tensor summary listed in tensors.json."""
+    tf.compat.v1.reset_default_graph()
+    sess = tf.compat.v1.Session()
+    placeholder = tf.constant('I am deprecated.')
+
+    # Previously, we had used a means of creating text summaries that used
+    # plugin assets (which loaded JSON files containing runs and tags). The
+    # plugin must continue to be able to load summaries of that format, so we
+    # create a summary using that old plugin asset-based method here.
+    plugin_asset_summary = tf.compat.v1.summary.tensor_summary(
+        'old_plugin_asset_summary', placeholder)
+    assets_directory = os.path.join(self.logdir, 'fry', 'plugins',
+                                    'tensorboard_text')
+    # Make the directory of assets if it does not exist.
+    if not os.path.isdir(assets_directory):
+      try:
+        os.makedirs(assets_directory)
+      except OSError as err:
+        # TestCase has no assertFail(); fail() takes a single message, so the
+        # message is formatted before being passed in.
+        self.fail('Could not make assets directory %r: %r' %
+                  (assets_directory, err))
+    json_path = os.path.join(assets_directory, 'tensors.json')
+    with open(json_path, 'w+') as tensors_json_file:
+      # Write the op name to a JSON file that the text plugin later uses to
+      # determine the tag names of tensors to fetch.
+      tensors_json_file.write(json.dumps([plugin_asset_summary.op.name]))
+
+    run_name = 'fry'
+    subdir = os.path.join(self.logdir, run_name)
+    with test_util.FileWriterCache.get(subdir) as writer:
+      writer.add_graph(sess.graph)
+
+      summ = sess.run(plugin_asset_summary)
+      writer.add_summary(summ)
+
+  def testIndex(self):
+    """index_impl() lists the tag that is recorded only in tensors.json."""
+    index = self.plugin.index_impl()
+    self.assertItemsEqual(['fry'], index.keys())
+    # The summary made via plugin assets (the old method being phased out) is
+    # only available for run 'fry'.
+    self.assertItemsEqual(['old_plugin_asset_summary'],
+                          index['fry'])
+
+  def testText(self):
+    """text_impl() returns the legacy summary rendered as HTML."""
+    fry = self.plugin.text_impl('fry', 'old_plugin_asset_summary')
+    self.assertEqual(len(fry), 1)
+    self.assertEqual(fry[0]['step'], 0)
+    self.assertEqual(fry[0]['text'], u'<p>I am deprecated.</p>')
+
 
 
 if __name__ == '__main__':