add metric to log model output kwargs (#2279)

iamzainhuda · PaulZhang12 · commit 592fd668a449 · 2024-08-19T15:02:25.000Z
Summary: Pull Request resolved: #2279 Differential Revision: D60986155 fbshipit-source-id: 25d389c7acbab81f43726e9991ff8ddd9a34ddb4
diff --git a/torchrec/metrics/metric_module.py b/torchrec/metrics/metric_module.py
@@ -41,6 +41,7 @@
 from torchrec.metrics.multiclass_recall import MulticlassRecallMetric
 from torchrec.metrics.ndcg import NDCGMetric
 from torchrec.metrics.ne import NEMetric
+from torchrec.metrics.output import OutputMetric
 from torchrec.metrics.precision import PrecisionMetric
 from torchrec.metrics.rauc import RAUCMetric
 from torchrec.metrics.rec_metric import RecMetric, RecMetricList
@@ -80,6 +81,7 @@
     RecMetricEnum.RECALL: RecallMetric,
     RecMetricEnum.SERVING_NE: ServingNEMetric,
     RecMetricEnum.SERVING_CALIBRATION: ServingCalibrationMetric,
+    RecMetricEnum.OUTPUT: OutputMetric,
 }
 
 
diff --git a/torchrec/metrics/metrics_config.py b/torchrec/metrics/metrics_config.py
@@ -41,6 +41,7 @@ class RecMetricEnum(RecMetricEnumBase):
     RECALL = "recall"
     SERVING_NE = "serving_ne"
     SERVING_CALIBRATION = "serving_calibration"
+    OUTPUT = "output"
 
 
 @dataclass(unsafe_hash=True, eq=True)
diff --git a/torchrec/metrics/metrics_namespace.py b/torchrec/metrics/metrics_namespace.py
@@ -63,6 +63,7 @@ class MetricName(MetricNameBase):
     NDCG = "ndcg"
     XAUC = "xauc"
     SCALAR = "scalar"
+    OUTPUT = "output"
 
     TOTAL_POSITIVE_EXAMPLES = "total_positive_examples"
     TOTAL_NEGATIVE_EXAMPLES = "total_negative_examples"
@@ -112,6 +113,8 @@ class MetricNamespace(MetricNamespaceBase):
     SERVING_NE = "serving_ne"
     SERVING_CALIBRATION = "serving_calibration"
 
+    OUTPUT = "output"
+
 
 class MetricPrefix(StrValueMixin, Enum):
     DEFAULT = ""
diff --git a/torchrec/metrics/output.py b/torchrec/metrics/output.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from typing import Any, Dict, List, Optional, Type
+
+import torch
+from torch import distributed as dist
+
+from torchrec.metrics.metrics_namespace import MetricName, MetricNamespace, MetricPrefix
+from torchrec.metrics.rec_metric import (
+    MetricComputationReport,
+    RecComputeMode,
+    RecMetric,
+    RecMetricComputation,
+    RecMetricException,
+    RecTaskInfo,
+)
+
+
+class OutputMetricComputation(RecMetricComputation):
+    """
+    Computation that reports two model outputs, ``latest_imp`` and
+    ``total_latest_imp``, read from the ``required_inputs`` kwarg of ``update``.
+
+    Every ``update`` call overwrites both states with the mean over the last
+    dimension of the corresponding input, so ``_compute`` reports the values
+    from the most recent ``update`` call rather than an accumulated value.
+
+    TODO - make this generic metric that can be used for any model output tensor
+    """
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        """Forward all arguments to the base class and register both states."""
+        super().__init__(*args, **kwargs)
+        # Both states are registered the same way: one double per task, no
+        # window state, "sum" as the distributed reduction, non-persistent.
+        self._add_state(
+            "latest_imp",
+            torch.zeros(self._n_tasks, dtype=torch.double),
+            add_window_state=False,
+            dist_reduce_fx="sum",
+            persistent=False,
+        )
+        self._add_state(
+            "total_latest_imp",
+            torch.zeros(self._n_tasks, dtype=torch.double),
+            add_window_state=False,
+            dist_reduce_fx="sum",
+            persistent=False,
+        )
+
+    def update(
+        self,
+        *,
+        predictions: Optional[torch.Tensor],
+        labels: torch.Tensor,
+        weights: Optional[torch.Tensor],
+        **kwargs: Dict[str, Any],
+    ) -> None:
+        """
+        Replace both states with the per-row mean of the supplied inputs.
+
+        Args:
+            predictions: not used by this computation.
+            labels: not used by this computation.
+            weights: not used by this computation.
+            **kwargs: must contain ``required_inputs``, a mapping that holds
+                tensors under the keys ``latest_imp`` and ``total_latest_imp``.
+
+        Raises:
+            RecMetricException: if ``required_inputs`` is absent, is ``None``,
+                or lacks either of the two required keys.
+        """
+        required_list = ["latest_imp", "total_latest_imp"]
+        required_inputs = kwargs.get("required_inputs")
+        if required_inputs is None or not all(
+            item in required_inputs for item in required_list
+        ):
+            raise RecMetricException(
+                "OutputMetricComputation requires 'latest_imp' and 'total_latest_imp' in kwargs"
+            )
+        # mean(dtype=torch.double) casts the input to double before reducing,
+        # so no intermediate float32 cast is needed; going through float32
+        # would round integer values above 2**24 and truncate double inputs.
+        # All values are computed before any state is assigned, so a failure
+        # on one input leaves both states untouched.
+        states = {
+            name: required_inputs[name].mean(dim=-1, dtype=torch.double)
+            for name in required_list
+        }
+
+        for state_name, state_value in states.items():
+            setattr(self, state_name, state_value)
+
+    def _compute(self) -> List[MetricComputationReport]:
+        """
+        Return one report per state.
+
+        Both reports use ``MetricName.OUTPUT`` and ``MetricPrefix.DEFAULT``;
+        they are told apart only by their ``description`` suffix.
+        """
+        return [
+            MetricComputationReport(
+                name=MetricName.OUTPUT,
+                metric_prefix=MetricPrefix.DEFAULT,
+                value=self.latest_imp,
+                description="_latest_imp",
+            ),
+            MetricComputationReport(
+                name=MetricName.OUTPUT,
+                metric_prefix=MetricPrefix.DEFAULT,
+                value=self.total_latest_imp,
+                description="_total_latest_imp",
+            ),
+        ]
+
+
+class OutputMetric(RecMetric):
+    """
+    RecMetric whose computation class is ``OutputMetricComputation`` and whose
+    namespace is ``MetricNamespace.OUTPUT``.
+
+    Construction forwards every argument to ``RecMetric.__init__`` unchanged
+    and then adds ``latest_imp`` and ``total_latest_imp`` to
+    ``self._required_inputs``.
+    """
+
+    _namespace: MetricNamespace = MetricNamespace.OUTPUT
+    _computation_class: Type[RecMetricComputation] = OutputMetricComputation
+
+    def __init__(
+        self,
+        world_size: int,
+        my_rank: int,
+        batch_size: int,
+        tasks: List[RecTaskInfo],
+        compute_mode: RecComputeMode = RecComputeMode.UNFUSED_TASKS_COMPUTATION,
+        window_size: int = 100,
+        fused_update_limit: int = 0,
+        compute_on_all_ranks: bool = False,
+        should_validate_update: bool = False,
+        process_group: Optional[dist.ProcessGroup] = None,
+        **kwargs: Dict[str, Any],
+    ) -> None:
+        """Initialise the base metric, then declare the two required inputs."""
+        super().__init__(
+            world_size=world_size,
+            my_rank=my_rank,
+            batch_size=batch_size,
+            tasks=tasks,
+            compute_mode=compute_mode,
+            window_size=window_size,
+            fused_update_limit=fused_update_limit,
+            compute_on_all_ranks=compute_on_all_ranks,
+            should_validate_update=should_validate_update,
+            process_group=process_group,
+            **kwargs,
+        )
+        # OutputMetricComputation.update raises unless both of these keys are
+        # present in its `required_inputs` kwarg.
+        for input_name in ("latest_imp", "total_latest_imp"):
+            self._required_inputs.add(input_name)
diff --git a/torchrec/metrics/rec_metric.py b/torchrec/metrics/rec_metric.py
@@ -623,8 +623,13 @@ def _update(
                         else:
                             continue
                     if "required_inputs" in kwargs:
+                        # Expand single-element tensors to the task labels' shape; reshape all others
                         kwargs["required_inputs"] = {
-                            k: v.view(task_labels.size())
+                            k: (
+                                v.view(task_labels.size())
+                                if v.numel() > 1
+                                else v.expand(task_labels.size())
+                            )
                             for k, v in kwargs["required_inputs"].items()
                         }
                     metric_.update(