Add portable randn kernel implementation

GregoryComer · GregoryComer · commit 6292a33c9663 · 2025-05-30T13:54:47.000-07:00
diff --git a/kernels/aten/functions.yaml b/kernels/aten/functions.yaml
@@ -317,6 +317,8 @@
 
 - op: rand.out
 
+- op: randn.out
+
 - op: reciprocal.out
 
 - op: relu.out
diff --git a/kernels/portable/cpu/op_randn.cpp b/kernels/portable/cpu/op_randn.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <c10/util/irange.h>
+
+#include <executorch/kernels/portable/cpu/scalar_utils.h>
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+#include <random>
+
+namespace torch {
+namespace executor {
+namespace native {
+
+using executorch::aten::IntArrayRef;
+using Tensor = executorch::aten::Tensor;
+using ScalarType = executorch::aten::ScalarType;
+
+// Fills `out` with samples drawn from a normal distribution with mean 0 and
+// standard deviation 1, after resizing it to `sizes`. Returns `out`.
+Tensor&
+randn_out(KernelRuntimeContext& ctx, const IntArrayRef sizes, Tensor& out) {
+  // Resize for dynamic shape. Done before creating the RNG so that a failed
+  // check (which presumably returns `out` early) skips the seeding cost.
+  ET_KERNEL_CHECK_MSG(
+      ctx,
+      resize_tensor(out, sizes) == Error::Ok,
+      InvalidArgument,
+      out,
+      "Failed to resize output tensor.");
+
+  // Seeded from std::random_device on each call; no fixed seed is used.
+  std::mt19937 gen((std::random_device())());
+  std::normal_distribution<double> dist(0.0, 1.0);
+  ET_SWITCH_FLOATHBF16_TYPES(out.scalar_type(), ctx, "randn.out", CTYPE, [&] {
+    auto data_out = out.mutable_data_ptr<CTYPE>();
+    for (const auto i : c10::irange(out.numel())) {
+      data_out[i] = static_cast<CTYPE>(dist(gen));
+    }
+  });
+  return out;
+}
+
+} // namespace native
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/functions.yaml b/kernels/portable/functions.yaml
@@ -717,6 +717,12 @@
   kernels:
     - arg_meta: null
       kernel_name: torch::executor::rand_out
   tags: nondeterministic_seeded
 
+- op: randn.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::randn_out
+  tags: nondeterministic_seeded
+
 - op: reciprocal.out
diff --git a/kernels/test/CMakeLists.txt b/kernels/test/CMakeLists.txt
@@ -198,6 +198,7 @@ set(all_test_sources
     "op_pixel_shuffle_test.cpp"
     "op_prod_test.cpp"
     "op_rand_test.cpp"
+    "op_randn_test.cpp"
     "op_reciprocal_test.cpp"
     "op_relu_test.cpp"
     "op_remainder_test.cpp"
diff --git a/kernels/test/op_randn_test.cpp b/kernels/test/op_randn_test.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <c10/util/irange.h>
+#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
+#include <executorch/kernels/test/TestUtil.h>
+#include <executorch/kernels/test/supported_features.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <numeric>
+
+using executorch::aten::IntArrayRef;
+using executorch::aten::ScalarType;
+using executorch::aten::Tensor;
+using torch::executor::testing::TensorFactory;
+
+// Fixture that runs randn.out and sanity-checks the resulting statistics.
+class OpRandnTest : public OperatorTest {
+ protected:
+  void op_randn_out(const IntArrayRef sizes, Tensor& out) {
+    torch::executor::aten::randn_outf(context_, sizes, out);
+  }
+
+  // Fills a DTYPE tensor of shape `sizes` via randn.out and expects a sample
+  // mean near 0 and a sample standard deviation near 1.
+  template <typename CTYPE, ScalarType DTYPE>
+  void test_randn(const std::vector<int64_t>& sizes) {
+    TensorFactory<DTYPE> tf;
+
+    // Tensor factory wants int32 sizes, op kernel wants int64.
+    std::vector<int32_t> sizes_i32;
+    sizes_i32.reserve(sizes.size());
+    for (const int64_t s : sizes) {
+      sizes_i32.push_back(static_cast<int32_t>(s));
+    }
+    Tensor out = tf.zeros(sizes_i32);
+
+    IntArrayRef sizes_ref(sizes.data(), sizes.size());
+    op_randn_out(sizes_ref, out);
+
+    const auto* out_data = out.const_data_ptr<CTYPE>();
+    const auto count = static_cast<double>(out.numel());
+    const double mean =
+        std::accumulate(
+            out_data,
+            out_data + out.numel(),
+            0.0,
+            [](double acc, CTYPE n) { return acc + static_cast<double>(n); }) /
+        count;
+    // Population variance: mean squared deviation, dividing by N.
+    const double var =
+        std::accumulate(
+            out_data, out_data + out.numel(), 0.0, [mean](double acc, CTYPE n) {
+              const double delta = static_cast<double>(n) - mean;
+              return acc + delta * delta;
+            }) /
+        count;
+    const double stdev = std::sqrt(var);
+
+    // Deliberately loose thresholds to avoid flaky CI. A better test would
+    // run a proper statistical test against the reference distribution.
+    EXPECT_LE(std::abs(mean), 5.0 / std::sqrt(count));
+    EXPECT_LE(std::abs(stdev - 1.0), 0.1);
+    EXPECT_GT(stdev, 0);
+  }
+};
+
+TEST_F(OpRandnTest, SmokeTest) {
+  // Run the statistical check once per dtype in ET_FORALL_FLOATHBF16_TYPES.
+  std::vector<int64_t> shape = {2, 3, 4, 128};
+#define RUN_RANDN_CASE(ctype, dtype) test_randn<ctype, ScalarType::dtype>(shape);
+  ET_FORALL_FLOATHBF16_TYPES(RUN_RANDN_CASE);
+#undef RUN_RANDN_CASE
+}
+
+TEST_F(OpRandnTest, Rank) {
+  // Grow the shape one trailing dimension at a time, covering ranks 2 to 5.
+  std::vector<int64_t> shape = {1024};
+  for (int64_t extra_dim = 1; extra_dim <= 4; ++extra_dim) {
+    shape.push_back(extra_dim);
+    test_randn<float, ScalarType::Float>(shape);
+  }
+}
diff --git a/kernels/test/targets.bzl b/kernels/test/targets.bzl
@@ -286,6 +286,7 @@ def define_common_targets():
     _common_op_test("op_pow_test", ["aten", "portable"])
     _common_op_test("op_prod_test", ["aten", "portable"])
     _common_op_test("op_rand_test", ["aten", "portable"])
+    _common_op_test("op_randn_test", ["aten", "portable"])
     _common_op_test("op_reciprocal_test", ["aten", "portable"])
     _common_op_test("op_relu_test", ["aten", "portable"])
     _common_op_test("op_remainder_test", ["aten", "portable"])
diff --git a/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl b/shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl
@@ -981,6 +981,14 @@ ATEN_OPS = (
             "//executorch/runtime/core/exec_aten/util:tensor_util",
         ]
     ),
+    op_target(
+        name = "op_randn",
+        deps = [
+            ":scalar_utils",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ]
+    ),
     op_target(
         name = "op_reciprocal",
         deps = [