Skip to content

feat: initial work to set-up opentelemetry tracing #1378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions lib/realtime_web/controllers/broadcast_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ defmodule RealtimeWeb.BroadcastController do
)

def broadcast(%{assigns: %{tenant: tenant}} = conn, attrs) do
OpenTelemetry.Tracer.set_attributes(external_id: tenant.external_id)

with :ok <- BatchBroadcast.broadcast(conn, tenant, attrs) do
send_resp(conn, :accepted, "")
end
Expand Down
26 changes: 10 additions & 16 deletions lib/realtime_web/controllers/tenant_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ defmodule RealtimeWeb.TenantController do
require Logger
import Realtime.Logs

alias OpenTelemetry.Tracer
alias Realtime.Api
alias Realtime.Api.Tenant
alias Realtime.Database
Expand All @@ -26,6 +25,8 @@ defmodule RealtimeWeb.TenantController do

action_fallback(RealtimeWeb.FallbackController)

plug :set_observability_attributes when action in [:show, :edit, :update, :delete, :reload, :health]
Copy link
Member Author

@edgurgel edgurgel May 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Ziinc I ended up using a plug here as we don't need for all actions. I thought about moving this to the router but it would essentially need to look for tenant_id as path_params on all the routes which I don't know if that's what we want 🤷

This felt like a good compromise?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the plug can handle the situation where there isn't a tenant_id in path param with a fallthrough so you don't have to explicitly state the actions, that would make it less verbose
lgtm


operation(:index,
summary: "List tenants",
parameters: [
Expand Down Expand Up @@ -70,9 +71,6 @@ defmodule RealtimeWeb.TenantController do
)

def show(conn, %{"tenant_id" => id}) do
Logger.metadata(external_id: id, project: id)
Tracer.set_attributes(external_id: id)

tenant = Api.get_tenant_by_external_id(id)

case tenant do
Expand Down Expand Up @@ -136,8 +134,6 @@ defmodule RealtimeWeb.TenantController do
)

def update(conn, %{"tenant_id" => external_id, "tenant" => tenant_params}) do
Logger.metadata(external_id: external_id, project: external_id)
Tracer.set_attributes(external_id: external_id)
tenant = Api.get_tenant_by_external_id(external_id)

case tenant do
Expand All @@ -153,7 +149,6 @@ defmodule RealtimeWeb.TenantController do
with {:ok, %Tenant{} = tenant} <- Api.create_tenant(%{tenant_params | "extensions" => extensions}),
res when res in [:ok, :noop] <- Migrations.run_migrations(tenant) do
Logger.metadata(external_id: tenant.external_id, project: tenant.external_id)
Tracer.set_attributes(external_id: tenant.external_id)

conn
|> put_status(:created)
Expand Down Expand Up @@ -192,9 +187,6 @@ defmodule RealtimeWeb.TenantController do
)

def delete(conn, %{"tenant_id" => tenant_id}) do
Logger.metadata(external_id: tenant_id, project: tenant_id)
Tracer.set_attributes(external_id: tenant_id)

stop_all_timeout = Enum.count(PostgresCdc.available_drivers()) * 1_000

with %Tenant{} = tenant <- Api.get_tenant_by_external_id(tenant_id, :primary),
Expand Down Expand Up @@ -236,9 +228,6 @@ defmodule RealtimeWeb.TenantController do
)

def reload(conn, %{"tenant_id" => tenant_id}) do
Logger.metadata(external_id: tenant_id, project: tenant_id)
Tracer.set_attributes(external_id: tenant_id)

case Tenants.get_tenant_by_external_id(tenant_id) do
nil ->
log_error("TenantNotFound", "Tenant not found")
Expand Down Expand Up @@ -274,9 +263,6 @@ defmodule RealtimeWeb.TenantController do
)

def health(conn, %{"tenant_id" => tenant_id}) do
Logger.metadata(external_id: tenant_id, project: tenant_id)
Tracer.set_attributes(external_id: tenant_id)

case Tenants.health_check(tenant_id) do
{:ok, response} ->
json(conn, %{data: response})
Expand All @@ -292,4 +278,12 @@ defmodule RealtimeWeb.TenantController do
|> render("not_found.json", tenant: nil)
end
end

defp set_observability_attributes(conn, _opts) do
tenant_id = conn.path_params["tenant_id"]
OpenTelemetry.Tracer.set_attributes(external_id: tenant_id)
Logger.metadata(external_id: tenant_id, project: tenant_id)

conn
end
end
1 change: 1 addition & 0 deletions lib/realtime_web/plugs/assign_tenant.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ defmodule RealtimeWeb.Plugs.AssignTenant do
with {:ok, external_id} <- Database.get_external_id(host),
%Tenant{} = tenant <- Api.get_tenant_by_external_id(external_id) do
Logger.metadata(external_id: external_id, project: external_id)
OpenTelemetry.Tracer.set_attributes(external_id: external_id)

tenant =
tenant
Expand Down
1 change: 1 addition & 0 deletions test/integration/tracing_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ defmodule Realtime.Integration.TracingTest do
@span_parent_id Integer.parse(@parent_id, 16) |> elem(0)

# This is doing a blackbox approach because tracing is not captured with normal Phoenix controller tests
# We need cowboy, endpoint and router to trigger their telemetry events

test "traces basic HTTP request with phoenix and cowboy information" do
:otel_simple_processor.set_exporter(:otel_exporter_pid, self())
Expand Down
112 changes: 107 additions & 5 deletions test/realtime_web/controllers/tenant_controller_test.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
defmodule RealtimeWeb.TenantControllerTest do
# Can't run async true because under the hood Cachex is used and it doesn't see Ecto.Sandbox
# Also using global otel_simple_processor
use RealtimeWeb.ConnCase, async: false

require OpenTelemetry.Tracer, as: Tracer

alias Realtime.Api.Tenant
alias Realtime.Crypto
alias Realtime.Database
Expand All @@ -23,6 +26,8 @@ defmodule RealtimeWeb.TenantControllerTest do
|> put_req_header("accept", "application/json")
|> put_req_header("authorization", "Bearer #{jwt}")

:otel_simple_processor.set_exporter(:otel_exporter_pid, self())

{:ok, conn: conn}
end

Expand All @@ -45,6 +50,25 @@ defmodule RealtimeWeb.TenantControllerTest do
response = json_response(conn, 404)
assert response == %{"error" => "not found"}
end

test "sets appropriate observability metadata", %{conn: conn, tenant: tenant} do
external_id = tenant.external_id

# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
get(conn, ~p"/api/tenants/#{external_id}")

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "GET /api/tenants/:tenant_id", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end

describe "create tenant with post" do
Expand Down Expand Up @@ -161,14 +185,36 @@ defmodule RealtimeWeb.TenantControllerTest do
conn = put(conn, ~p"/api/tenants/external_id", tenant: default_tenant_attrs(5000))
assert response(conn, 403)
end

test "sets appropriate observability metadata", %{conn: conn, tenant: tenant} do
external_id = tenant.external_id
port = Database.from_tenant(tenant, "realtime_test", :stop).port
attrs = default_tenant_attrs(port)

# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
put(conn, ~p"/api/tenants/#{external_id}", tenant: attrs)

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "PUT /api/tenants/:tenant_id", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end

describe "delete tenant" do
setup [:with_tenant]

test "deletes chosen tenant", %{conn: conn, tenant: tenant} do
{:ok, _pid} = Realtime.Tenants.Connect.lookup_or_start_connection(tenant.external_id)
Process.sleep(500)
{:ok, _pid} = Connect.lookup_or_start_connection(tenant.external_id)
assert Connect.ready?(tenant.external_id)

assert Cache.get_tenant_by_external_id(tenant.external_id)
{:ok, db_conn} = Database.connect(tenant, "realtime_test", :stop)

Expand Down Expand Up @@ -197,6 +243,25 @@ defmodule RealtimeWeb.TenantControllerTest do
conn = delete(conn, ~p"/api/tenants/#{tenant.external_id}")
assert response(conn, 403) == ""
end

test "sets appropriate observability metadata", %{conn: conn, tenant: tenant} do
external_id = tenant.external_id

# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
delete(conn, ~p"/api/tenants/#{external_id}")

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "DELETE /api/tenants/:tenant_id", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end

describe "reload tenant" do
Expand All @@ -212,10 +277,29 @@ defmodule RealtimeWeb.TenantControllerTest do
assert status == 404
end

test "returns 404 when jwt is invalid", %{conn: conn, tenant: tenant} do
test "returns 403 when jwt is invalid", %{conn: conn, tenant: tenant} do
conn = put_req_header(conn, "authorization", "Bearer potato")
conn = delete(conn, ~p"/api/tenants/#{tenant.external_id}/reload")
assert json_response(conn, 404) == "Not Found"
conn = post(conn, ~p"/api/tenants/#{tenant.external_id}/reload")
assert response(conn, 403) == ""
end

test "sets appropriate observability metadata", %{conn: conn, tenant: tenant} do
external_id = tenant.external_id

# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
post(conn, ~p"/api/tenants/#{tenant.external_id}/reload")

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "POST /api/tenants/:tenant_id/reload", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end

Expand Down Expand Up @@ -316,6 +400,24 @@ defmodule RealtimeWeb.TenantControllerTest do

assert %{"healthy" => true, "db_connected" => false, "connected_cluster" => 0} = data
end

test "sets appropriate observability metadata", %{conn: conn, tenant: tenant} do
external_id = tenant.external_id
# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
get(conn, ~p"/api/tenants/#{tenant.external_id}/health")

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "GET /api/tenants/:tenant_id/health", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end

defp default_tenant_attrs(port) do
Expand Down
29 changes: 28 additions & 1 deletion test/realtime_web/plugs/assign_tenant_test.exs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
defmodule RealtimeWeb.Plugs.AssignTenantTest do
use RealtimeWeb.ConnCase
# Use of global otel_simple_processor
use RealtimeWeb.ConnCase, async: false

require OpenTelemetry.Tracer, as: Tracer

alias Realtime.Api

Expand Down Expand Up @@ -76,4 +79,28 @@ defmodule RealtimeWeb.Plugs.AssignTenantTest do

assert conn.status == 200
end

test "sets appropriate observability metadata", %{conn: conn} do
external_id = "localhost"
tenant_fixture(%{external_id: external_id})

:otel_simple_processor.set_exporter(:otel_exporter_pid, self())

# opentelemetry_phoenix expects to be a child of the originating cowboy process hence the Task here :shrug:
Tracer.with_span "test" do
Task.async(fn ->
conn
|> Map.put(:host, "localhost.localhost.com")
|> get(Routes.ping_path(conn, :ping))

assert Logger.metadata()[:external_id] == external_id
assert Logger.metadata()[:project] == external_id
end)
|> Task.await()
end

assert_receive {:span, span(name: "GET /api/ping", attributes: attributes)}

assert attributes(map: %{external_id: ^external_id}) = attributes
end
end
Loading