Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,21 @@ The following default metrics are exposed:
##### Common metrics

- `flagsmith_build_info`: Has the labels `version` and `ci_commit_sha`.
- `http_server_request_duration_seconds`: Histogram labeled with `method`, `route`, and `response_status`.
- `http_server_requests_total`: Counter labeled with `method`, `route`, and `response_status`.
- `task_processor_enqueued_tasks_total`: Counter labeled with `task_identifier`.
- `flagsmith_http_server_request_duration_seconds`: Histogram labeled with `method`, `route`, and `response_status`.
- `flagsmith_http_server_requests_total`: Counter labeled with `method`, `route`, and `response_status`.
- `flagsmith_task_processor_enqueued_tasks_total`: Counter labeled with `task_identifier`.

##### Task Processor metrics

- `task_processor_finished_tasks_total`: Counter labeled with `task_identifier` and `result` (`"success"`, `"failure"`).
- `task_processor_task_duration_seconds`: Histogram labeled with `task_identifier` and `result` (`"success"`, `"failure"`).
- `flagsmith_task_processor_finished_tasks_total`: Counter labeled with `task_identifier` and `result` (`"success"`, `"failure"`).
- `flagsmith_task_processor_task_duration_seconds`: Histogram labeled with `task_identifier` and `result` (`"success"`, `"failure"`).

##### Guidelines

When developing a feature, try to come up with meaningful metrics to cover it. Refer to [Prometheus best practices][1] when naming your metrics and labels.

As a reasonable default, Flagsmith metrics are expected to be namespaced with the `"flagsmith_"` prefix.

Define your metrics in a `metrics.py` module of your Django application — see [example][2]. Contrary to the Prometheus Python client examples and documentation, please give each metric variable exactly the same name as the metric itself.

It's generally a good idea to allow users to define histogram buckets of their own. Flagsmith accepts a `PROMETHEUS_HISTOGRAM_BUCKETS` setting so users can customise their buckets. To honour the setting, use the `common.prometheus.Histogram` class when defining your histograms. When using `prometheus_client.Histogram` directly, please expose a dedicated setting like so:
Expand All @@ -84,8 +86,8 @@ It's generally a good idea to allow users to define histogram buckets of their o
import prometheus_client
from django.conf import settings

distance_from_earth_au = prometheus.Histogram(
"distance_from_earth_au",
flagsmith_distance_from_earth_au = prometheus_client.Histogram(
"flagsmith_distance_from_earth_au",
"Distance from Earth in astronomical units",
buckets=settings.DISTANCE_FROM_EARTH_AU_HISTOGRAM_BUCKETS,
)
Expand Down
4 changes: 2 additions & 2 deletions src/common/gunicorn/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ def access(
"method": environ.get("REQUEST_METHOD") or "",
"response_status": resp.status_code,
}
metrics.http_server_request_duration_seconds.labels(**labels).observe(
metrics.flagsmith_http_server_request_duration_seconds.labels(**labels).observe(
duration_seconds
)
metrics.http_server_requests_total.labels(**labels).inc()
metrics.flagsmith_http_server_requests_total.labels(**labels).inc()


class GunicornJsonCapableLogger(PrometheusGunicornLogger):
Expand Down
8 changes: 4 additions & 4 deletions src/common/gunicorn/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from common.prometheus import Histogram

http_server_requests_total = prometheus_client.Counter(
"http_server_requests_total",
flagsmith_http_server_requests_total = prometheus_client.Counter(
"flagsmith_http_server_requests_total",
"Total number of HTTP requests",
["route", "method", "response_status"],
)
http_server_request_duration_seconds = Histogram(
"http_server_request_duration_seconds",
flagsmith_http_server_request_duration_seconds = Histogram(
"flagsmith_http_server_request_duration_seconds",
"HTTP request duration in seconds",
["route", "method", "response_status"],
)
2 changes: 1 addition & 1 deletion src/task_processor/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def delay(
self.run_in_thread(args=args, kwargs=kwargs)
else:
logger.debug("Creating task for function '%s'...", task_identifier)
metrics.task_processor_enqueued_tasks_total.labels(
metrics.flagsmith_task_processor_enqueued_tasks_total.labels(
task_identifier=task_identifier
).inc()
try:
Expand Down
12 changes: 6 additions & 6 deletions src/task_processor/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@

from common.prometheus import Histogram

task_processor_enqueued_tasks_total = prometheus_client.Counter(
"task_processor_enqueued_tasks_total",
flagsmith_task_processor_enqueued_tasks_total = prometheus_client.Counter(
"flagsmith_task_processor_enqueued_tasks_total",
"Total number of enqueued tasks",
["task_identifier"],
)

if settings.TASK_PROCESSOR_MODE:
task_processor_finished_tasks_total = prometheus_client.Counter(
"task_processor_finished_tasks_total",
flagsmith_task_processor_finished_tasks_total = prometheus_client.Counter(
"flagsmith_task_processor_finished_tasks_total",
"Total number of finished tasks",
["task_identifier", "result"],
)
task_processor_task_duration_seconds = Histogram(
"task_processor_task_duration_seconds",
flagsmith_task_processor_task_duration_seconds = Histogram(
"flagsmith_task_processor_task_duration_seconds",
"Task processor task duration in seconds",
["task_identifier", "result"],
)
4 changes: 2 additions & 2 deletions src/task_processor/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _run_task(
)

ctx = ExitStack()
timer = metrics.task_processor_task_duration_seconds.time()
timer = metrics.flagsmith_task_processor_task_duration_seconds.time()
ctx.enter_context(timer)

task_identifier = task.task_identifier
Expand Down Expand Up @@ -158,7 +158,7 @@ def _run_task(
) # type: ignore[no-untyped-call]
ctx.close()

metrics.task_processor_finished_tasks_total.labels(
metrics.flagsmith_task_processor_finished_tasks_total.labels(
task_identifier=task_identifier,
result=result_label_value,
).inc()
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/common/gunicorn/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,12 @@ def test_gunicorn_prometheus_gunicorn_logger__expected_metrics(

# Then
assert_metric(
name="http_server_requests_total",
name="flagsmith_http_server_requests_total",
value=1.0,
labels={"method": "GET", "route": "^health", "response_status": "200"},
)
assert_metric(
name="http_server_request_duration_seconds_sum",
name="flagsmith_http_server_request_duration_seconds_sum",
value=0.101,
labels={"method": "GET", "route": "^health", "response_status": "200"},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def my_function(*args: typing.Any, **kwargs: typing.Any) -> None:

# Then
assert_metric(
name="task_processor_enqueued_tasks_total",
name="flagsmith_task_processor_enqueued_tasks_total",
value=1.0,
labels={"task_identifier": "test_unit_task_processor_decorators.my_function"},
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -603,31 +603,31 @@ def test_run_tasks__expected_metrics(

# Then
assert_metric(
name="task_processor_finished_tasks_total",
name="flagsmith_task_processor_finished_tasks_total",
value=1.0,
labels={
"task_identifier": dummy_task_identifier,
"result": "success",
},
)
assert_metric(
name="task_processor_finished_tasks_total",
name="flagsmith_task_processor_finished_tasks_total",
value=1.0,
labels={
"task_identifier": raise_exception_task_identifier,
"result": "failure",
},
)
assert_metric(
name="task_processor_task_duration_seconds",
name="flagsmith_task_processor_task_duration_seconds",
value=mocker.ANY,
labels={
"task_identifier": dummy_task_identifier,
"result": "success",
},
)
assert_metric(
name="task_processor_task_duration_seconds",
name="flagsmith_task_processor_task_duration_seconds",
value=mocker.ANY,
labels={
"task_identifier": raise_exception_task_identifier,
Expand Down