diff --git a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml index c1e6ebef59..da5ae9bc63 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_pgadmins.yaml @@ -2301,6 +2301,13 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. + items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set perDBMetricTargets: description: User defined databases to target for default per-db metrics diff --git a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml index 86ae261dd4..18cf2b5e85 100644 --- a/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml +++ b/config/crd/bases/postgres-operator.crunchydata.com_postgresclusters.yaml @@ -12146,6 +12146,13 @@ spec: type: string type: array type: object + exporters: + description: The names of exporters that should send metrics. 
+ items: + type: string + minItems: 1 + type: array + x-kubernetes-list-type: set perDBMetricTargets: description: User defined databases to target for default per-db metrics diff --git a/internal/collector/helpers_test.go b/internal/collector/helpers_test.go index 7f1e277e9b..1f174ebcda 100644 --- a/internal/collector/helpers_test.go +++ b/internal/collector/helpers_test.go @@ -23,6 +23,9 @@ func testInstrumentationSpec() *v1beta1.InstrumentationSpec { Logs: &v1beta1.InstrumentationLogsSpec{ Exporters: []string{"googlecloud"}, }, + Metrics: &v1beta1.InstrumentationMetricsSpec{ + Exporters: []string{"googlecloud"}, + }, } return spec.DeepCopy() diff --git a/internal/collector/naming.go b/internal/collector/naming.go index 801d61e8ce..c12ed89ebc 100644 --- a/internal/collector/naming.go +++ b/internal/collector/naming.go @@ -9,7 +9,7 @@ const DebugExporter = "debug" const LogsBatchProcessor = "batch/logs" const OneSecondBatchProcessor = "batch/1s" const SubSecondBatchProcessor = "batch/200ms" -const Prometheus = "prometheus" +const Prometheus = "prometheus/cpk-monitoring" const PrometheusPort = 9187 const PGBouncerMetrics = "metrics/pgbouncer" const PostgresMetrics = "metrics/postgres" diff --git a/internal/collector/patroni.go b/internal/collector/patroni.go index 2e0edb0d15..ea11c7a2f9 100644 --- a/internal/collector/patroni.go +++ b/internal/collector/patroni.go @@ -168,6 +168,14 @@ func EnablePatroniMetrics(ctx context.Context, }, } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) 
+ } + // Add Metrics Pipeline outConfig.Pipelines[PatroniMetrics] = Pipeline{ Receivers: []ComponentID{Prometheus}, @@ -175,7 +183,7 @@ func EnablePatroniMetrics(ctx context.Context, SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/patroni_test.go b/internal/collector/patroni_test.go index 20dd8096eb..1626f92256 100644 --- a/internal/collector/patroni_test.go +++ b/internal/collector/patroni_test.go @@ -16,7 +16,7 @@ import ( ) func TestEnablePatroniLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -26,9 +26,7 @@ func TestEnablePatroniLogging(t *testing.T) { config := NewConfig(nil) cluster := new(v1beta1.PostgresCluster) require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) EnablePatroniLogging(ctx, cluster, config) @@ -216,3 +214,137 @@ service: `) }) } + +func TestEnablePatroniMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePatroniMetrics(ctx, cluster, config) + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + prometheus/cpk-monitoring: + config: + scrape_configs: + - job_name: patroni + scheme: https + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8008 + tls_config: + insecure_skip_verify: true +service: + extensions: [] + pipelines: + metrics/patroni: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - prometheus/cpk-monitoring +`) + + }) +} diff --git a/internal/collector/pgadmin_test.go b/internal/collector/pgadmin_test.go index b856baab0c..2c1a4eb05a 100644 --- a/internal/collector/pgadmin_test.go +++ b/internal/collector/pgadmin_test.go @@ -20,7 +20,7 @@ import ( ) func TestEnablePgAdminLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -31,9 +31,7 @@ func TestEnablePgAdminLogging(t *testing.T) { configmap := new(corev1.ConfigMap) initialize.Map(&configmap.Data) var instrumentation *v1beta1.InstrumentationSpec - require.UnmarshalInto(t, &instrumentation, `{ - logs: { retentionPeriod: 12h }, - }`) + require.UnmarshalInto(t, &instrumentation, `{}`) err := collector.EnablePgAdminLogging(ctx, instrumentation, configmap) assert.NilError(t, err) diff --git a/internal/collector/pgbackrest_test.go b/internal/collector/pgbackrest_test.go index 66e180ef1f..911f0f0909 100644 --- a/internal/collector/pgbackrest_test.go +++ 
b/internal/collector/pgbackrest_test.go @@ -16,7 +16,7 @@ import ( ) func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -29,9 +29,7 @@ func TestNewConfigForPgBackrestRepoHostPod(t *testing.T) { }, } var instrumentation *v1beta1.InstrumentationSpec - require.UnmarshalInto(t, &instrumentation, `{ - logs: { retentionPeriod: 12h }, - }`) + require.UnmarshalInto(t, &instrumentation, `{}`) config := NewConfigForPgBackrestRepoHostPod(ctx, instrumentation, repos) diff --git a/internal/collector/pgbouncer.go b/internal/collector/pgbouncer.go index 700b9a3725..2e2bb99c56 100644 --- a/internal/collector/pgbouncer.go +++ b/internal/collector/pgbouncer.go @@ -187,6 +187,14 @@ func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresClus "queries": slices.Clone(pgBouncerMetricsQueries), } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) 
+ } + // Add Metrics Pipeline config.Pipelines[PGBouncerMetrics] = Pipeline{ Receivers: []ComponentID{SqlQuery}, @@ -194,7 +202,7 @@ func EnablePgBouncerMetrics(ctx context.Context, inCluster *v1beta1.PostgresClus SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } } } diff --git a/internal/collector/pgbouncer_test.go b/internal/collector/pgbouncer_test.go index cbd69cbd03..1589c27079 100644 --- a/internal/collector/pgbouncer_test.go +++ b/internal/collector/pgbouncer_test.go @@ -16,7 +16,7 @@ import ( ) func TestEnablePgBouncerLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -26,9 +26,7 @@ func TestEnablePgBouncerLogging(t *testing.T) { config := NewConfig(nil) cluster := new(v1beta1.PostgresCluster) require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) EnablePgBouncerLogging(ctx, cluster, config) @@ -214,3 +212,361 @@ service: `) }) } + +func TestEnablePgBouncerMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(nil) + cluster := new(v1beta1.PostgresCluster) + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: 
paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled + sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] 
+ pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + config := NewConfig(testInstrumentationSpec()) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.Instrumentation = testInstrumentationSpec() + + EnablePgBouncerMetrics(ctx, cluster, config, "test_user") + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery: + datasource: host=localhost dbname=pgbouncer port=5432 user=test_user password=${env:PGPASSWORD} + driver: postgres + queries: + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: Current waiting time in seconds + metric_name: ccp_pgbouncer_clients_wait_seconds + value_column: wait + sql: SHOW CLIENTS; + - metrics: + - attribute_columns: + - name + - port + - database + description: Maximum number of server connections + metric_name: ccp_pgbouncer_databases_pool_size + value_column: pool_size + - attribute_columns: + - name + - port + - database + description: Minimum number of server connections + metric_name: ccp_pgbouncer_databases_min_pool_size + 
value_column: min_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of additional connections for this database + metric_name: ccp_pgbouncer_databases_reserve_pool_size + value_column: reserve_pool_size + - attribute_columns: + - name + - port + - database + description: Maximum number of allowed connections for this database, as set + by max_db_connections, either globally or per database + metric_name: ccp_pgbouncer_databases_max_connections + value_column: max_connections + - attribute_columns: + - name + - port + - database + description: Current number of connections for this database + metric_name: ccp_pgbouncer_databases_current_connections + value_column: current_connections + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently paused, else 0 + metric_name: ccp_pgbouncer_databases_paused + value_column: paused + - attribute_columns: + - name + - port + - database + description: 1 if this database is currently disabled, else 0 + metric_name: ccp_pgbouncer_databases_disabled + value_column: disabled + sql: SHOW DATABASES; + - metrics: + - attribute_columns: + - list + description: Count of items registered with pgBouncer + metric_name: ccp_pgbouncer_lists_item_count + value_column: items + sql: SHOW LISTS; + - metrics: + - attribute_columns: + - database + - user + description: Client connections that are either linked to server connections + or are idle with no queries waiting to be processed + metric_name: ccp_pgbouncer_pools_client_active + value_column: cl_active + - attribute_columns: + - database + - user + description: Client connections that have sent queries but have not yet got + a server connection + metric_name: ccp_pgbouncer_pools_client_waiting + value_column: cl_waiting + - attribute_columns: + - database + - user + description: Server connections that are linked to a client + metric_name: ccp_pgbouncer_pools_server_active + value_column: sv_active + - 
attribute_columns: + - database + - user + description: Server connections that are unused and immediately usable for + client queries + metric_name: ccp_pgbouncer_pools_server_idle + value_column: sv_idle + - attribute_columns: + - database + - user + description: Server connections that have been idle for more than server_check_delay, + so they need server_check_query to run on them before they can be used again + metric_name: ccp_pgbouncer_pools_server_used + value_column: sv_used + sql: SHOW POOLS; + - metrics: + - attribute_columns: + - database + - user + - state + - application_name + - link + description: 1 if the connection will be closed as soon as possible, because + a configuration file reload or DNS update changed the connection information + or RECONNECT was issued + metric_name: ccp_pgbouncer_servers_close_needed + value_column: close_needed + sql: SHOW SERVERS; +service: + extensions: [] + pipelines: + metrics/pgbouncer: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery +`) + + }) +} diff --git a/internal/collector/postgres_metrics.go b/internal/collector/postgres_metrics.go index 072ec6987a..f3d5371cc6 100644 --- a/internal/collector/postgres_metrics.go +++ b/internal/collector/postgres_metrics.go @@ -171,6 +171,14 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust "queries": slices.Clone(fiveMinuteMetricsClone), } + // If there are exporters to be added to the metrics pipelines defined + // in the spec, add them to the pipeline. + exporters := []ComponentID{Prometheus} + if inCluster.Spec.Instrumentation.Metrics != nil && + inCluster.Spec.Instrumentation.Metrics.Exporters != nil { + exporters = append(exporters, inCluster.Spec.Instrumentation.Metrics.Exporters...) 
+ } + // Add Metrics Pipeline config.Pipelines[PostgresMetrics] = Pipeline{ Receivers: []ComponentID{FiveSecondSqlQuery, FiveMinuteSqlQuery}, @@ -178,7 +186,7 @@ func EnablePostgresMetrics(ctx context.Context, inCluster *v1beta1.PostgresClust SubSecondBatchProcessor, CompactingProcessor, }, - Exporters: []ComponentID{Prometheus}, + Exporters: exporters, } // Add custom queries and per-db metrics if they are defined in the spec diff --git a/internal/collector/postgres_test.go b/internal/collector/postgres_test.go index 222b263e25..a36a827b3b 100644 --- a/internal/collector/postgres_test.go +++ b/internal/collector/postgres_test.go @@ -17,7 +17,7 @@ import ( ) func TestEnablePostgresLogging(t *testing.T) { - t.Run("NilInstrumentationSpec", func(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { gate := feature.NewGate() assert.NilError(t, gate.SetFromMap(map[string]bool{ feature.OpenTelemetryLogs: true, @@ -27,9 +27,7 @@ func TestEnablePostgresLogging(t *testing.T) { cluster := new(v1beta1.PostgresCluster) cluster.Spec.PostgresVersion = 99 require.UnmarshalInto(t, &cluster.Spec, `{ - instrumentation: { - logs: { retentionPeriod: 5h }, - }, + instrumentation: {} }`) config := NewConfig(nil) @@ -537,3 +535,137 @@ service: `) }) } + +func TestEnablePostgresMetrics(t *testing.T) { + t.Run("EmptyInstrumentationSpec", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + require.UnmarshalInto(t, &cluster.Spec, `{ + instrumentation: {} + }`) + + config := NewConfig(nil) + + EnablePostgresMetrics(ctx, cluster, config) + + // The queries aren't really needed for this test and sheer number of queries + // would make this file excessively long (and string formatting presented its + // own formatting headaches), so I am removing 
them + config.Receivers["sqlquery/5s"] = nil + config.Receivers["sqlquery/300s"] = nil + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. +exporters: + debug: + verbosity: detailed + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery/5s: null + sqlquery/300s: null +service: + extensions: [] + pipelines: + metrics/postgres: + exporters: + - prometheus/cpk-monitoring + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery/5s + - sqlquery/300s +`) + }) + + t.Run("InstrumentationSpecDefined", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.OpenTelemetryMetrics: true, + })) + ctx := feature.NewContext(context.Background(), gate) + + cluster := new(v1beta1.PostgresCluster) + cluster.Spec.PostgresVersion = 99 + cluster.Spec.Instrumentation = testInstrumentationSpec() + + config := NewConfig(cluster.Spec.Instrumentation) + + EnablePostgresMetrics(ctx, cluster, config) + + // The queries aren't really needed for this test and sheer number of queries + // would make this file excessively long (and string formatting presented its + // own formatting headaches), so I am removing them + config.Receivers["sqlquery/5s"] = nil + config.Receivers["sqlquery/300s"] = nil + + result, err := config.ToYAML() + assert.NilError(t, err) + assert.DeepEqual(t, result, `# Generated by postgres-operator. DO NOT EDIT. +# Your changes will not be saved. 
+exporters: + debug: + verbosity: detailed + googlecloud: + log: + default_log_name: opentelemetry.io/collector-exported-log + project: google-project-name + prometheus/cpk-monitoring: + endpoint: 0.0.0.0:9187 +extensions: {} +processors: + batch/1s: + timeout: 1s + batch/200ms: + timeout: 200ms + batch/logs: + send_batch_size: 8192 + timeout: 200ms + groupbyattrs/compact: {} + resourcedetection: + detectors: [] + override: false + timeout: 30s +receivers: + sqlquery/5s: null + sqlquery/300s: null +service: + extensions: [] + pipelines: + metrics/postgres: + exporters: + - prometheus/cpk-monitoring + - googlecloud + processors: + - batch/200ms + - groupbyattrs/compact + receivers: + - sqlquery/5s + - sqlquery/300s +`) + + }) +} diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go index dfefccd6de..7c90b6f65e 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/instrumentation_types.go @@ -117,6 +117,13 @@ type InstrumentationMetricsSpec struct { // +optional CustomQueries *InstrumentationCustomQueriesSpec `json:"customQueries,omitempty"` + // The names of exporters that should send metrics. 
+ // --- + // +kubebuilder:validation:MinItems=1 + // +listType=set + // +optional + Exporters []string `json:"exporters,omitempty"` + // User defined databases to target for default per-db metrics // --- // +optional diff --git a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go index d25ac44d1e..747e363854 100644 --- a/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go +++ b/pkg/apis/postgres-operator.crunchydata.com/v1beta1/zz_generated.deepcopy.go @@ -570,6 +570,11 @@ func (in *InstrumentationMetricsSpec) DeepCopyInto(out *InstrumentationMetricsSp *out = new(InstrumentationCustomQueriesSpec) (*in).DeepCopyInto(*out) } + if in.Exporters != nil { + in, out := &in.Exporters, &out.Exporters + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.PerDBMetricTargets != nil { in, out := &in.PerDBMetricTargets, &out.PerDBMetricTargets *out = make([]string, len(*in)) diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml deleted file mode 100644 index dc85f9707c..0000000000 --- a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-exporter.yaml +++ /dev/null @@ -1,6 +0,0 @@ -apiVersion: kuttl.dev/v1beta1 -kind: TestStep -apply: -- files/19--add-logs-exporter.yaml -assert: -- files/19-logs-exporter-added.yaml diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml new file mode 100644 index 0000000000..7b21e0ef50 --- /dev/null +++ b/testing/kuttl/e2e/otel-logging-and-metrics/19--add-logs-metrics-exporter.yaml @@ -0,0 +1,6 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +apply: +- files/19--add-logs-metrics-exporter.yaml +assert: +- files/19-logs-metrics-exporter-added.yaml diff --git 
a/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml similarity index 90% rename from testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml index 8b86743cc0..2022397ce9 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-exported.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/20-assert-logs-metrics-exported.yaml @@ -44,3 +44,9 @@ commands: retry "gunicorn logs not found" exit 1 } + + metrics=$(kubectl logs "${pod}" --namespace "${NAMESPACE}" -c otel-collector | grep ccp) + { contains "${metrics}" 'ccp_stat'; } || { + retry "metrics not found" + exit 1 + } diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml similarity index 98% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml rename to testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml index 9943f61341..67926505c0 100644 --- a/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-exporter.yaml +++ b/testing/kuttl/e2e/otel-logging-and-metrics/files/19--add-logs-metrics-exporter.yaml @@ -35,6 +35,7 @@ spec: pgBouncer: {} instrumentation: metrics: + exporters: ['otlp'] customQueries: add: - name: slow-custom-queries @@ -121,6 +122,9 @@ data: logs/1: receivers: [otlp] exporters: [debug] + metrics/1: + receivers: [otlp] + exporters: [debug] --- apiVersion: v1 kind: Service diff --git a/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml b/testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml similarity index 100% rename from testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-exporter-added.yaml rename to 
testing/kuttl/e2e/otel-logging-and-metrics/files/19-logs-metrics-exporter-added.yaml