diff --git a/cli_flags.go b/cli_flags.go index 3ca3db6f4..d6ea79d10 100644 --- a/cli_flags.go +++ b/cli_flags.go @@ -28,6 +28,7 @@ const ( defaultArgSendErrorFrames = false defaultOffCPUThreshold = 0 defaultEnvVarsValue = "" + defaultBPFFSRoot = "/sys/fs/bpf/" // This is the X in 2^(n + x) where n is the default hardcoded map size value defaultArgMapScaleFactor = 0 @@ -80,6 +81,9 @@ var ( "Expected format: probe_type:target[:symbol]. probe_type can be kprobe, kretprobe, uprobe, or uretprobe." loadProbeHelper = "Load generic eBPF program that can be attached externally to " + "various user or kernel space hooks." + bpffsHelp = fmt.Sprintf("Set the root BPF FS path for pinned maps. Only used for OBI span/trace ID communication. Default is %s", + defaultBPFFSRoot) + obiProcessCtxHelp = "Load or create a pinned eBPF map for sharing process context information with OBI." ) // Package-scope variable, so that conditionally compiled other components can refer @@ -141,11 +145,15 @@ func parseArgs() (*controller.Config, error) { fs.StringVar(&args.IncludeEnvVars, "env-vars", defaultEnvVarsValue, envVarsHelp) + fs.StringVar(&args.BPFFSRoot, "bpffs-root", defaultBPFFSRoot, bpffsHelp) + fs.Func("probe-link", probeLinkHelper, func(link string) error { args.ProbeLinks = append(args.ProbeLinks, link) return nil }) + fs.BoolVar(&args.OBIProcessCtx, "obi-process-ctx", false, obiProcessCtxHelp) + fs.BoolVar(&args.LoadProbe, "load-probe", false, loadProbeHelper) fs.Usage = func() { diff --git a/collector/config/config.go b/collector/config/config.go index 1f4ebe96d..03e2eec54 100644 --- a/collector/config/config.go +++ b/collector/config/config.go @@ -61,7 +61,9 @@ type Config struct { NoKernelVersionCheck bool `mapstructure:"no_kernel_version_check"` MaxGRPCRetries uint32 `mapstructure:"max_grpc_retries"` MaxRPCMsgSize int `mapstructure:"max_rpc_msg_size"` + BPFFSRoot string `mapstructure:"bpf_fs_root"` ErrorMode ErrorMode `mapstructure:"error_mode"` + OBIProcessCtx bool 
`mapstructure:"obi_process_ctx"` } // Validate validates the config. diff --git a/collector/factory.go b/collector/factory.go index 1988b48ee..2a9a0ab53 100644 --- a/collector/factory.go +++ b/collector/factory.go @@ -39,6 +39,7 @@ func defaultConfig() component.Config { ClockSyncInterval: 3 * time.Minute, MaxGRPCRetries: 5, MaxRPCMsgSize: 32 << 20, // 32 MiB, + BPFFSRoot: "/sys/fs/bpf/", ErrorMode: config.PropagateError, } } diff --git a/internal/controller/controller.go b/internal/controller/controller.go index cb3e378b0..9bdd602a8 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -109,6 +109,8 @@ func (c *Controller) Start(ctx context.Context) error { ProbeLinks: c.config.ProbeLinks, LoadProbe: c.config.LoadProbe, ExecutableReporter: c.config.ExecutableReporter, + BPFFSRoot: c.config.BPFFSRoot, + OBIProcessCtx: c.config.OBIProcessCtx, }) if err != nil { return fmt.Errorf("failed to load eBPF tracer: %w", err) diff --git a/processmanager/manager.go b/processmanager/manager.go index 0148d99b6..f280a4fbd 100644 --- a/processmanager/manager.go +++ b/processmanager/manager.go @@ -319,6 +319,8 @@ func (pm *ProcessManager) HandleTrace(bpfTrace *libpf.EbpfTrace) { Origin: bpfTrace.Origin, OffTime: bpfTrace.OffTime, EnvVars: bpfTrace.EnvVars, + TraceID: bpfTrace.APMTraceID, + SpanID: bpfTrace.APMTransactionID, } pid := bpfTrace.PID diff --git a/reporter/base_reporter.go b/reporter/base_reporter.go index 84266918c..c33f8eff3 100644 --- a/reporter/base_reporter.go +++ b/reporter/base_reporter.go @@ -88,6 +88,8 @@ func (b *baseReporter) ReportTraceEvent(trace *libpf.Trace, meta *samples.TraceE Comm: meta.Comm, TID: int64(meta.TID), CPU: int64(meta.CPU), + SpanID: meta.SpanID, + TraceID: meta.TraceID, ExtraMeta: extraMeta, } if events, exists := rtp.Events[meta.Origin][sampleKey]; exists { diff --git a/support/ebpf/interpreter_dispatcher.ebpf.c b/support/ebpf/interpreter_dispatcher.ebpf.c index aa2d54c24..ae9a1c9a7 100644 --- 
a/support/ebpf/interpreter_dispatcher.ebpf.c +++ b/support/ebpf/interpreter_dispatcher.ebpf.c @@ -118,6 +118,15 @@ struct trace_events_t { // End shared maps +// Implements the specification to share span/trace IDs according to: +// https://github.com/open-telemetry/opentelemetry-ebpf-instrumentation/blob/main/devdocs/trace-profile-correlation.md +struct traces_ctx_v1_t { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __type(key, u64); + __type(value, SpanTraceInfo); + __uint(max_entries, 1 << 14); +} traces_ctx_v1 SEC(".maps"); + struct apm_int_procs_t { __uint(type, BPF_MAP_TYPE_HASH); __type(key, pid_t); @@ -188,6 +197,26 @@ static EBPF_INLINE void maybe_add_go_custom_labels(struct pt_regs *ctx, PerCPURe tail_call(ctx, PROG_GO_LABELS); } +// Implements the specification to share span/trace IDs according to: +// https://github.com/open-telemetry/opentelemetry-ebpf-instrumentation/blob/main/devdocs/trace-profile-correlation.md +static EBPF_INLINE void maybe_add_otel_span_trace_id(Trace *trace) +{ + u64 id = bpf_get_current_pid_tgid(); + + SpanTraceInfo *info = bpf_map_lookup_elem(&traces_ctx_v1, &id); + if (!info) { + return; + } + + // The structure of apm_[transaction|trace]_id happens to be the same + // as proposed in + // https://github.com/open-telemetry/opentelemetry-ebpf-instrumentation/blob/main/devdocs/trace-profile-correlation.md + + trace->apm_trace_id.as_int.hi = info->trace_id.as_int.hi; + trace->apm_trace_id.as_int.lo = info->trace_id.as_int.lo; + trace->apm_transaction_id.as_int = info->span_id.as_int; +} + static EBPF_INLINE void maybe_add_apm_info(Trace *trace) { u32 pid = trace->pid; // verifier needs this to be on stack on 4.15 kernel @@ -247,6 +276,12 @@ static EBPF_INLINE int unwind_stop(struct pt_regs *ctx) UnwindState *state = &record->state; maybe_add_apm_info(trace); + if ( + trace->apm_trace_id.as_int.hi == 0 && trace->apm_trace_id.as_int.lo == 0 && + trace->apm_transaction_id.as_int == 0) { + // Populate OTel span/trace ID only if span/trace 
ID is not yet set. + maybe_add_otel_span_trace_id(trace); + } // If the stack is otherwise empty, push an error for that: we should // never encounter empty stacks for successful unwinding. diff --git a/support/ebpf/tracer.ebpf.amd64 b/support/ebpf/tracer.ebpf.amd64 index 9a44839d7..8425df149 100644 Binary files a/support/ebpf/tracer.ebpf.amd64 and b/support/ebpf/tracer.ebpf.amd64 differ diff --git a/support/ebpf/tracer.ebpf.arm64 b/support/ebpf/tracer.ebpf.arm64 index 4d55359b6..d1a47909a 100644 Binary files a/support/ebpf/tracer.ebpf.arm64 and b/support/ebpf/tracer.ebpf.arm64 differ diff --git a/support/ebpf/types.h b/support/ebpf/types.h index 048e3c02c..5c7cd815f 100644 --- a/support/ebpf/types.h +++ b/support/ebpf/types.h @@ -565,6 +565,11 @@ typedef union ApmSpanID { _Static_assert(sizeof(ApmSpanID) == 8, "unexpected trace ID size"); +typedef struct __attribute__((packed)) SpanTraceInfo { + ApmTraceID trace_id; + ApmSpanID span_id; +} SpanTraceInfo; + // Defines the format of the APM correlation TLS buffer. // // Specification: diff --git a/tracer/tracer.go b/tracer/tracer.go index f64d9ff39..950232114 100644 --- a/tracer/tracer.go +++ b/tracer/tracer.go @@ -12,6 +12,8 @@ import ( "fmt" "math" "math/rand/v2" + "os" + "path" "slices" "strings" "sync" @@ -63,6 +65,12 @@ const ( schedProcessFreeV2 = "tracepoint__sched_process_free" ) +// Shared map name according to +// https://github.com/open-telemetry/opentelemetry-ebpf-instrumentation/blob/main/devdocs/trace-profile-correlation.md +const ( + obiSpanTracesMap = "traces_ctx_v1" +) + // Intervals is a subset of config.IntervalsAndTimers. type Intervals interface { MonitorInterval() time.Duration @@ -177,6 +185,10 @@ type Config struct { // LoadProbe indicates whether the generic eBPF program should be loaded // without being attached to something. LoadProbe bool + // BPFFSRoot is the root path to BPF filesystem for pinned maps and programs. 
+ BPFFSRoot string + // OBIProcessCtx enables the use of a known shared eBPF map with OBI. + OBIProcessCtx bool } // hookPoint specifies the group and name of the hooked point in the kernel. @@ -620,6 +632,24 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config, // Off CPU Profiling is disabled. So do not load this map. continue } + if mapName == obiSpanTracesMap { + if cfg.BPFFSRoot == "" || !cfg.OBIProcessCtx { + // As BPF FS is not set or process context sharing with OBI is not + // enabled, the map can not be shared with other OTel components. + // To reduce the memory footprint in this case reduce the size of the map. + mapSpec.MaxEntries = 1 + } else { + // Try to load it from a known path: + mPath := path.Join(cfg.BPFFSRoot, "otel", mapName) + ebpfMap, err := cebpf.LoadPinnedMap(mPath, &cebpf.LoadPinOptions{}) + if err == nil { + log.Infof("Using shared map for OBI span/trace ID communication") + ebpfMaps[mapName] = ebpfMap + continue + } + // No usable pinned map (e.g. not created yet) - so load and pin a new one below + } + } if !types.IsMapEnabled(mapName, cfg.IncludeTracers) { log.Debugf("Skipping eBPF map %s: tracer not enabled", mapName) @@ -634,6 +664,32 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config, return fmt.Errorf("failed to load %s: %v", mapName, err) } ebpfMaps[mapName] = ebpfMap + + if mapName == obiSpanTracesMap { + if cfg.BPFFSRoot == "" || !cfg.OBIProcessCtx { + // Without a BPF FS root or with OBI sharing disabled, + // we just load the map to not break eBPF programs. + log.Infof("Skip pinning eBPF map to share OTel span/trace IDs") + continue + } + + // Pin the loaded map to a known path, so that other OTel components can + // use it. The sticky bit has to be passed as os.ModeSticky, not as octal 0o1000. + otelBPFFS := path.Join(cfg.BPFFSRoot, "otel") + if err := os.MkdirAll(otelBPFFS, 0o700|os.ModeSticky); err != nil { + // This is a non-fatal error for the functionality + // of the profiler. So just log it. + log.Warnf("Failed to create '%s'. 
OTel span/trace IDs can not be shared: %v", + otelBPFFS, err) + continue + } + if err := ebpfMap.Pin(path.Join(otelBPFFS, mapName)); err != nil { + // This is a non-fatal error for the functionality + // of the profiler. So just log it. + log.Warnf("Failed to pin '%s'. OTel span/trace IDs can not be shared: %v", + mapName, err) + } + } } return nil