rpc,internal/telemetry: fix deferred spanEnd to capture errors via pointer (#33772)

The endSpan closure accepted error by value, meaning deferred calls like
defer spanEnd(err) captured the error at defer-time (always nil), not at
function-return time. This meant errors were never recorded on spans.

- Changed endSpan to accept *error
- Updated all call sites in rpc/handler.go to pass error pointers, and
adjusted handleCall to avoid propagating child-span errors to the parent
- Added TestTracingHTTPErrorRecording to verify that errors from RPC
methods are properly recorded on the rpc.runMethod span
This commit is contained in:
Jonny Rhea 2026-02-13 18:00:14 -06:00 committed by GitHub
parent ac85a6f254
commit d8b92cb9e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 62 additions and 18 deletions

View file

@ -46,12 +46,12 @@ func BoolAttribute(key string, val bool) Attribute {
}
// StartSpan creates a SpanKind=INTERNAL span.
func StartSpan(ctx context.Context, spanName string, attributes ...Attribute) (context.Context, trace.Span, func(error)) {
func StartSpan(ctx context.Context, spanName string, attributes ...Attribute) (context.Context, trace.Span, func(*error)) {
return StartSpanWithTracer(ctx, otel.Tracer(""), spanName, attributes...)
}
// StartSpanWithTracer requires a tracer to be passed in and creates a SpanKind=INTERNAL span.
func StartSpanWithTracer(ctx context.Context, tracer trace.Tracer, name string, attributes ...Attribute) (context.Context, trace.Span, func(error)) {
func StartSpanWithTracer(ctx context.Context, tracer trace.Tracer, name string, attributes ...Attribute) (context.Context, trace.Span, func(*error)) {
return startSpan(ctx, tracer, trace.SpanKindInternal, name, attributes...)
}
@ -67,7 +67,7 @@ type RPCInfo struct {
// The span name is formatted as $rpcSystem.$rpcService/$rpcMethod
// (e.g. "jsonrpc.engine/newPayloadV4") which follows the Open Telemetry
// semantic convensions: https://opentelemetry.io/docs/specs/semconv/rpc/rpc-spans/#span-name.
func StartServerSpan(ctx context.Context, tracer trace.Tracer, rpc RPCInfo, others ...Attribute) (context.Context, func(error)) {
func StartServerSpan(ctx context.Context, tracer trace.Tracer, rpc RPCInfo, others ...Attribute) (context.Context, func(*error)) {
var (
name = fmt.Sprintf("%s.%s/%s", rpc.System, rpc.Service, rpc.Method)
attributes = append([]Attribute{
@ -84,7 +84,7 @@ func StartServerSpan(ctx context.Context, tracer trace.Tracer, rpc RPCInfo, othe
}
// startSpan creates a span with the given kind.
func startSpan(ctx context.Context, tracer trace.Tracer, kind trace.SpanKind, spanName string, attributes ...Attribute) (context.Context, trace.Span, func(error)) {
func startSpan(ctx context.Context, tracer trace.Tracer, kind trace.SpanKind, spanName string, attributes ...Attribute) (context.Context, trace.Span, func(*error)) {
ctx, span := tracer.Start(ctx, spanName, trace.WithSpanKind(kind))
if len(attributes) > 0 {
span.SetAttributes(attributes...)
@ -93,11 +93,11 @@ func startSpan(ctx context.Context, tracer trace.Tracer, kind trace.SpanKind, sp
}
// endSpan ends the span and handles error recording.
func endSpan(span trace.Span) func(error) {
return func(err error) {
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
func endSpan(span trace.Span) func(*error) {
return func(err *error) {
if err != nil && *err != nil {
span.RecordError(*err)
span.SetStatus(codes.Error, (*err).Error())
}
span.End()
}

View file

@ -524,7 +524,6 @@ func (h *handler) handleCall(cp *callProc, msg *jsonrpcMessage) *jsonrpcMessage
}
// Start root span for the request.
var err error
rpcInfo := telemetry.RPCInfo{
System: "jsonrpc",
Service: service,
@ -535,24 +534,25 @@ func (h *handler) handleCall(cp *callProc, msg *jsonrpcMessage) *jsonrpcMessage
telemetry.BoolAttribute("rpc.batch", cp.isBatch),
}
ctx, spanEnd := telemetry.StartServerSpan(cp.ctx, h.tracer(), rpcInfo, attrib...)
defer spanEnd(err)
defer spanEnd(nil) // don't propagate errors to parent spans
// Start tracing span before parsing arguments.
_, _, pSpanEnd := telemetry.StartSpanWithTracer(ctx, h.tracer(), "rpc.parsePositionalArguments")
args, err := parsePositionalArguments(msg.Params, callb.argTypes)
pSpanEnd(err)
if err != nil {
return msg.errorResponse(&invalidParamsError{err.Error()})
args, pErr := parsePositionalArguments(msg.Params, callb.argTypes)
pSpanEnd(&pErr)
if pErr != nil {
return msg.errorResponse(&invalidParamsError{pErr.Error()})
}
start := time.Now()
// Start tracing span before running the method.
rctx, _, rSpanEnd := telemetry.StartSpanWithTracer(ctx, h.tracer(), "rpc.runMethod")
answer := h.runMethod(rctx, msg, callb, args)
var rErr error
if answer.Error != nil {
err = errors.New(answer.Error.Message)
rErr = errors.New(answer.Error.Message)
}
rSpanEnd(err)
rSpanEnd(&rErr)
// Collect the statistics for RPC calls if metrics is enabled.
rpcRequestGauge.Inc(1)
@ -625,7 +625,7 @@ func (h *handler) runMethod(ctx context.Context, msg *jsonrpcMessage, callb *cal
if response.Error != nil {
err = errors.New(response.Error.Message)
}
spanEnd(err)
spanEnd(&err)
return response
}

View file

@ -23,6 +23,7 @@ import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/propagation"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
"go.opentelemetry.io/otel/sdk/trace/tracetest"
@ -141,6 +142,49 @@ func TestTracingHTTP(t *testing.T) {
}
}
// TestTracingErrorRecording verifies that errors are recorded on spans.
func TestTracingHTTPErrorRecording(t *testing.T) {
t.Parallel()
server, tracer, exporter := newTracingServer(t)
httpsrv := httptest.NewServer(server)
t.Cleanup(httpsrv.Close)
client, err := DialHTTP(httpsrv.URL)
if err != nil {
t.Fatalf("failed to dial: %v", err)
}
t.Cleanup(client.Close)
// Call a method that returns an error.
var result any
err = client.Call(&result, "test_returnError")
if err == nil {
t.Fatal("expected error from test_returnError")
}
// Flush and verify spans recorded the error.
if err := tracer.ForceFlush(context.Background()); err != nil {
t.Fatalf("failed to flush: %v", err)
}
spans := exporter.GetSpans()
// Only the runMethod span should have error status.
if len(spans) == 0 {
t.Fatal("no spans were emitted")
}
for _, span := range spans {
switch span.Name {
case "rpc.runMethod":
if span.Status.Code != codes.Error {
t.Errorf("expected %s span status Error, got %v", span.Name, span.Status.Code)
}
default:
if span.Status.Code == codes.Error {
t.Errorf("unexpected error status on span %s", span.Name)
}
}
}
}
// TestTracingBatchHTTP verifies that RPC spans are emitted for batched JSON-RPC calls over HTTP.
func TestTracingBatchHTTP(t *testing.T) {
t.Parallel()