mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-02-26 15:47:21 +00:00
p2p: fix dial metrics not picking up the right error (#31621)
Our metrics related to dial errors were off. The original error was not wrapped, so the caller function had no chance of picking it up. Therefore the most common error, which is "TooManyPeers", was not correctly counted. The metrics were originally introduced in https://github.com/ethereum/go-ethereum/pull/27621. I was thinking of various possible solutions. - the one proposed here wraps both the new error and the original error. It is not a pattern we use in other parts of the code, but works. This is maybe the smallest possible change. - as an alternate, I could write a proper `errProtoHandshakeError` with its own wrapped error - finally, I'm not even sure we need `errProtoHandshakeError`, maybe we could just pass up the original error. --------- Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com> Co-authored-by: Felix Lange <fjl@twurst.com>
This commit is contained in:
parent
476f117211
commit
6928ec5d92
3 changed files with 34 additions and 23 deletions
|
|
@ -49,7 +49,7 @@ var (
|
|||
serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil)
|
||||
dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil)
|
||||
dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil)
|
||||
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil)
|
||||
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil) // dial timeout; no route to host; connection refused; network is unreachable
|
||||
|
||||
// count peers that stayed connected for at least 1 min
|
||||
serve1MinSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success/1min", nil)
|
||||
|
|
@ -61,34 +61,41 @@ var (
|
|||
dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil)
|
||||
dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil)
|
||||
dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil)
|
||||
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil)
|
||||
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil)
|
||||
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil) // EOF; connection reset during handshake; message too big; i/o timeout
|
||||
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil) // EOF
|
||||
|
||||
// capture the rest of errors that are not handled by the above meters
|
||||
dialOtherError = metrics.NewRegisteredMeter("p2p/dials/error/other", nil)
|
||||
)
|
||||
|
||||
// markDialError matches errors that occur while setting up a dial connection
|
||||
// to the corresponding meter.
|
||||
// markDialError matches errors that occur while setting up a dial connection to the
|
||||
// corresponding meter. We don't maintain meters for every possible error, just for
|
||||
// the most interesting ones.
|
||||
func markDialError(err error) {
|
||||
if !metrics.Enabled() {
|
||||
return
|
||||
}
|
||||
if err2 := errors.Unwrap(err); err2 != nil {
|
||||
err = err2
|
||||
}
|
||||
switch err {
|
||||
case DiscTooManyPeers:
|
||||
|
||||
var reason DiscReason
|
||||
var handshakeErr *protoHandshakeError
|
||||
d := errors.As(err, &reason)
|
||||
switch {
|
||||
case d && reason == DiscTooManyPeers:
|
||||
dialTooManyPeers.Mark(1)
|
||||
case DiscAlreadyConnected:
|
||||
case d && reason == DiscAlreadyConnected:
|
||||
dialAlreadyConnected.Mark(1)
|
||||
case DiscSelf:
|
||||
case d && reason == DiscSelf:
|
||||
dialSelf.Mark(1)
|
||||
case DiscUselessPeer:
|
||||
case d && reason == DiscUselessPeer:
|
||||
dialUselessPeer.Mark(1)
|
||||
case DiscUnexpectedIdentity:
|
||||
case d && reason == DiscUnexpectedIdentity:
|
||||
dialUnexpectedIdentity.Mark(1)
|
||||
case errEncHandshakeError:
|
||||
dialEncHandshakeError.Mark(1)
|
||||
case errProtoHandshakeError:
|
||||
case errors.As(err, &handshakeErr):
|
||||
dialProtoHandshakeError.Mark(1)
|
||||
case errors.Is(err, errEncHandshakeError):
|
||||
dialEncHandshakeError.Mark(1)
|
||||
default:
|
||||
dialOtherError.Mark(1)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,11 +66,15 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
errServerStopped = errors.New("server stopped")
|
||||
errEncHandshakeError = errors.New("rlpx enc error")
|
||||
errProtoHandshakeError = errors.New("rlpx proto error")
|
||||
errServerStopped = errors.New("server stopped")
|
||||
errEncHandshakeError = errors.New("rlpx enc error")
|
||||
)
|
||||
|
||||
type protoHandshakeError struct{ err error }
|
||||
|
||||
func (e *protoHandshakeError) Error() string { return fmt.Sprintf("rlpx proto error: %v", e.err) }
|
||||
func (e *protoHandshakeError) Unwrap() error { return e.err }
|
||||
|
||||
// Server manages all peer connections.
|
||||
type Server struct {
|
||||
// Config fields may not be modified while the server is running.
|
||||
|
|
@ -907,7 +911,7 @@ func (srv *Server) setupConn(c *conn, dialDest *enode.Node) error {
|
|||
phs, err := c.doProtoHandshake(srv.ourHandshake)
|
||||
if err != nil {
|
||||
clog.Trace("Failed p2p handshake", "err", err)
|
||||
return fmt.Errorf("%w: %v", errProtoHandshakeError, err)
|
||||
return &protoHandshakeError{err: err}
|
||||
}
|
||||
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
|
||||
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))
|
||||
|
|
|
|||
|
|
@ -410,11 +410,11 @@ func TestServerSetupConn(t *testing.T) {
|
|||
wantCloseErr: DiscUnexpectedIdentity,
|
||||
},
|
||||
{
|
||||
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: errProtoHandshakeError},
|
||||
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: DiscTooManyPeers},
|
||||
dialDest: enode.NewV4(clientpub, nil, 0, 0),
|
||||
flags: dynDialedConn,
|
||||
wantCalls: "doEncHandshake,doProtoHandshake,close,",
|
||||
wantCloseErr: errProtoHandshakeError,
|
||||
wantCloseErr: DiscTooManyPeers,
|
||||
},
|
||||
{
|
||||
tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},
|
||||
|
|
|
|||
Loading…
Reference in a new issue