mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-05-13 19:46:39 +00:00
p2p: fix dial metrics not picking up the right error (#31621)
Our metrics related to dial errors were off. The original error was not wrapped, so the caller function had no chance of picking it up. Therefore the most common error, which is "TooManyPeers", was not correctly counted. The metrics were originally introduced in https://github.com/ethereum/go-ethereum/pull/27621. I was thinking of various possible solutions. - the one proposed here wraps both the new error and the original error. It is not a pattern we use in other parts of the code, but works. This is maybe the smallest possible change. - as an alternative, I could write a proper `errProtoHandshakeError` with its own wrapped error - finally, I'm not even sure we need `errProtoHandshakeError`, maybe we could just pass up the original error. --------- Signed-off-by: Csaba Kiraly <csaba.kiraly@gmail.com> Co-authored-by: Felix Lange <fjl@twurst.com>
This commit is contained in:
parent
476f117211
commit
6928ec5d92
3 changed files with 34 additions and 23 deletions
|
|
@ -49,7 +49,7 @@ var (
|
||||||
serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil)
|
serveSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success", nil)
|
||||||
dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil)
|
dialMeter = metrics.NewRegisteredMeter("p2p/dials", nil)
|
||||||
dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil)
|
dialSuccessMeter = metrics.NewRegisteredMeter("p2p/dials/success", nil)
|
||||||
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil)
|
dialConnectionError = metrics.NewRegisteredMeter("p2p/dials/error/connection", nil) // dial timeout; no route to host; connection refused; network is unreachable
|
||||||
|
|
||||||
// count peers that stayed connected for at least 1 min
|
// count peers that stayed connected for at least 1 min
|
||||||
serve1MinSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success/1min", nil)
|
serve1MinSuccessMeter = metrics.NewRegisteredMeter("p2p/serves/success/1min", nil)
|
||||||
|
|
@ -61,34 +61,41 @@ var (
|
||||||
dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil)
|
dialSelf = metrics.NewRegisteredMeter("p2p/dials/error/self", nil)
|
||||||
dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil)
|
dialUselessPeer = metrics.NewRegisteredMeter("p2p/dials/error/useless", nil)
|
||||||
dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil)
|
dialUnexpectedIdentity = metrics.NewRegisteredMeter("p2p/dials/error/id/unexpected", nil)
|
||||||
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil)
|
dialEncHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/enc", nil) // EOF; connection reset during handshake; message too big; i/o timeout
|
||||||
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil)
|
dialProtoHandshakeError = metrics.NewRegisteredMeter("p2p/dials/error/rlpx/proto", nil) // EOF
|
||||||
|
|
||||||
|
// capture the rest of errors that are not handled by the above meters
|
||||||
|
dialOtherError = metrics.NewRegisteredMeter("p2p/dials/error/other", nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
// markDialError matches errors that occur while setting up a dial connection
|
// markDialError matches errors that occur while setting up a dial connection to the
|
||||||
// to the corresponding meter.
|
// corresponding meter. We don't maintain meters for every possible error, just for
|
||||||
|
// the most interesting ones.
|
||||||
func markDialError(err error) {
|
func markDialError(err error) {
|
||||||
if !metrics.Enabled() {
|
if !metrics.Enabled() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err2 := errors.Unwrap(err); err2 != nil {
|
|
||||||
err = err2
|
var reason DiscReason
|
||||||
}
|
var handshakeErr *protoHandshakeError
|
||||||
switch err {
|
d := errors.As(err, &reason)
|
||||||
case DiscTooManyPeers:
|
switch {
|
||||||
|
case d && reason == DiscTooManyPeers:
|
||||||
dialTooManyPeers.Mark(1)
|
dialTooManyPeers.Mark(1)
|
||||||
case DiscAlreadyConnected:
|
case d && reason == DiscAlreadyConnected:
|
||||||
dialAlreadyConnected.Mark(1)
|
dialAlreadyConnected.Mark(1)
|
||||||
case DiscSelf:
|
case d && reason == DiscSelf:
|
||||||
dialSelf.Mark(1)
|
dialSelf.Mark(1)
|
||||||
case DiscUselessPeer:
|
case d && reason == DiscUselessPeer:
|
||||||
dialUselessPeer.Mark(1)
|
dialUselessPeer.Mark(1)
|
||||||
case DiscUnexpectedIdentity:
|
case d && reason == DiscUnexpectedIdentity:
|
||||||
dialUnexpectedIdentity.Mark(1)
|
dialUnexpectedIdentity.Mark(1)
|
||||||
case errEncHandshakeError:
|
case errors.As(err, &handshakeErr):
|
||||||
dialEncHandshakeError.Mark(1)
|
|
||||||
case errProtoHandshakeError:
|
|
||||||
dialProtoHandshakeError.Mark(1)
|
dialProtoHandshakeError.Mark(1)
|
||||||
|
case errors.Is(err, errEncHandshakeError):
|
||||||
|
dialEncHandshakeError.Mark(1)
|
||||||
|
default:
|
||||||
|
dialOtherError.Mark(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,11 +66,15 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
errServerStopped = errors.New("server stopped")
|
errServerStopped = errors.New("server stopped")
|
||||||
errEncHandshakeError = errors.New("rlpx enc error")
|
errEncHandshakeError = errors.New("rlpx enc error")
|
||||||
errProtoHandshakeError = errors.New("rlpx proto error")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type protoHandshakeError struct{ err error }
|
||||||
|
|
||||||
|
func (e *protoHandshakeError) Error() string { return fmt.Sprintf("rlpx proto error: %v", e.err) }
|
||||||
|
func (e *protoHandshakeError) Unwrap() error { return e.err }
|
||||||
|
|
||||||
// Server manages all peer connections.
|
// Server manages all peer connections.
|
||||||
type Server struct {
|
type Server struct {
|
||||||
// Config fields may not be modified while the server is running.
|
// Config fields may not be modified while the server is running.
|
||||||
|
|
@ -907,7 +911,7 @@ func (srv *Server) setupConn(c *conn, dialDest *enode.Node) error {
|
||||||
phs, err := c.doProtoHandshake(srv.ourHandshake)
|
phs, err := c.doProtoHandshake(srv.ourHandshake)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
clog.Trace("Failed p2p handshake", "err", err)
|
clog.Trace("Failed p2p handshake", "err", err)
|
||||||
return fmt.Errorf("%w: %v", errProtoHandshakeError, err)
|
return &protoHandshakeError{err: err}
|
||||||
}
|
}
|
||||||
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
|
if id := c.node.ID(); !bytes.Equal(crypto.Keccak256(phs.ID), id[:]) {
|
||||||
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))
|
clog.Trace("Wrong devp2p handshake identity", "phsid", hex.EncodeToString(phs.ID))
|
||||||
|
|
|
||||||
|
|
@ -410,11 +410,11 @@ func TestServerSetupConn(t *testing.T) {
|
||||||
wantCloseErr: DiscUnexpectedIdentity,
|
wantCloseErr: DiscUnexpectedIdentity,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: errProtoHandshakeError},
|
tt: &setupTransport{pubkey: clientpub, protoHandshakeErr: DiscTooManyPeers},
|
||||||
dialDest: enode.NewV4(clientpub, nil, 0, 0),
|
dialDest: enode.NewV4(clientpub, nil, 0, 0),
|
||||||
flags: dynDialedConn,
|
flags: dynDialedConn,
|
||||||
wantCalls: "doEncHandshake,doProtoHandshake,close,",
|
wantCalls: "doEncHandshake,doProtoHandshake,close,",
|
||||||
wantCloseErr: errProtoHandshakeError,
|
wantCloseErr: DiscTooManyPeers,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},
|
tt: &setupTransport{pubkey: srvpub, phs: protoHandshake{ID: crypto.FromECDSAPub(srvpub)[1:]}},
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue