Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,5 @@ storybook-static/
test-results.xml

docsite/

.kilo-format-temp-*
9 changes: 6 additions & 3 deletions pkg/remote/conncontroller/conncontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,8 @@ func (conn *SSHConn) Connect(ctx context.Context, connFlags *wconfig.ConnKeyword
conn.FireConnChangeEvent()
err := conn.connectInternal(ctx, connFlags)
if err != nil {
errorCode := remote.ClassifyConnError(err)
errorCode, subCode := remote.ClassifyConnError(err)
isContextError := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)
conn.Infof(ctx, "ERROR [%s] %v\n\n", errorCode, err)
conn.WithLock(func() {
conn.Status = Status_Error
Expand All @@ -762,8 +763,10 @@ func (conn *SSHConn) Connect(ctx context.Context, connFlags *wconfig.ConnKeyword
telemetry.GoRecordTEventWrap(&telemetrydata.TEvent{
Event: "conn:connecterror",
Props: telemetrydata.TEventProps{
ConnType: "ssh",
ConnErrorCode: errorCode,
ConnType: "ssh",
ConnErrorCode: errorCode,
ConnSubErrorCode: subCode,
ConnContextError: isContextError,
},
})
} else {
Expand Down
126 changes: 113 additions & 13 deletions pkg/remote/sshclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,27 @@ const (
ConnErrCode_Unknown = "unknown"
)

// Dial error subcodes for more granular classification.
// They refine ConnErrCode_Dial errors; ClassifyDialErrorSubCode picks one by
// inspecting the error chain and error text.
const (
	// DialSubCode_DNS marks a failure whose error chain contains a *net.DNSError.
	DialSubCode_DNS = "dns"
	// DialSubCode_Refused marks an error whose text contains "connection refused".
	DialSubCode_Refused = "refused"
	// DialSubCode_Timeout marks a dial that timed out.
	DialSubCode_Timeout = "timeout"
	// DialSubCode_ContextCanceled marks a dial error that matches a context
	// error (see ClassifyDialErrorSubCode for exactly which ones).
	DialSubCode_ContextCanceled = "context-canceled"
	// DialSubCode_NoRoute marks an error whose text contains "no route to host".
	DialSubCode_NoRoute = "no-route"
	// DialSubCode_HostUnreach marks an error whose text reports the host as unreachable.
	DialSubCode_HostUnreach = "host-unreachable"
	// DialSubCode_NetUnreach marks an error whose text reports the network as unreachable.
	DialSubCode_NetUnreach = "net-unreachable"
	// DialSubCode_ConnReset marks an error whose text contains "connection reset".
	DialSubCode_ConnReset = "conn-reset"
	// DialSubCode_PermDenied marks an error whose text contains "permission denied".
	DialSubCode_PermDenied = "perm-denied"
	// DialSubCode_ProxyJump is assigned by connectInternal when dialing through
	// an already-established SSH client fails; it is not produced by
	// ClassifyDialErrorSubCode.
	DialSubCode_ProxyJump = "proxy-jump"
	// DialSubCode_Other is the fallback when no more specific subcode matches.
	DialSubCode_Other = "other"
)

// Auth error subcodes for more granular classification.
// ClassifyConnError pairs them with ConnErrCode_AuthFailed.
const (
	// AuthSubCode_UnableToAuth is used when the error text contains
	// "unable to authenticate".
	AuthSubCode_UnableToAuth = "unable-to-auth"
	// AuthSubCode_HandshakeFailed is used when the error text contains
	// "handshake failed".
	AuthSubCode_HandshakeFailed = "handshake-failed"
)

// waveSshConfigUserSettingsInternal holds the package's ssh_config user settings.
// NOTE(review): its initializer is not visible in this excerpt; presumably it
// is populated lazily under configUserSettingsOnce — confirm against the
// accessor that uses both.
var waveSshConfigUserSettingsInternal *ssh_config.UserSettings

// configUserSettingsOnce is a sync.Once; judging by its name it guards the
// one-time setup of waveSshConfigUserSettingsInternal — TODO confirm.
var configUserSettingsOnce = &sync.Once{}

Expand Down Expand Up @@ -118,33 +139,110 @@ func SimpleMessageFromPossibleConnectionError(err error) string {
return err.Error()
}

func ClassifyConnError(err error) string {
func ClassifyConnError(err error) (string, string) {
code := utilds.GetErrorCode(err)
subCode := utilds.GetErrorSubCode(err)
if code != "" {
return code
return code, subCode
}
var dnsErr *net.DNSError
if errors.As(err, &dnsErr) {
return ConnErrCode_Dial
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
}
var opErr *net.OpError
if errors.As(err, &opErr) {
return ConnErrCode_Dial
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
}
errStr := err.Error()
if strings.Contains(errStr, "unable to authenticate") {
return ConnErrCode_AuthFailed
return ConnErrCode_AuthFailed, AuthSubCode_UnableToAuth
}
if strings.Contains(errStr, "handshake failed") {
return ConnErrCode_AuthFailed
return ConnErrCode_AuthFailed, AuthSubCode_HandshakeFailed
}
if strings.Contains(errStr, "connection refused") {
return ConnErrCode_Dial
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
}
if strings.Contains(errStr, "timed out") || strings.Contains(errStr, "timeout") {
return ConnErrCode_Dial
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
}
return ConnErrCode_Unknown, ""
}

// ClassifyDialErrorSubCode provides more granular classification of dial errors
// to help identify root causes (DNS, VPN, timeouts, etc.).
//
// It returns "" for a nil error and DialSubCode_Other when no specific cause
// is recognized. Checks run in this order:
//  1. context.Canceled         -> DialSubCode_ContextCanceled
//  2. context.DeadlineExceeded -> DialSubCode_Timeout
//  3. *net.DNSError in chain   -> DialSubCode_DNS
//  4. *net.OpError in chain    -> text of its inner error, then its Timeout()
//  5. text of the whole error  -> refused, timeout, then the remaining patterns
func ClassifyDialErrorSubCode(err error) string {
	if err == nil {
		return ""
	}

	if errors.Is(err, context.Canceled) {
		return DialSubCode_ContextCanceled
	}
	// An expired deadline is a timeout, not a cancellation. The net package's
	// "i/o timeout" dial error also matches context.DeadlineExceeded under
	// errors.Is, so labelling it as canceled would hide every dial timeout.
	if errors.Is(err, context.DeadlineExceeded) {
		return DialSubCode_Timeout
	}

	var dnsErr *net.DNSError
	if errors.As(err, &dnsErr) {
		return DialSubCode_DNS
	}

	var opErr *net.OpError
	if errors.As(err, &opErr) {
		// Prefer the inner error: it names the concrete cause without the
		// address/operation prefix that OpError adds.
		if opErr.Err != nil {
			if subCode := dialSubCodeFromText(opErr.Err.Error()); subCode != "" {
				return subCode
			}
		}
		if opErr.Timeout() {
			return DialSubCode_Timeout
		}
	}

	// Fall back to the full error text. "connection refused" is tested before
	// the timeout phrases, which in turn come before the remaining patterns.
	errStr := err.Error()
	if strings.Contains(errStr, "connection refused") {
		return DialSubCode_Refused
	}
	if strings.Contains(errStr, "timed out") || strings.Contains(errStr, "timeout") {
		return DialSubCode_Timeout
	}
	if subCode := dialSubCodeFromText(errStr); subCode != "" {
		return subCode
	}

	return DialSubCode_Other
}

// dialSubCodeFromText maps well-known network failure phrases found in msg to
// a dial subcode, or returns "" when none is present. Timeout phrases are
// intentionally not handled here; callers decide where they rank.
func dialSubCodeFromText(msg string) string {
	switch {
	case strings.Contains(msg, "connection refused"):
		return DialSubCode_Refused
	case strings.Contains(msg, "no route to host"):
		return DialSubCode_NoRoute
	case strings.Contains(msg, "host is unreachable"), strings.Contains(msg, "host unreachable"):
		return DialSubCode_HostUnreach
	case strings.Contains(msg, "network is unreachable"), strings.Contains(msg, "network unreachable"):
		return DialSubCode_NetUnreach
	case strings.Contains(msg, "connection reset"):
		return DialSubCode_ConnReset
	case strings.Contains(msg, "permission denied"):
		return DialSubCode_PermDenied
	}
	return ""
}

// This exists to trick the ssh library into continuing to try
Expand Down Expand Up @@ -747,15 +845,17 @@ func connectInternal(ctx context.Context, networkAddr string, clientConfig *ssh.
blocklogger.Infof(ctx, "[conndebug] ssh dial %s\n", networkAddr)
clientConn, err = d.DialContext(ctx, "tcp", networkAddr)
if err != nil {
blocklogger.Infof(ctx, "[conndebug] ERROR dial error: %v\n", err)
return nil, utilds.MakeCodedError(ConnErrCode_Dial, err)
subCode := ClassifyDialErrorSubCode(err)
blocklogger.Infof(ctx, "[conndebug] ERROR dial error [%s]: %v\n", subCode, err)
return nil, utilds.MakeSubCodedError(ConnErrCode_Dial, subCode, err)
}
} else {
blocklogger.Infof(ctx, "[conndebug] ssh dial (from client) %s\n", networkAddr)
clientConn, err = currentClient.DialContext(ctx, "tcp", networkAddr)
if err != nil {
blocklogger.Infof(ctx, "[conndebug] ERROR dial error: %v\n", err)
return nil, utilds.MakeCodedError(ConnErrCode_Dial, err)
subCode := DialSubCode_ProxyJump // This is a proxy jump connection error
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Hardcoded subcode loses error classification detail

This hardcodes DialSubCode_ProxyJump for all proxy jump errors, but doesn't classify the actual error type. A proxy jump connection can fail for many reasons (DNS, timeout, refused, etc.).

Consider calling ClassifyDialErrorSubCode(err) to get the actual error type, or use a composite approach that preserves both the proxy-jump context and the underlying error classification.

blocklogger.Infof(ctx, "[conndebug] ERROR dial error [%s]: %v\n", subCode, err)
return nil, utilds.MakeSubCodedError(ConnErrCode_Dial, subCode, err)
}
}
c, chans, reqs, err := ssh.NewClientConn(clientConn, networkAddr, clientConfig)
Expand Down
8 changes: 5 additions & 3 deletions pkg/telemetry/telemetrydata/telemetrydata.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ type TEventProps struct {
WshCmd string `json:"wsh:cmd,omitempty"`
WshHadError bool `json:"wsh:haderror,omitempty"`

ConnType string `json:"conn:conntype,omitempty"`
ConnWshErrorCode string `json:"conn:wsherrorcode,omitempty"`
ConnErrorCode string `json:"conn:errorcode,omitempty"`
ConnType string `json:"conn:conntype,omitempty"`
ConnWshErrorCode string `json:"conn:wsherrorcode,omitempty"`
ConnErrorCode string `json:"conn:errorcode,omitempty"`
ConnSubErrorCode string `json:"conn:suberrorcode,omitempty"`
ConnContextError bool `json:"conn:contexterror,omitempty"`

OnboardingFeature string `json:"onboarding:feature,omitempty" tstype:"\"waveai\" | \"durable\" | \"magnify\" | \"wsh\""`
OnboardingVersion string `json:"onboarding:version,omitempty"`
Expand Down
26 changes: 23 additions & 3 deletions pkg/utilds/codederror.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ import (

// CodedError wraps an error with a string code for categorization.
// The code can be extracted from anywhere in an error chain using GetErrorCode.
// SubCode provides additional granularity for error classification and can be
// extracted with GetErrorSubCode.
type CodedError struct {
	// Code is the coarse error category.
	Code string
	// SubCode optionally refines Code; it is empty when no subcode was set.
	SubCode string
	// Err is the wrapped underlying error.
	Err error
}

func (e CodedError) Error() string {
Expand All @@ -25,7 +27,12 @@ func (e CodedError) Unwrap() error {

// MakeCodedError creates a new CodedError with the given code and error.
// SubCode is left empty; use MakeSubCodedError when a subcode is available.
func MakeCodedError(code string, err error) CodedError {
	return CodedError{Code: code, Err: err}
}

// MakeSubCodedError builds a CodedError that carries both a code and a
// subcode around err.
func MakeSubCodedError(code string, subCode string, err error) CodedError {
	coded := CodedError{Err: err}
	coded.Code = code
	coded.SubCode = subCode
	return coded
}

// GetErrorCode extracts the error code from anywhere in the error chain.
Expand All @@ -41,6 +48,19 @@ func GetErrorCode(err error) string {
return ""
}

// GetErrorSubCode looks through the error chain for the first CodedError and
// reports its SubCode. The result is the empty string when err is nil, when
// the chain holds no CodedError, or when that CodedError has no SubCode.
func GetErrorSubCode(err error) string {
	var found CodedError
	if err != nil && errors.As(err, &found) {
		return found.SubCode
	}
	return ""
}

// Errorf creates a formatted error wrapped in a CodedError.
// This is a convenience function that combines fmt.Errorf with MakeCodedError.
func Errorf(code string, format string, args ...interface{}) error {
Expand Down
Loading