Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 224 additions & 0 deletions pkg/webhook/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
// SPDX-License-Identifier: Apache-2.0

package webhook

import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"strconv"
"time"

"github.com/stacklok/toolhive/pkg/networking"
)

// Client is an HTTP client for calling webhook endpoints.
type Client struct {
httpClient *http.Client
config Config
hmacSecret []byte
webhookType Type
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

webhookType is stored but never read. There's no dispatch logic based on type -- callers just pick Call vs CallMutating directly. Either remove the field or, if the plan is to add a Send method that dispatches based on type, leave a TODO.


// NewClient creates a new webhook Client from the given configuration.
// The hmacSecret parameter is the resolved secret bytes for HMAC signing;
// pass nil if signing is not configured.
func NewClient(cfg Config, webhookType Type, hmacSecret []byte) (*Client, error) {
if err := cfg.Validate(); err != nil {
return nil, fmt.Errorf("invalid webhook config: %w", err)
}

timeout := cfg.Timeout
if timeout == 0 {
timeout = DefaultTimeout
}

transport, err := buildTransport(cfg.TLSConfig)
if err != nil {
return nil, fmt.Errorf("failed to build HTTP transport: %w", err)
}

return &Client{
httpClient: &http.Client{
Transport: transport,
Timeout: timeout,
},
config: cfg,
hmacSecret: hmacSecret,
webhookType: webhookType,
}, nil
}

// Call sends a request to a validating webhook and returns its response.
func (c *Client) Call(ctx context.Context, req *Request) (*Response, error) {
body, err := json.Marshal(req)
if err != nil {
return nil, NewInvalidResponseError(c.config.Name, fmt.Errorf("failed to marshal request: %w", err))
}

respBody, err := c.doHTTPCall(ctx, body)
if err != nil {
return nil, err
}

var resp Response
if err := json.Unmarshal(respBody, &resp); err != nil {
return nil, NewInvalidResponseError(c.config.Name, fmt.Errorf("failed to unmarshal response: %w", err))
}

return &resp, nil
}

// CallMutating sends a request to a mutating webhook and returns its response.
func (c *Client) CallMutating(ctx context.Context, req *Request) (*MutatingResponse, error) {
body, err := json.Marshal(req)
if err != nil {
return nil, NewInvalidResponseError(c.config.Name, fmt.Errorf("failed to marshal request: %w", err))
}

respBody, err := c.doHTTPCall(ctx, body)
if err != nil {
return nil, err
}

var resp MutatingResponse
if err := json.Unmarshal(respBody, &resp); err != nil {
return nil, NewInvalidResponseError(c.config.Name, fmt.Errorf("failed to unmarshal mutating response: %w", err))
}

return &resp, nil
}

// doHTTPCall performs the HTTP POST to the webhook endpoint, handling signing,
// error classification, and response size limiting.
func (c *Client) doHTTPCall(ctx context.Context, body []byte) ([]byte, error) {
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, c.config.URL, bytes.NewReader(body))
if err != nil {
return nil, NewNetworkError(c.config.Name, fmt.Errorf("failed to create HTTP request: %w", err))
}
httpReq.Header.Set("Content-Type", "application/json")

// Apply HMAC signing if configured.
if len(c.hmacSecret) > 0 {
timestamp := time.Now().Unix()
signature := SignPayload(c.hmacSecret, timestamp, body)
httpReq.Header.Set(SignatureHeader, signature)
httpReq.Header.Set(TimestampHeader, strconv.FormatInt(timestamp, 10))
}

// #nosec G704 -- URL is validated in Config.Validate and we use ValidatingTransport for SSRF protection.
resp, err := c.httpClient.Do(httpReq)
if err != nil {
return nil, classifyError(c.config.Name, err)
}
defer func() {
_ = resp.Body.Close()
}()

// Enforce response size limit.
limitedReader := io.LimitReader(resp.Body, MaxResponseSize+1)
respBody, err := io.ReadAll(limitedReader)
if err != nil {
return nil, NewNetworkError(c.config.Name, fmt.Errorf("failed to read response body: %w", err))
}
if int64(len(respBody)) > MaxResponseSize {
return nil, NewInvalidResponseError(c.config.Name,
fmt.Errorf("response body exceeds maximum size of %d bytes", MaxResponseSize))
}

// 5xx errors indicate webhook operational failures.
if resp.StatusCode >= http.StatusInternalServerError {
Comment on lines +136 to +139
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The RFC's failure mode table treats HTTP 422 specially -- it's "Deny (422)" regardless of failure policy. With the current code, 422 gets lumped with all other non-200/non-5xx as InvalidResponseError, so the middleware layer won't be able to distinguish "422 from webhook = always deny" from "400 bad request = follow failure policy."

Consider adding the HTTP status code to the error (e.g., a StatusCode field on InvalidResponseError), or a separate error path for status codes that should bypass failure policy.

return nil, NewNetworkError(c.config.Name,
fmt.Errorf("webhook returned HTTP %d: %s", resp.StatusCode, truncateBody(respBody)))
}

// Non-200 responses (excluding 5xx handled above) are treated as invalid.
if resp.StatusCode != http.StatusOK {
return nil, NewInvalidResponseError(c.config.Name,
fmt.Errorf("webhook returned HTTP %d: %s", resp.StatusCode, truncateBody(respBody)))
}

return respBody, nil
}

// buildTransport creates an http.RoundTripper with the specified TLS configuration,
// wrapped in a ValidatingTransport for security.
func buildTransport(tlsCfg *TLSConfig) (http.RoundTripper, error) {
transport := &http.Transport{
TLSHandshakeTimeout: 10 * time.Second,
ResponseHeaderTimeout: 10 * time.Second,
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
Comment on lines +152 to +160
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When tlsCfg == nil, this returns a bare http.Transport without the ValidatingTransport wrapper. That means the SSRF protections (private IP blocking, etc.) only kick in when TLS is explicitly configured.

The #nosec G704 on line 114 claims coverage via ValidatingTransport, but that's only true in the TLS branch.

The fix is to always wrap in ValidatingTransport:

func buildTransport(tlsCfg *TLSConfig) (http.RoundTripper, error) {
	transport := &http.Transport{
		TLSHandshakeTimeout:   10 * time.Second,
		ResponseHeaderTimeout: 10 * time.Second,
		MaxIdleConns:          100,
		MaxIdleConnsPerHost:   10,
		IdleConnTimeout:       90 * time.Second,
	}

	if tlsCfg != nil {
		// ... apply TLS config to transport ...
	}

	allowHTTP := tlsCfg != nil && tlsCfg.InsecureSkipVerify
	return &networking.ValidatingTransport{
		Transport:         transport,
		InsecureAllowHTTP: allowHTTP,
	}, nil
}

Note that the RFC says "TLS/HTTPS required," but in practice folks will want to hit http://my-service.my-namespace.svc.cluster.local for in-cluster webhooks where TLS isn't configured. So we probably want InsecureSkipVerify (or a new AllowHTTP field) to gate HTTP access explicitly, rather than a hard HTTPS requirement. That way the default is secure, but operators running in-cluster can opt in intentionally.

IdleConnTimeout: 90 * time.Second,
}

if tlsCfg == nil {
return transport, nil
}

tlsConfig := &tls.Config{
MinVersion: tls.VersionTLS12,
}

// Load CA bundle if provided.
if tlsCfg.CABundlePath != "" {
caCert, err := os.ReadFile(tlsCfg.CABundlePath)
if err != nil {
return nil, fmt.Errorf("failed to read CA bundle: %w", err)
}
caCertPool := x509.NewCertPool()
if !caCertPool.AppendCertsFromPEM(caCert) {
return nil, fmt.Errorf("failed to parse CA certificate bundle")
}
tlsConfig.RootCAs = caCertPool
}

// Load client certificate for mTLS if provided.
if tlsCfg.ClientCertPath != "" && tlsCfg.ClientKeyPath != "" {
cert, err := tls.LoadX509KeyPair(tlsCfg.ClientCertPath, tlsCfg.ClientKeyPath)
if err != nil {
return nil, fmt.Errorf("failed to load client certificate: %w", err)
}
tlsConfig.Certificates = []tls.Certificate{cert}
}

if tlsCfg.InsecureSkipVerify {
//#nosec G402 -- InsecureSkipVerify is intentionally user-configurable for development/testing only.
tlsConfig.InsecureSkipVerify = true
}

transport.TLSClientConfig = tlsConfig
return &networking.ValidatingTransport{
Transport: transport,
InsecureAllowHTTP: false,
}, nil
}

// classifyError examines an HTTP client error and returns an appropriately
Comment on lines +199 to +206
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a parent context is cancelled or its deadline exceeds, the error may be context.DeadlineExceeded or context.Canceled without wrapping a net.Error. Those would fall through to NetworkError when they're really timeouts/cancellations.

if errors.Is(err, context.DeadlineExceeded) {
    return NewTimeoutError(webhookName, err)
}

// typed webhook error (TimeoutError or NetworkError).
func classifyError(webhookName string, err error) error {
// Check for timeout errors (context deadline, net.Error timeout).
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
return NewTimeoutError(webhookName, err)
}
return NewNetworkError(webhookName, err)
}

// truncateBody returns a preview of the response body for error messages.
func truncateBody(body []byte) string {
const maxPreview = 256
if len(body) <= maxPreview {
return string(body)
}
return string(body[:maxPreview]) + "..."
}
Loading
Loading