Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 20 additions & 6 deletions builder/mirror/prober.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,19 @@ type probeResult struct {
latency time.Duration
}

// Probe checks every candidate's /v2/ registry endpoint concurrently and
// returns only the alive ones, sorted by ascending latency. A mirror is alive
// when /v2/ answers 200 or 401 (an auth challenge still proves a working
// registry frontend). Candidates keep their scheme; entries without one are
// probed via https.
// probeManifestPath is a real, tiny docker.io manifest. Probing it instead of
// the bare /v2/ ping exercises the mirror's actual proxy path, so a frontend
// that answers pings but stalls on manifests fails the probe timeout.
const probeManifestPath = "/v2/library/alpine/manifests/latest"

// Probe fetches a real manifest from every candidate concurrently and returns
// only the alive ones, sorted by ascending latency. 200 and 401 both count as
// alive: token-auth mirrors (e.g. daocloud) answer 401 to anonymous manifest
// requests, and the containerd/BuildKit token flow handles that during real
// pulls. The probe cannot catch every stall (a mirror may serve alpine fine
// and hang on another image) — the pull-side client timeout is the hard
// safety net; the probe only filters and orders. Candidates keep their
// scheme; entries without one are probed via https.
func Probe(ctx context.Context, candidates []string, timeout time.Duration) []string {
if len(candidates) == 0 {
return nil
Expand Down Expand Up @@ -76,12 +84,18 @@ func probeOne(ctx context.Context, client *http.Client, mirrorURL string) (time.
if !strings.HasPrefix(endpoint, "http://") && !strings.HasPrefix(endpoint, "https://") {
endpoint = "https://" + endpoint
}
endpoint = strings.TrimSuffix(endpoint, "/") + "/v2/"
endpoint = strings.TrimSuffix(endpoint, "/") + probeManifestPath
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
if err != nil {
logrus.Debugf("probe mirror %s: build request failure: %v", mirrorURL, err)
return 0, false
}
req.Header.Set("Accept", strings.Join([]string{
"application/vnd.docker.distribution.manifest.v2+json",
"application/vnd.docker.distribution.manifest.list.v2+json",
"application/vnd.oci.image.manifest.v1+json",
"application/vnd.oci.image.index.v1+json",
}, ", "))
start := time.Now()
resp, err := client.Do(req)
if err != nil {
Expand Down
33 changes: 27 additions & 6 deletions builder/mirror/prober_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,33 @@ import (
// capability_id: rainbond.builder.dynamic-mirror-probe
func TestProbeFiltersAndSortsByLatency(t *testing.T) {
slowOK := newRegistryStub(t, http.StatusOK, 300*time.Millisecond)
fastUnauthorized := newRegistryStub(t, http.StatusUnauthorized, 0)
fastOK := newRegistryStub(t, http.StatusOK, 0)
dead := newRegistryStub(t, http.StatusInternalServerError, 0)

got := Probe(context.Background(), []string{slowOK, fastUnauthorized, dead}, 2*time.Second)
got := Probe(context.Background(), []string{slowOK, fastOK, dead}, 2*time.Second)

assertStringSlice(t, got, []string{fastUnauthorized, slowOK})
assertStringSlice(t, got, []string{fastOK, slowOK})
}

// token 认证类 mirror(如 daocloud)对匿名 manifest 请求回 401,但真实拉取时
// resolver 会走 token 流程,因此 401 必须判活。
func TestProbeTokenAuthMirrorIsAlive(t *testing.T) {
authChallenge := newRegistryStub(t, http.StatusUnauthorized, 0)
got := Probe(context.Background(), []string{authChallenge}, time.Second)
assertStringSlice(t, got, []string{authChallenge})
}

// manifest 路径挂起不响应的“假活”源要在探活超时内排除
// (docker.xuanyuan.me 卡死构建事故的场景之一)。
func TestProbeManifestStallTreatedAsDead(t *testing.T) {
stalled := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
time.Sleep(2 * time.Second) // longer than probe timeout
}))
t.Cleanup(stalled.Close)
got := Probe(context.Background(), []string{stalled.URL}, 500*time.Millisecond)
if len(got) != 0 {
t.Fatalf("stalled mirror must be treated as dead, got %v", got)
}
}

func TestProbeUnreachableHostDropped(t *testing.T) {
Expand All @@ -32,12 +53,12 @@ func TestProbeEmptyInput(t *testing.T) {
}
}

// newRegistryStub serves /v2/ with the given status after an artificial delay
// and returns the server base URL.
// newRegistryStub serves the probe manifest path with the given status after
// an artificial delay and returns the server base URL.
func newRegistryStub(t *testing.T, status int, delay time.Duration) string {
t.Helper()
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/v2/" {
if r.URL.Path != probeManifestPath {
w.WriteHeader(http.StatusNotFound)
return
}
Expand Down
12 changes: 11 additions & 1 deletion builder/sources/mirror_hosts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ package sources

import (
"crypto/tls"
"net"
"net/http"
"strings"
"time"

refdocker "github.com/containerd/containerd/reference/docker"
"github.com/containerd/containerd/remotes/docker"
Expand Down Expand Up @@ -60,8 +62,16 @@ func mirrorRegistryHost(mirrorURL string) docker.RegistryHost {
}
host = strings.TrimSuffix(host, "/")
return docker.RegistryHost{
// 必须带超时:探活通过的 mirror 仍可能在取 manifest/blob 时挂起不响应,
// containerd 这条 pull 路径外层没有 ctx 超时,没有这里的超时会把构建卡死。
Client: &http.Client{
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
Transport: &http.Transport{
DialContext: (&net.Dialer{Timeout: 10 * time.Second}).DialContext,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
TLSHandshakeTimeout: 10 * time.Second,
ResponseHeaderTimeout: 30 * time.Second,
IdleConnTimeout: 90 * time.Second,
},
},
Authorizer: docker.NewDockerAuthorizer(),
Host: host,
Expand Down
Loading