Merge pull request #66 from BenTheElder/HEAD-check

HEAD check and fallback if S3 is missing layers
This commit is contained in:
Kubernetes Prow Robot 2022-04-26 12:06:53 -07:00 committed by GitHub
commit 8727db0027
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 216 additions and 35 deletions

View File

@ -16,10 +16,41 @@ limitations under the License.
package app
func regionToBucket(region string) string {
import (
	"context"
	"net/http"
	"time"
)
// awsRegionToS3URL maps an AWS region name to the base URL of the S3 bucket
// that serves OCI layer blobs for that region.
//
// Blobs in the buckets should be stored at /containers/images/sha256:$hash
func awsRegionToS3URL(region string) string {
	// Every region currently resolves to @ameukam's test bucket, so the
	// region argument does not yet influence the result.
	const testBucketURL = "https://painfully-really-suddenly-many-raccoon-image-layers.s3.us-west-2.amazonaws.com"
	return testBucketURL
}
// blobChecker is implemented by types that can check whether a blob exists,
// possibly with caching
type blobChecker interface {
// BlobExists reports whether the blob at blobURL is believed to exist.
// layerHash may be used for caching purposes
BlobExists(blobURL, layerHash string) bool
}
// simpleBlobChecker just performs an HTTP HEAD check against the blob
//
// TODO: potentially replace with a caching implementation
// should be plenty fast for now, HTTP HEAD on s3 is cheap
type simpleBlobChecker struct {
	http.Client
}

// BlobExists reports whether an HTTP HEAD request for blobURL returns 200 OK.
//
// layerHash is accepted to satisfy the blob checker contract but is not used
// by this implementation, which performs no caching.
//
// Any failure (invalid URL, connection error, timeout) is reported as false so
// that callers treat the blob as unavailable.
func (s *simpleBlobChecker) BlobExists(blobURL, layerHash string) bool {
	// The zero-value http.Client has no timeout, which would let a stalled
	// backend block the caller indefinitely. Bound the check with a deadline
	// unless the embedded client was explicitly configured with its own.
	const defaultTimeout = 5 * time.Second
	ctx := context.Background()
	if s.Client.Timeout == 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, defaultTimeout)
		defer cancel()
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodHead, blobURL, nil)
	if err != nil {
		// malformed URL: the blob cannot be reached, so it is not available
		return false
	}
	r, err := s.Client.Do(req)
	// fallback to assuming blob is unavailable on errors
	if err != nil {
		return false
	}
	defer r.Body.Close()

	// if the blob exists, HEAD should return 200 OK
	// this is true for S3 and for OCI registries
	return r.StatusCode == http.StatusOK
}

View File

@ -0,0 +1,64 @@
//go:build !nointegration
// +build !nointegration
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package app
import (
"testing"
)
// TestSimpleBlobChecker is an integration test (excluded by the nointegration
// build tag) that runs simpleBlobChecker against real URLs and checks whether
// each one is reported as existing.
func TestSimpleBlobChecker(t *testing.T) {
	bucket := awsRegionToS3URL("us-east-1")
	blobs := &simpleBlobChecker{}
	testCases := []struct {
		Name         string
		BlobURL      string
		HashKey      string
		ExpectExists bool
	}{
		{
			Name:    "known bucket entry",
			BlobURL: bucket + "/containers/images/sha256%3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
			// the bare hex digest, without the URL-encoded "%3A" separator
			HashKey:      "da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
			ExpectExists: true,
		},
		{
			Name:         "known bucket, bad entry",
			BlobURL:      bucket + "/c0ntainers/images/sha256%3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
			ExpectExists: false,
		},
		{
			Name:         "bogus bucket on domain without webserver",
			BlobURL:      "http://bogus.k8s.io/foo",
			HashKey:      "b0guS",
			ExpectExists: false,
		},
	}
	for i := range testCases {
		// copy so each parallel subtest captures its own case
		tc := testCases[i]
		t.Run(tc.Name, func(t *testing.T) {
			t.Parallel()
			exists := blobs.BlobExists(tc.BlobURL, tc.HashKey)
			if exists != tc.ExpectExists {
				t.Fatalf("expected: %v but got: %v", tc.ExpectExists, exists)
			}
		})
	}
}

View File

@ -22,11 +22,11 @@ import (
"sigs.k8s.io/oci-proxy/pkg/net/cidrs/aws"
)
func TestRegionToBucket(t *testing.T) {
func TestRegionToAWSRegionToS3URL(t *testing.T) {
// ensure all known regions return a configured bucket
regions := aws.Regions()
for region := range regions {
bucket := regionToBucket(region)
bucket := awsRegionToS3URL(region)
if bucket == "" {
t.Fatalf("received empty string for known region %q bucket", region)
}

View File

@ -36,7 +36,8 @@ const (
// upstream registry should be the url to the primary registry
// archeio is fronting.
func MakeHandler(upstreamRegistry string) http.Handler {
doV2 := makeV2Handler(upstreamRegistry)
blobs := &simpleBlobChecker{}
doV2 := makeV2Handler(upstreamRegistry, blobs)
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// all valid registry requests should be at /v2/
// v1 API is super old and not supported by GCR anymore.
@ -53,7 +54,7 @@ func MakeHandler(upstreamRegistry string) http.Handler {
})
}
func makeV2Handler(upstreamRegistry string) func(w http.ResponseWriter, r *http.Request) {
func makeV2Handler(upstreamRegistry string, blobs blobChecker) func(w http.ResponseWriter, r *http.Request) {
// matches blob requests, captures the requested blob hash
reBlob := regexp.MustCompile("^/v2/.*/blobs/sha256:([0-9a-f]{64})$")
// initialize map of clientIP to AWS region
@ -65,33 +66,44 @@ func makeV2Handler(upstreamRegistry string) func(w http.ResponseWriter, r *http.
// check if blob request
matches := reBlob.FindStringSubmatch(path)
if len(matches) != 2 {
// doesn't match so just forward it to the main upstream registry
// not a blob request so forward it to the main upstream registry
klog.V(2).InfoS("redirecting non-blob request to upstream registry", "path", path)
http.Redirect(w, r, upstreamRegistry+path, http.StatusPermanentRedirect)
return
}
// for matches, identify the appropriate backend
// for blob requests, check the client IP and determine the best backend
clientIP, err := getClientIP(r)
if err != nil {
// this should not happen
klog.ErrorS(err, "failed to get client IP")
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
region, matched := regionMapper.GetIP(clientIP)
if !matched {
// check if client is known to be coming from an AWS region
awsRegion, ipIsKnown := regionMapper.GetIP(clientIP)
if !ipIsKnown {
// no region match, redirect to main upstream registry
klog.V(2).InfoS("redirecting blob request to upstream registry", "path", path)
http.Redirect(w, r, upstreamRegistry+path, http.StatusPermanentRedirect)
return
}
bucket := regionToBucket(region)
// check if blob is available in our S3 bucket for the region
bucketURL := awsRegionToS3URL(awsRegion)
hash := matches[1]
// blobs are in the buckets are stored at /containers/images/sha256:$hash
// this matches the GCS bucket backing GCR
klog.V(2).InfoS("redirecting blob request to AWS", "region", region, "path", path)
http.Redirect(w, r, bucket+"/containers/images/sha256%3A"+hash, http.StatusPermanentRedirect)
// this matches GCR's GCS layout, which we will use for other buckets
blobURL := bucketURL + "/containers/images/sha256%3A" + hash
if blobs.BlobExists(blobURL, hash) {
// blob known to be available in S3, redirect client there
klog.V(2).InfoS("redirecting blob request to S3", "path", path)
http.Redirect(w, r, blobURL, http.StatusPermanentRedirect)
return
}
// fall back to redirect to upstream
klog.V(2).InfoS("redirecting blob request to upstream registry", "path", path)
http.Redirect(w, r, upstreamRegistry+path, http.StatusPermanentRedirect)
}
}

View File

@ -55,27 +55,6 @@ func TestMakeHandler(t *testing.T) {
ExpectedStatus: http.StatusPermanentRedirect,
ExpectedURL: "https://k8s.gcr.io/v2/pause/blobs/sha256:da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
},
{
Name: "Somehow bogus remote addr, /v2/pause/blobs/sha256:da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
Request: func() *http.Request {
r := httptest.NewRequest("GET", "http://localhost:8080/v2/pause/blobs/sha256:da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e", nil)
r.RemoteAddr = "35.180.1.1asdfasdfsd:888"
return r
}(),
// NOTE: this one really shouldn't happen, but we want full test coverage
// This should only happen with a bug in the stdlib http server ...
ExpectedStatus: http.StatusBadRequest,
},
{
Name: "AWS IP, /v2/pause/blobs/sha256:da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
Request: func() *http.Request {
r := httptest.NewRequest("GET", "http://localhost:8080/v2/pause/blobs/sha256:da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e", nil)
r.RemoteAddr = "35.180.1.1:888"
return r
}(),
ExpectedStatus: http.StatusPermanentRedirect,
ExpectedURL: "https://painfully-really-suddenly-many-raccoon-image-layers.s3.us-west-2.amazonaws.com/containers/images/sha256%3Ada86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e",
},
}
for i := range testCases {
tc := testCases[i]
@ -111,3 +90,98 @@ func TestMakeHandler(t *testing.T) {
})
}
}
// fakeBlobsChecker is a test double for blob existence checks that answers
// from a fixed table instead of making network requests.
type fakeBlobsChecker struct {
	// knownURLs maps a blob URL to the existence result to report for it
	knownURLs map[string]bool
}

// BlobExists returns the value recorded for blobURL in knownURLs, or false
// when the URL has no entry. hashKey is ignored.
func (f *fakeBlobsChecker) BlobExists(blobURL, hashKey string) bool {
	exists, found := f.knownURLs[blobURL]
	return found && exists
}
// TestMakeV2Handler drives the handler returned by makeV2Handler with a fake
// blob checker and verifies the response status and redirect location for
// each request.
func TestMakeV2Handler(t *testing.T) {
	const (
		upstreamRegistry = "https://k8s.gcr.io"
		proxyURL         = "http://localhost:8080"
		awsRemoteAddr    = "35.180.1.1:888"

		// hash the fake checker reports as present in the bucket
		knownHash = "da86e6ba6ca197bf6bc5e9d900febd906b133eaa4750e6bed647b0fbe50ed43e"
		// hash the fake checker has no entry for
		unknownHash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1234567"

		knownBlobPath   = "/v2/pause/blobs/sha256:" + knownHash
		unknownBlobPath = "/v2/pause/blobs/sha256:" + unknownHash
		knownBlobS3URL  = "https://painfully-really-suddenly-many-raccoon-image-layers.s3.us-west-2.amazonaws.com/containers/images/sha256%3A" + knownHash
	)

	// newRequest builds a GET request for path; a non-empty remoteAddr
	// overrides the default remote address set by httptest.
	newRequest := func(path, remoteAddr string) *http.Request {
		req := httptest.NewRequest("GET", proxyURL+path, nil)
		if remoteAddr != "" {
			req.RemoteAddr = remoteAddr
		}
		return req
	}

	checker := &fakeBlobsChecker{
		knownURLs: map[string]bool{knownBlobS3URL: true},
	}
	handler := makeV2Handler(upstreamRegistry, checker)

	testCases := []struct {
		Name           string
		Request        *http.Request
		ExpectedStatus int
		ExpectedURL    string
	}{
		{
			Name:           knownBlobPath,
			Request:        newRequest(knownBlobPath, ""),
			ExpectedStatus: http.StatusPermanentRedirect,
			ExpectedURL:    upstreamRegistry + knownBlobPath,
		},
		{
			// NOTE: this one really shouldn't happen, but we want full test coverage
			// This should only happen with a bug in the stdlib http server ...
			Name:           "Somehow bogus remote addr, " + knownBlobPath,
			Request:        newRequest(knownBlobPath, "35.180.1.1asdfasdfsd:888"),
			ExpectedStatus: http.StatusBadRequest,
		},
		{
			Name:           "AWS IP, " + knownBlobPath,
			Request:        newRequest(knownBlobPath, awsRemoteAddr),
			ExpectedStatus: http.StatusPermanentRedirect,
			ExpectedURL:    knownBlobS3URL,
		},
		{
			Name:           "AWS IP, " + unknownBlobPath,
			Request:        newRequest(unknownBlobPath, awsRemoteAddr),
			ExpectedStatus: http.StatusPermanentRedirect,
			ExpectedURL:    upstreamRegistry + unknownBlobPath,
		},
	}

	for _, tc := range testCases {
		// copy so each parallel subtest captures its own case
		tc := tc
		t.Run(tc.Name, func(t *testing.T) {
			t.Parallel()
			rec := httptest.NewRecorder()
			handler(rec, tc.Request)
			resp := rec.Result()
			if resp == nil {
				t.Fatalf("nil response")
			}
			if got, want := resp.StatusCode, tc.ExpectedStatus; got != want {
				t.Fatalf(
					"expected status: %v, but got status: %v",
					http.StatusText(want),
					http.StatusText(got),
				)
			}
			location, err := resp.Location()
			switch {
			case err == nil:
				// a Location header was present; it must match exactly
				if location.String() != tc.ExpectedURL {
					t.Fatalf(
						"expected url: %q, but got: %q",
						tc.ExpectedURL,
						location,
					)
				}
			case !errors.Is(err, http.ErrNoLocation):
				t.Fatalf("failed to get response location with error: %v", err)
			case tc.ExpectedURL != "":
				// no Location header, but the case expected a redirect target
				t.Fatalf("expected url: %q but no location was available", tc.ExpectedURL)
			}
		})
	}
}