Merge pull request #549 from justinsb/fix_538

Skip directory objects in S3 when listing files
2016-09-30 10:49:53 -04:00 · 2016-09-30 10:49:53 -04:00 · 39cbe506d4
parent 4ffa5032db 39c3c85262
commit 39cbe506d4
2 changed files with 10 additions and 1 deletion
--- a/util/pkg/vfs/s3fs.go
+++ b/util/pkg/vfs/s3fs.go
@@ -154,10 +154,19 @@ func (p *S3Path) ReadDir() ([]Path, error) {
 	request.Prefix = aws.String(prefix)
 	request.Delimiter = aws.String("/")

+	glog.V(4).Infof("Listing objects in S3 bucket %q with prefix %q", p.bucket, prefix)
 	var paths []Path
 	err := p.client.ListObjectsPages(request, func(page *s3.ListObjectsOutput, lastPage bool) bool {
 		for _, o := range page.Contents {
 			key := aws.StringValue(o.Key)
+			if key == prefix {
+				// We have reports (#548 and #520) of the directory itself being returned as a file.
+				// This will indeed happen if the directory has been created as a file,
+				// which seems to happen when external tools are used to manipulate the S3 bucket.
+				// We need to tolerate that, so skip the parent directory.
+				glog.V(4).Infof("Skipping read of directory: %q", key)
+				continue
+			}
 			child := &S3Path{
 				client: p.client,
 				bucket: p.bucket,
--- a/util/pkg/vfs/vfs.go
+++ b/util/pkg/vfs/vfs.go
@@ -35,7 +35,7 @@ type Path interface {
 	// Path returns a string representing the full path
 	Path() string

-	// ReadDir lists the files in a particular Pathss
+	// ReadDir lists the files in a particular Path
 	ReadDir() ([]Path, error)

 	// ReadTree lists all files in the subtree rooted at the current Path