// ------------------------------------------------------------
// Copyright (c) Microsoft Corporation and Dapr Contributors.
// Licensed under the MIT License.
// ------------------------------------------------------------

package blobstorage

import (
	"bytes"
	"context"
	b64 "encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"strconv"

	"github.com/Azure/azure-storage-blob-go/azblob"
	"github.com/google/uuid"

	"github.com/dapr/components-contrib/bindings"
	"github.com/dapr/kit/logger"
)

const (
	// Used to reference the blob relative to the container.
	metadataKeyBlobName = "blobName"
	// A string value that identifies the portion of the list to be returned with the next list operation.
	// The operation returns a marker value within the response body if the list returned was not complete. The marker
	// value may then be used in a subsequent call to request the next set of list items.
	// See: https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#uri-parameters
	metadataKeyMarker = "marker"
	// The number of blobs that will be returned in a list operation.
	metadataKeyNumber = "number"
	// Defines if the user defined metadata should be returned in the get operation.
	metadataKeyIncludeMetadata = "includeMetadata"
	// Defines the delete snapshots option for the delete operation.
	// See: https://docs.microsoft.com/en-us/rest/api/storageservices/delete-blob#request-headers
	metadataKeyDeleteSnapshots = "deleteSnapshots"
	// HTTP headers to be associated with the blob.
	// See: https://docs.microsoft.com/en-us/rest/api/storageservices/put-blob#request-headers-all-blob-types
	metadataKeyContentType        = "contentType"
	metadataKeyContentMD5         = "contentMD5"
	metadataKeyContentEncoding    = "contentEncoding"
	metadataKeyContentLanguage    = "contentLanguage"
	metadataKeyContentDisposition = "contentDisposition"
	metadataKeyCacheControl       = "cacheControl"
	// Specifies the maximum number of HTTP GET requests that will be made while reading from a RetryReader. A value
	// of zero means that no additional HTTP GET requests will be made.
	defaultGetBlobRetryCount = 10
	// Specifies the maximum number of blobs to return, including all BlobPrefix elements. If the request does not
	// specify maxresults the server will return up to 5,000 items.
	// See: https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs#uri-parameters
	maxResults = 5000

	// TODO: remove the pascal case support when the component moves to GA
	// See: https://github.com/dapr/components-contrib/pull/999#issuecomment-876890210
	metadataKeyContentTypeBC           = "ContentType"
	metadataKeyContentMD5BC            = "ContentMD5"
	metadataKeyContentEncodingBC       = "ContentEncoding"
	metadataKeyContentLanguageBC       = "ContentLanguage"
	metadataKeyContentDispositionBC    = "ContentDisposition"
	metadataKeyCacheControlBC          = "CacheControl"
	metadataKeyDeleteSnapshotOptionsBC = "DeleteSnapshotOptions"
)

var ErrMissingBlobName = errors.New("blobName is a required attribute")

// AzureBlobStorage allows saving blobs to an Azure Blob Storage account.
type AzureBlobStorage struct {
	metadata     *blobStorageMetadata
	containerURL azblob.ContainerURL

	logger logger.Logger
}

type blobStorageMetadata struct {
	StorageAccount    string                  `json:"storageAccount"`
	StorageAccessKey  string                  `json:"storageAccessKey"`
	Container         string                  `json:"container"`
	GetBlobRetryCount int                     `json:"getBlobRetryCount,string"`
	DecodeBase64      bool                    `json:"decodeBase64,string"`
	PublicAccessLevel azblob.PublicAccessType `json:"publicAccessLevel"`
}

type createResponse struct {
	BlobURL string `json:"blobURL"`
}

type listInclude struct {
	Copy             bool `json:"copy"`
	Metadata         bool `json:"metadata"`
	Snapshots        bool `json:"snapshots"`
	UncommittedBlobs bool `json:"uncommittedBlobs"`
	Deleted          bool `json:"deleted"`
}

type listPayload struct {
	Marker     string      `json:"marker"`
	Prefix     string      `json:"prefix"`
	MaxResults int32       `json:"maxResults"`
	Include    listInclude `json:"include"`
}

// NewAzureBlobStorage returns a new Azure Blob Storage instance.
func NewAzureBlobStorage(logger logger.Logger) *AzureBlobStorage {
	return &AzureBlobStorage{logger: logger}
}

// Init performs metadata parsing.
func (a *AzureBlobStorage) Init(metadata bindings.Metadata) error {
	m, err := a.parseMetadata(metadata)
	if err != nil {
		return err
	}
	a.metadata = m

	credential, err := azblob.NewSharedKeyCredential(m.StorageAccount, m.StorageAccessKey)
	if err != nil {
		return fmt.Errorf("invalid credentials with error: %w", err)
	}

	p := azblob.NewPipeline(credential, azblob.PipelineOptions{})

	containerName := a.metadata.Container
	URL, _ := url.Parse(
		fmt.Sprintf("https://%s.blob.core.windows.net/%s", m.StorageAccount, containerName))
	containerURL := azblob.NewContainerURL(*URL, p)

	ctx := context.Background()
	_, err = containerURL.Create(ctx, azblob.Metadata{}, m.PublicAccessLevel)
	// Don't return the error: the container might already exist.
	if err != nil {
		a.logger.Debugf("error creating container: %v", err)
	}

	a.containerURL = containerURL

	return nil
}

// parseMetadata decodes the component metadata into a blobStorageMetadata struct
// and applies defaults and validation.
func (a *AzureBlobStorage) parseMetadata(metadata bindings.Metadata) (*blobStorageMetadata, error) {
	connInfo := metadata.Properties
	b, err := json.Marshal(connInfo)
	if err != nil {
		return nil, err
	}

	var m blobStorageMetadata
	err = json.Unmarshal(b, &m)
	if err != nil {
		return nil, err
	}

	if m.GetBlobRetryCount == 0 {
		m.GetBlobRetryCount = defaultGetBlobRetryCount
	}

	if !a.isValidPublicAccessType(m.PublicAccessLevel) {
		return nil, fmt.Errorf("invalid public access level: %s; allowed: %s",
			m.PublicAccessLevel, azblob.PossiblePublicAccessTypeValues())
	}

	return &m, nil
}

// Operations returns the list of operations supported by this binding.
func (a *AzureBlobStorage) Operations() []bindings.OperationKind {
	return []bindings.OperationKind{
		bindings.CreateOperation,
		bindings.GetOperation,
		bindings.DeleteOperation,
		bindings.ListOperation,
	}
}

// create uploads the request data as a block blob, applying any HTTP headers
// passed via the request metadata.
func (a *AzureBlobStorage) create(req *bindings.InvokeRequest) (*bindings.InvokeResponse, error) {
	var blobHTTPHeaders azblob.BlobHTTPHeaders
	var blobURL azblob.BlockBlobURL
	if val, ok := req.Metadata[metadataKeyBlobName]; ok && val != "" {
		blobURL = a.getBlobURL(val)
		delete(req.Metadata, metadataKeyBlobName)
	} else {
		blobURL = a.getBlobURL(uuid.New().String())
	}

	if val, ok := req.Metadata[metadataKeyContentType]; ok && val != "" {
		blobHTTPHeaders.ContentType = val
		delete(req.Metadata, metadataKeyContentType)
	}
	if val, ok := req.Metadata[metadataKeyContentMD5]; ok && val != "" {
		sDec, err := b64.StdEncoding.DecodeString(val)
		if err != nil || len(sDec) != 16 {
			return nil, fmt.Errorf("the MD5 value specified in contentMD5 is invalid: it must be a base64-encoded 128-bit digest")
		}
		blobHTTPHeaders.ContentMD5 = sDec
		delete(req.Metadata, metadataKeyContentMD5)
	}
	if val, ok := req.Metadata[metadataKeyContentEncoding]; ok && val != "" {
		blobHTTPHeaders.ContentEncoding = val
		delete(req.Metadata, metadataKeyContentEncoding)
	}
	if val, ok := req.Metadata[metadataKeyContentLanguage]; ok && val != "" {
		blobHTTPHeaders.ContentLanguage = val
		delete(req.Metadata, metadataKeyContentLanguage)
	}
	if val, ok := req.Metadata[metadataKeyContentDisposition]; ok && val != "" {
		blobHTTPHeaders.ContentDisposition = val
		delete(req.Metadata, metadataKeyContentDisposition)
	}
	if val, ok := req.Metadata[metadataKeyCacheControl]; ok && val != "" {
		blobHTTPHeaders.CacheControl = val
		delete(req.Metadata, metadataKeyCacheControl)
	}

	// The request data may arrive as a JSON-quoted string; unquote it if so.
	d, err := strconv.Unquote(string(req.Data))
	if err == nil {
		req.Data = []byte(d)
	}

	if a.metadata.DecodeBase64 {
		decoded, decodeError := b64.StdEncoding.DecodeString(string(req.Data))
		if decodeError != nil {
			return nil, decodeError
		}
		req.Data = decoded
	}

	_, err = azblob.UploadBufferToBlockBlob(context.Background(), req.Data, blobURL, azblob.UploadToBlockBlobOptions{
		Parallelism:     16,
		Metadata:        req.Metadata,
		BlobHTTPHeaders: blobHTTPHeaders,
	})
	if err != nil {
		return nil, fmt.Errorf("error uploading az blob: %w", err)
	}

	resp := createResponse{
		BlobURL: blobURL.String(),
	}
	b, err := json.Marshal(resp)
	if err != nil {
		return nil, fmt.Errorf("error marshalling create response for azure blob: %w", err)
	}

	return &bindings.InvokeResponse{
		Data: b,
	}, nil
}

// get downloads a blob and optionally returns its user-defined metadata.
func (a *AzureBlobStorage) get(req *bindings.InvokeRequest) (*bindings.InvokeResponse, error) {
	var blobURL azblob.BlockBlobURL
	if val, ok := req.Metadata[metadataKeyBlobName]; ok && val != "" {
		blobURL = a.getBlobURL(val)
	} else {
		return nil, ErrMissingBlobName
	}

	ctx := context.TODO()
	resp, err := blobURL.Download(ctx, 0, azblob.CountToEnd, azblob.BlobAccessConditions{}, false)
	if err != nil {
		return nil, fmt.Errorf("error downloading az blob: %w", err)
	}

	bodyStream := resp.Body(azblob.RetryReaderOptions{MaxRetryRequests: a.metadata.GetBlobRetryCount})

	b := bytes.Buffer{}
	_, err = b.ReadFrom(bodyStream)
	if err != nil {
		return nil, fmt.Errorf("error reading az blob body: %w", err)
	}

	var metadata map[string]string
	fetchMetadata, err := req.GetMetadataAsBool(metadataKeyIncludeMetadata)
	if err != nil {
		return nil, fmt.Errorf("error parsing metadata: %w", err)
	}

	if fetchMetadata {
		props, err := blobURL.GetProperties(ctx, azblob.BlobAccessConditions{})
		if err != nil {
			return nil, fmt.Errorf("error reading blob metadata: %w", err)
		}

		metadata = props.NewMetadata()
	}

	return &bindings.InvokeResponse{
		Data:     b.Bytes(),
		Metadata: metadata,
	}, nil
}

// delete removes a blob, honoring the optional deleteSnapshots metadata.
func (a *AzureBlobStorage) delete(req *bindings.InvokeRequest) (*bindings.InvokeResponse, error) {
	var blobURL azblob.BlockBlobURL
	if val, ok := req.Metadata[metadataKeyBlobName]; ok && val != "" {
		blobURL = a.getBlobURL(val)
	} else {
		return nil, ErrMissingBlobName
	}

	deleteSnapshotsOptions := azblob.DeleteSnapshotsOptionNone
	if val, ok := req.Metadata[metadataKeyDeleteSnapshots]; ok && val != "" {
		deleteSnapshotsOptions = azblob.DeleteSnapshotsOptionType(val)
		if !a.isValidDeleteSnapshotsOptionType(deleteSnapshotsOptions) {
			return nil, fmt.Errorf("invalid delete snapshot option type: %s; allowed: %s",
				deleteSnapshotsOptions, azblob.PossibleDeleteSnapshotsOptionTypeValues())
		}
	}

	_, err := blobURL.Delete(context.Background(), deleteSnapshotsOptions, azblob.BlobAccessConditions{})

	return nil, err
}

// list returns the blobs in the container that match the options given in the
// request payload, paging through results as needed.
func (a *AzureBlobStorage) list(req *bindings.InvokeRequest) (*bindings.InvokeResponse, error) {
	options := azblob.ListBlobsSegmentOptions{}

	var payload listPayload
	err := json.Unmarshal(req.Data, &payload)
	if err != nil {
		return nil, err
	}

	options.Details.Copy = payload.Include.Copy
	options.Details.Metadata = payload.Include.Metadata
	options.Details.Snapshots = payload.Include.Snapshots
	options.Details.UncommittedBlobs = payload.Include.UncommittedBlobs
	options.Details.Deleted = payload.Include.Deleted

	if payload.MaxResults != int32(0) {
		options.MaxResults = payload.MaxResults
	} else {
		options.MaxResults = maxResults
	}

	if payload.Prefix != "" {
		options.Prefix = payload.Prefix
	}

	var initialMarker azblob.Marker
	if payload.Marker != "" {
		initialMarker = azblob.Marker{Val: &payload.Marker}
	} else {
		initialMarker = azblob.Marker{}
	}

	var blobs []azblob.BlobItem
	metadata := map[string]string{}
	ctx := context.Background()
	for currentMarker := initialMarker; currentMarker.NotDone(); {
		var listBlob *azblob.ListBlobsFlatSegmentResponse
		listBlob, err = a.containerURL.ListBlobsFlatSegment(ctx, currentMarker, options)
		if err != nil {
			return nil, fmt.Errorf("error listing blobs: %w", err)
		}

		blobs = append(blobs, listBlob.Segment.BlobItems...)

		numBlobs := len(blobs)
		currentMarker = listBlob.NextMarker
		if currentMarker.Val != nil {
			metadata[metadataKeyMarker] = *currentMarker.Val
		}
		metadata[metadataKeyNumber] = strconv.FormatInt(int64(numBlobs), 10)

		// Keep paging only while the requested number of results has not been reached.
		if options.MaxResults-maxResults > 0 {
			options.MaxResults -= maxResults
		} else {
			break
		}
	}

	jsonResponse, err := json.Marshal(blobs)
	if err != nil {
		return nil, fmt.Errorf("cannot marshal blobs to json: %w", err)
	}

	return &bindings.InvokeResponse{
		Data:     jsonResponse,
		Metadata: metadata,
	}, nil
}

// Invoke dispatches the request to the handler for the requested operation.
func (a *AzureBlobStorage) Invoke(req *bindings.InvokeRequest) (*bindings.InvokeResponse, error) {
	req.Metadata = a.handleBackwardCompatibilityForMetadata(req.Metadata)

	switch req.Operation {
	case bindings.CreateOperation:
		return a.create(req)
	case bindings.GetOperation:
		return a.get(req)
	case bindings.DeleteOperation:
		return a.delete(req)
	case bindings.ListOperation:
		return a.list(req)
	default:
		return nil, fmt.Errorf("unsupported operation %s", req.Operation)
	}
}

// getBlobURL returns a BlockBlobURL for the named blob within the container.
func (a *AzureBlobStorage) getBlobURL(name string) azblob.BlockBlobURL {
	blobURL := a.containerURL.NewBlockBlobURL(name)

	return blobURL
}

// isValidPublicAccessType reports whether the given access type is one of the
// values accepted by the Azure Blob Storage SDK.
func (a *AzureBlobStorage) isValidPublicAccessType(accessType azblob.PublicAccessType) bool {
	validTypes := azblob.PossiblePublicAccessTypeValues()
	for _, item := range validTypes {
		if item == accessType {
			return true
		}
	}

	return false
}

// isValidDeleteSnapshotsOptionType reports whether the given delete snapshots
// option is one of the values accepted by the Azure Blob Storage SDK.
func (a *AzureBlobStorage) isValidDeleteSnapshotsOptionType(accessType azblob.DeleteSnapshotsOptionType) bool {
	validTypes := azblob.PossibleDeleteSnapshotsOptionTypeValues()
	for _, item := range validTypes {
		if item == accessType {
			return true
		}
	}

	return false
}

// TODO: remove the pascal case support when the component moves to GA
// See: https://github.com/dapr/components-contrib/pull/999#issuecomment-876890210
func (a *AzureBlobStorage) handleBackwardCompatibilityForMetadata(metadata map[string]string) map[string]string {
	if val, ok := metadata[metadataKeyContentTypeBC]; ok && val != "" {
		metadata[metadataKeyContentType] = val
		delete(metadata, metadataKeyContentTypeBC)
	}

	if val, ok := metadata[metadataKeyContentMD5BC]; ok && val != "" {
		metadata[metadataKeyContentMD5] = val
		delete(metadata, metadataKeyContentMD5BC)
	}

	if val, ok := metadata[metadataKeyContentEncodingBC]; ok && val != "" {
		metadata[metadataKeyContentEncoding] = val
		delete(metadata, metadataKeyContentEncodingBC)
	}

	if val, ok := metadata[metadataKeyContentLanguageBC]; ok && val != "" {
		metadata[metadataKeyContentLanguage] = val
		delete(metadata, metadataKeyContentLanguageBC)
	}
	if val, ok := metadata[metadataKeyContentDispositionBC]; ok && val != "" {
		metadata[metadataKeyContentDisposition] = val
		delete(metadata, metadataKeyContentDispositionBC)
	}

	if val, ok := metadata[metadataKeyCacheControlBC]; ok && val != "" {
		metadata[metadataKeyCacheControl] = val
		delete(metadata, metadataKeyCacheControlBC)
	}

	if val, ok := metadata[metadataKeyDeleteSnapshotOptionsBC]; ok && val != "" {
		metadata[metadataKeyDeleteSnapshots] = val
		delete(metadata, metadataKeyDeleteSnapshotOptionsBC)
	}

	return metadata
}
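
// The sketch below is illustrative only and is not part of the original binding:
// it shows how a caller (normally the Dapr runtime) might initialize the component
// and invoke a "create" operation. The exampleCreate name and all metadata values
// are placeholders, not values defined by this package.
func exampleCreate() error {
	blob := NewAzureBlobStorage(logger.NewLogger("blobstorage-example"))
	if err := blob.Init(bindings.Metadata{Properties: map[string]string{
		"storageAccount":   "<account>",   // placeholder storage account name
		"storageAccessKey": "<key>",       // placeholder shared access key
		"container":        "mycontainer", // placeholder container name
	}}); err != nil {
		return err
	}

	// Upload a small text blob named hello.txt.
	_, err := blob.Invoke(&bindings.InvokeRequest{
		Operation: bindings.CreateOperation,
		Data:      []byte("hello world"),
		Metadata: map[string]string{
			"blobName":    "hello.txt",
			"contentType": "text/plain",
		},
	})

	return err
}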