Added support for uploading local dependencies to GCS

This commit is contained in:
Yinan Li 2018-01-25 10:13:38 -08:00
parent cdfa5bf2ab
commit 3373b30e6d
8 changed files with 295 additions and 38 deletions

86
Gopkg.lock generated
View File

@ -13,6 +13,18 @@
revision = "c2a68353555b68de3ee8455a4fd3e890a0ac6d99"
version = "v9.8.1"
[[projects]]
name = "github.com/PuerkitoBio/purell"
packages = ["."]
revision = "0bcb03f4b4d0a9428594752bd2a3b9aa0a9d4bd4"
version = "v1.1.0"
[[projects]]
branch = "master"
name = "github.com/PuerkitoBio/urlesc"
packages = ["."]
revision = "de5bf2ad457846296e2031421a34e2568e304e35"
[[projects]]
name = "github.com/davecgh/go-spew"
packages = ["spew"]
@ -31,12 +43,48 @@
packages = [".","spdy"]
revision = "bc6354cbbc295e925e4c611ffe90c1f287ee54db"
[[projects]]
name = "github.com/emicklei/go-restful"
packages = [".","log"]
revision = "2dd44038f0b95ae693b266c5f87593b5d2fdd78d"
version = "v2.5.0"
[[projects]]
name = "github.com/emicklei/go-restful-swagger12"
packages = ["."]
revision = "dcef7f55730566d41eae5db10e7d6981829720f6"
version = "1.0.1"
[[projects]]
name = "github.com/ghodss/yaml"
packages = ["."]
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
version = "v1.0.0"
[[projects]]
branch = "master"
name = "github.com/go-openapi/jsonpointer"
packages = ["."]
revision = "779f45308c19820f1a69e9a4cd965f496e0da10f"
[[projects]]
branch = "master"
name = "github.com/go-openapi/jsonreference"
packages = ["."]
revision = "36d33bfe519efae5632669801b180bf1a245da3b"
[[projects]]
branch = "master"
name = "github.com/go-openapi/spec"
packages = ["."]
revision = "fa03337d7da5735229ee8f5e9d5d0b996014b7f8"
[[projects]]
branch = "master"
name = "github.com/go-openapi/swag"
packages = ["."]
revision = "84f4bee7c0a6db40e3166044c7983c1c32125429"
[[projects]]
name = "github.com/gogo/protobuf"
packages = ["proto","sortkeys"]
@ -133,6 +181,12 @@
revision = "59fac5042749a5afb9af70e813da1dd5474f0167"
version = "1.0.1"
[[projects]]
branch = "master"
name = "github.com/mailru/easyjson"
packages = ["buffer","jlexer","jwriter"]
revision = "32fa128f234d041f196a9f3e0fea5ac9772c08e1"
[[projects]]
branch = "master"
name = "github.com/petar/GoLLRB"
@ -191,12 +245,12 @@
branch = "master"
name = "golang.org/x/sys"
packages = ["unix","windows"]
revision = "af9a21289d0040306ecc55fa668c95246f1f8425"
revision = "03467258950d845cd1877eab69461b98e8c09219"
[[projects]]
branch = "master"
name = "golang.org/x/text"
packages = ["collate","collate/build","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","language","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"]
packages = ["collate","collate/build","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","language","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable","width"]
revision = "e19ae1496984b1c655b8044a65c0300a3c878dd3"
[[projects]]
@ -215,7 +269,7 @@
branch = "master"
name = "google.golang.org/genproto"
packages = ["googleapis/api/annotations","googleapis/iam/v1","googleapis/rpc/status"]
revision = "14790a1795ea4c3387aaf24aed670ddb51f18292"
revision = "4eb30f4778eed4c258ba66527a0d4f9ec8a36c45"
[[projects]]
name = "google.golang.org/grpc"
@ -236,38 +290,38 @@
revision = "d670f9405373e636a5a2765eea47fac0c9bc91a4"
[[projects]]
branch = "master"
name = "k8s.io/api"
packages = ["admissionregistration/v1alpha1","admissionregistration/v1beta1","apps/v1","apps/v1beta1","apps/v1beta2","authentication/v1","authentication/v1beta1","authorization/v1","authorization/v1beta1","autoscaling/v1","autoscaling/v2beta1","batch/v1","batch/v1beta1","batch/v2alpha1","certificates/v1beta1","core/v1","events/v1beta1","extensions/v1beta1","networking/v1","policy/v1beta1","rbac/v1","rbac/v1alpha1","rbac/v1beta1","scheduling/v1alpha1","settings/v1alpha1","storage/v1","storage/v1alpha1","storage/v1beta1"]
revision = "fbe336854453ac8e27bffe14e1964555245cbd05"
packages = ["admissionregistration/v1alpha1","apps/v1beta1","apps/v1beta2","authentication/v1","authentication/v1beta1","authorization/v1","authorization/v1beta1","autoscaling/v1","autoscaling/v2beta1","batch/v1","batch/v1beta1","batch/v2alpha1","certificates/v1beta1","core/v1","extensions/v1beta1","networking/v1","policy/v1beta1","rbac/v1","rbac/v1alpha1","rbac/v1beta1","scheduling/v1alpha1","settings/v1alpha1","storage/v1","storage/v1beta1"]
revision = "4df58c811fe2e65feb879227b2b245e4dc26e7ad"
version = "kubernetes-1.8.2"
[[projects]]
branch = "master"
name = "k8s.io/apiextensions-apiserver"
packages = ["pkg/apis/apiextensions","pkg/apis/apiextensions/v1beta1","pkg/client/clientset/clientset","pkg/client/clientset/clientset/fake","pkg/client/clientset/clientset/scheme","pkg/client/clientset/clientset/typed/apiextensions/v1beta1","pkg/client/clientset/clientset/typed/apiextensions/v1beta1/fake"]
revision = "ddd9f73609e9a03a76dc8566ba6296db0cda63d3"
revision = "e509bb64fe1116e12a32273a2032426aa1a5fd26"
version = "kubernetes-1.8.2"
[[projects]]
branch = "master"
name = "k8s.io/apimachinery"
packages = ["pkg/api/errors","pkg/api/meta","pkg/api/resource","pkg/apis/meta/internalversion","pkg/apis/meta/v1","pkg/apis/meta/v1/unstructured","pkg/apis/meta/v1alpha1","pkg/conversion","pkg/conversion/queryparams","pkg/fields","pkg/labels","pkg/runtime","pkg/runtime/schema","pkg/runtime/serializer","pkg/runtime/serializer/json","pkg/runtime/serializer/protobuf","pkg/runtime/serializer/recognizer","pkg/runtime/serializer/streaming","pkg/runtime/serializer/versioning","pkg/selection","pkg/types","pkg/util/cache","pkg/util/clock","pkg/util/diff","pkg/util/errors","pkg/util/framer","pkg/util/httpstream","pkg/util/httpstream/spdy","pkg/util/intstr","pkg/util/json","pkg/util/mergepatch","pkg/util/net","pkg/util/runtime","pkg/util/sets","pkg/util/strategicpatch","pkg/util/validation","pkg/util/validation/field","pkg/util/wait","pkg/util/yaml","pkg/version","pkg/watch","third_party/forked/golang/json","third_party/forked/golang/netutil","third_party/forked/golang/reflect"]
revision = "2f1e02d3e57b8fb5206c5326bcb65217edc63a8e"
packages = ["pkg/api/equality","pkg/api/errors","pkg/api/meta","pkg/api/resource","pkg/apis/meta/internalversion","pkg/apis/meta/v1","pkg/apis/meta/v1/unstructured","pkg/apis/meta/v1alpha1","pkg/conversion","pkg/conversion/queryparams","pkg/conversion/unstructured","pkg/fields","pkg/labels","pkg/runtime","pkg/runtime/schema","pkg/runtime/serializer","pkg/runtime/serializer/json","pkg/runtime/serializer/protobuf","pkg/runtime/serializer/recognizer","pkg/runtime/serializer/streaming","pkg/runtime/serializer/versioning","pkg/selection","pkg/types","pkg/util/cache","pkg/util/clock","pkg/util/diff","pkg/util/errors","pkg/util/framer","pkg/util/httpstream","pkg/util/httpstream/spdy","pkg/util/intstr","pkg/util/json","pkg/util/mergepatch","pkg/util/net","pkg/util/runtime","pkg/util/sets","pkg/util/strategicpatch","pkg/util/validation","pkg/util/validation/field","pkg/util/wait","pkg/util/yaml","pkg/version","pkg/watch","third_party/forked/golang/json","third_party/forked/golang/netutil","third_party/forked/golang/reflect"]
revision = "019ae5ada31de202164b118aee88ee2d14075c31"
version = "kubernetes-1.8.0"
[[projects]]
name = "k8s.io/client-go"
packages = ["discovery","discovery/fake","kubernetes","kubernetes/fake","kubernetes/scheme","kubernetes/typed/admissionregistration/v1alpha1","kubernetes/typed/admissionregistration/v1alpha1/fake","kubernetes/typed/admissionregistration/v1beta1","kubernetes/typed/admissionregistration/v1beta1/fake","kubernetes/typed/apps/v1","kubernetes/typed/apps/v1/fake","kubernetes/typed/apps/v1beta1","kubernetes/typed/apps/v1beta1/fake","kubernetes/typed/apps/v1beta2","kubernetes/typed/apps/v1beta2/fake","kubernetes/typed/authentication/v1","kubernetes/typed/authentication/v1/fake","kubernetes/typed/authentication/v1beta1","kubernetes/typed/authentication/v1beta1/fake","kubernetes/typed/authorization/v1","kubernetes/typed/authorization/v1/fake","kubernetes/typed/authorization/v1beta1","kubernetes/typed/authorization/v1beta1/fake","kubernetes/typed/autoscaling/v1","kubernetes/typed/autoscaling/v1/fake","kubernetes/typed/autoscaling/v2beta1","kubernetes/typed/autoscaling/v2beta1/fake","kubernetes/typed/batch/v1","kubernetes/typed/batch/v1/fake","kubernetes/typed/batch/v1beta1","kubernetes/typed/batch/v1beta1/fake","kubernetes/typed/batch/v2alpha1","kubernetes/typed/batch/v2alpha1/fake","kubernetes/typed/certificates/v1beta1","kubernetes/typed/certificates/v1beta1/fake","kubernetes/typed/core/v1","kubernetes/typed/core/v1/fake","kubernetes/typed/events/v1beta1","kubernetes/typed/events/v1beta1/fake","kubernetes/typed/extensions/v1beta1","kubernetes/typed/extensions/v1beta1/fake","kubernetes/typed/networking/v1","kubernetes/typed/networking/v1/fake","kubernetes/typed/policy/v1beta1","kubernetes/typed/policy/v1beta1/fake","kubernetes/typed/rbac/v1","kubernetes/typed/rbac/v1/fake","kubernetes/typed/rbac/v1alpha1","kubernetes/typed/rbac/v1alpha1/fake","kubernetes/typed/rbac/v1beta1","kubernetes/typed/rbac/v1beta1/fake","kubernetes/typed/scheduling/v1alpha1","kubernetes/typed/scheduling/v1alpha1/fake","kubernetes/typed/settings/v1alpha1","kubernetes/typed/settings/v1alpha1/fake","kubernetes/typed/storage/v1","kubernetes/typed/storage/v1/fake","kubernetes/typed/storage/v1alpha1","kubernetes/typed/storage/v1alpha1/fake","kubernetes/typed/storage/v1beta1","kubernetes/typed/storage/v1beta1/fake","pkg/version","plugin/pkg/client/auth","plugin/pkg/client/auth/azure","plugin/pkg/client/auth/gcp","plugin/pkg/client/auth/oidc","plugin/pkg/client/auth/openstack","rest","rest/watch","testing","third_party/forked/golang/template","tools/auth","tools/cache","tools/clientcmd","tools/clientcmd/api","tools/clientcmd/api/latest","tools/clientcmd/api/v1","tools/metrics","tools/pager","tools/portforward","tools/record","tools/reference","transport","transport/spdy","util/buffer","util/cert","util/flowcontrol","util/homedir","util/integer","util/jsonpath","util/workqueue"]
revision = "78700dec6369ba22221b72770783300f143df150"
version = "v6.0.0"
packages = ["discovery","discovery/fake","kubernetes","kubernetes/fake","kubernetes/scheme","kubernetes/typed/admissionregistration/v1alpha1","kubernetes/typed/admissionregistration/v1alpha1/fake","kubernetes/typed/apps/v1beta1","kubernetes/typed/apps/v1beta1/fake","kubernetes/typed/apps/v1beta2","kubernetes/typed/apps/v1beta2/fake","kubernetes/typed/authentication/v1","kubernetes/typed/authentication/v1/fake","kubernetes/typed/authentication/v1beta1","kubernetes/typed/authentication/v1beta1/fake","kubernetes/typed/authorization/v1","kubernetes/typed/authorization/v1/fake","kubernetes/typed/authorization/v1beta1","kubernetes/typed/authorization/v1beta1/fake","kubernetes/typed/autoscaling/v1","kubernetes/typed/autoscaling/v1/fake","kubernetes/typed/autoscaling/v2beta1","kubernetes/typed/autoscaling/v2beta1/fake","kubernetes/typed/batch/v1","kubernetes/typed/batch/v1/fake","kubernetes/typed/batch/v1beta1","kubernetes/typed/batch/v1beta1/fake","kubernetes/typed/batch/v2alpha1","kubernetes/typed/batch/v2alpha1/fake","kubernetes/typed/certificates/v1beta1","kubernetes/typed/certificates/v1beta1/fake","kubernetes/typed/core/v1","kubernetes/typed/core/v1/fake","kubernetes/typed/extensions/v1beta1","kubernetes/typed/extensions/v1beta1/fake","kubernetes/typed/networking/v1","kubernetes/typed/networking/v1/fake","kubernetes/typed/policy/v1beta1","kubernetes/typed/policy/v1beta1/fake","kubernetes/typed/rbac/v1","kubernetes/typed/rbac/v1/fake","kubernetes/typed/rbac/v1alpha1","kubernetes/typed/rbac/v1alpha1/fake","kubernetes/typed/rbac/v1beta1","kubernetes/typed/rbac/v1beta1/fake","kubernetes/typed/scheduling/v1alpha1","kubernetes/typed/scheduling/v1alpha1/fake","kubernetes/typed/settings/v1alpha1","kubernetes/typed/settings/v1alpha1/fake","kubernetes/typed/storage/v1","kubernetes/typed/storage/v1/fake","kubernetes/typed/storage/v1beta1","kubernetes/typed/storage/v1beta1/fake","pkg/version","plugin/pkg/client/auth","plugin/pkg/client/auth/azure","plugin/pkg/client/auth/gcp","plugin/pkg/client/auth/oidc","plugin/pkg/client/auth/openstack","rest","rest/watch","testing","third_party/forked/golang/template","tools/auth","tools/cache","tools/clientcmd","tools/clientcmd/api","tools/clientcmd/api/latest","tools/clientcmd/api/v1","tools/metrics","tools/pager","tools/portforward","tools/record","tools/reference","transport","transport/spdy","util/cert","util/flowcontrol","util/homedir","util/integer","util/jsonpath","util/workqueue"]
revision = "35ccd4336052e7d73018b1382413534936f34eee"
version = "kubernetes-1.8.2"
[[projects]]
branch = "master"
name = "k8s.io/kube-openapi"
packages = ["pkg/util/proto"]
packages = ["pkg/common"]
revision = "a07b7bbb58e7fdc5144f8d7046331d29fc9ad3b3"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "a81cd276b276d590dc270d27351d6f8ac88efbc9db32ea8ca3f73ff6a02e6c25"
inputs-digest = "d5115664a2f8d7dd4f0554bc949e758c1ac05fd5d5fda8d71491523829150ccd"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -56,12 +56,16 @@
[[constraint]]
name = "k8s.io/api"
version = "kubernetes-1.8.2"
[[constraint]]
name = "k8s.io/apiextensions-apiserver"
version = "kubernetes-1.8.2"
[[constraint]]
name = "k8s.io/apimachinery"
version = "kubernetes-1.8.2"
[[constraint]]
name = "k8s.io/client-go"
version = "kubernetes-1.8.2"

View File

@ -164,7 +164,7 @@ func TestProcessSingleDriverStateUpdate(t *testing.T) {
nodeName: "node1",
podPhase: apiv1.PodRunning,
},
expectedAppState: v1alpha1.NewState,
expectedAppState: v1alpha1.RunningState,
},
}
@ -251,21 +251,11 @@ func TestProcessSingleAppStateUpdate(t *testing.T) {
name: "completed app with initial state SubmittedState",
update: appStateUpdate{
appID: appID,
state: v1alpha1.SubmittedState,
state: v1alpha1.FailedSubmissionState,
errorMessage: "",
},
initialAppState: v1alpha1.CompletedState,
expectedAppState: v1alpha1.CompletedState,
},
{
name: "failed app with initial state SubmittedState",
update: appStateUpdate{
appID: appID,
state: v1alpha1.SubmittedState,
errorMessage: "",
},
initialAppState: v1alpha1.FailedState,
expectedAppState: v1alpha1.FailedState,
initialAppState: v1alpha1.RunningState,
expectedAppState: v1alpha1.FailedSubmissionState,
},
}

View File

@ -36,7 +36,44 @@ environment variable `HADOOP_CONF_DIR`, create a Kubernetes `ConfigMap` from the
the `SparkApplication` object so it gets mounted into the driver and executor pods by the Spark Operator. The
environment variable `HADOOP_CONF_DIR` is also set in the driver and executor containers.
It is planned to add support for staging local application dependencies as part of the `create` command in the future.
The `create` command also supports staging local application dependencies, though currently only uploading to a Google
Cloud Storage (GCS) bucket is supported. The way it works is as follows. It checks if there is any local dependencies
in `spec.mainApplicationFile`, `spec.deps.jars`, `spec.deps.files`, etc. in the parsed `SaprkApplication` object. If so,
it tries to upload the local dependencies to the remote location specified by `--upload-to`. The command fails if local
dependencies are used but `--upload-to` is not specified.
For uploading to GCS, the value should be in the form of `gs://<bucket>`. The bucket must exist and uploading fails if
otherwise. The local dependencies will be uploaded to the path
`spark-app-dependencies/<SaprkApplication namespace>/<SparkApplication name>` in the given bucket. It replaces the
file path of each local dependency with the URI of the remote copy in the parsed `SaprkApplication` object if uploading
is successful.
Usage:
```base
$ sparkctl create <path to YAML file> --upload-to gs://<bucket> --project <GCP project the GCS bucket is associated to>
```
Note that uploading to GCS requires a GCP service account with the necessary IAM permission to use the GCP project
specified by `--project` (`serviceusage.services.use`) and the permission to create GCS objects (`storage.object.create`).
The service account JSON key file must be locally available and be pointed to by the environment variable
`GOOGLE_APPLICATION_CREDENTIALS`. For more information on IAM authentication, please check
[Getting Started with Authentication](https://cloud.google.com/docs/authentication/getting-started).
By default, the uploaded dependencies are not made publicly accessible and are referenced using URIs in the form of
`gs://bucket/path/to/file`. Such dependencies are referenced through URIs of the form `gs://bucket/path/to/file`. To
download the dependencies from GCS, a custom-built Spark init-container with the
[GCS connector](https://cloud.google.com/dataproc/docs/concepts/connectors/cloud-storage) installed and necessary
Hadoop configuration properties specified is needed. An example Docker file of such an init-container can be found
[here](https://gist.github.com/liyinan926/f9e81f7b54d94c05171a663345eb58bf).
If you want to make uploaded dependencies publicly available so they can be downloaded by the built-in init-container,
simply add `--public` to the `create` command, as the following example shows:
```bash
$ sparkctl create <path to YAML file> --upload-to gs://<bucket> --project <GCP project the GCS bucket is associated to> --public
```
Publicly available files are referenced through URIs of the form `https://storage.googleapis.com/bucket/path/to/file`.
### List

View File

@ -37,6 +37,10 @@ import (
const bufferSize = 1024
var UploadTo string
var Project string
var Public bool
var createCmd = &cobra.Command{
Use: "create <yaml file>",
Short: "Create a SparkApplication object",
@ -65,6 +69,15 @@ var createCmd = &cobra.Command{
},
}
func init() {
createCmd.Flags().StringVarP(&UploadTo, "upload-to", "u", "",
"A URL of the remote location where local application dependencies are to be submitted to")
createCmd.Flags().StringVarP(&Project, "project", "p", "",
"The GCP project with which the GCS bucket is associated")
createCmd.Flags().BoolVarP(&Public, "public", "c", false,
"Whether to make uploaded files publicly available")
}
func doCreate(yamlFile string, kubeClientset clientset.Interface, crdClientset crdclientset.Interface) error {
app, err := loadFromYAML(yamlFile)
if err != nil {
@ -109,20 +122,45 @@ func loadFromYAML(yamlFile string) (*v1alpha1.SparkApplication, error) {
}
func handleLocalDependencies(app *v1alpha1.SparkApplication) error {
if app.Spec.MainApplicationFile != nil {
isMainAppFileLocal, err := isLocalFile(*app.Spec.MainApplicationFile)
if err != nil {
return err
}
if isMainAppFileLocal {
uploadedMainFile, err := uploadLocalDependencies(app, []string{*app.Spec.MainApplicationFile})
if err != nil {
return fmt.Errorf("failed to upload local main application file: %v", err)
}
app.Spec.MainApplicationFile = &uploadedMainFile[0]
}
}
localJars, err := filterLocalFiles(app.Spec.Deps.Jars)
if err != nil {
return fmt.Errorf("failed to filter local jars: %v", err)
}
if err = uploadLocalFiles(localJars); err != nil {
return fmt.Errorf("failed to upload local jars: %v", err)
if len(localJars) > 0 {
uploadedJars, err := uploadLocalDependencies(app, localJars)
if err != nil {
return fmt.Errorf("failed to upload local jars: %v", err)
}
app.Spec.Deps.Jars = uploadedJars
}
localFiles, err := filterLocalFiles(app.Spec.Deps.Files)
if err != nil {
return fmt.Errorf("failed to filter local files: %v", err)
}
if err = uploadLocalFiles(localFiles); err != nil {
return fmt.Errorf("failed to upload local files: %v", err)
if len(localFiles) > 0 {
uploadedFiles, err := uploadLocalDependencies(app, localFiles)
if err != nil {
return fmt.Errorf("failed to upload local files: %v", err)
}
app.Spec.Deps.Files = uploadedFiles
}
return nil
@ -154,8 +192,28 @@ func isLocalFile(file string) (bool, error) {
return false, nil
}
func uploadLocalFiles(files []string) error {
return nil
func uploadLocalDependencies(app *v1alpha1.SparkApplication, files []string) ([]string, error) {
if UploadTo == "" {
return nil, fmt.Errorf(
"unable to upload local dependencies: no upload location specified via --upload-to")
}
uploadLocationUrl, err := url.Parse(UploadTo)
if err != nil {
return nil, err
}
switch uploadLocationUrl.Scheme {
case "gs":
if Project == "" {
return nil, fmt.Errorf("--project must be specified to upload dependencies to GCS")
}
return uploadToGCS(uploadLocationUrl.Host, app.Namespace, app.Name, Project, files, Public)
case "s3":
return nil, nil
default:
return nil, fmt.Errorf("unsupported upload location URL scheme: %s", uploadLocationUrl.Scheme)
}
}
func handleHadoopConfiguration(

View File

@ -51,6 +51,7 @@ func TestIsLocalFile(t *testing.T) {
func TestFilterLocalFiles(t *testing.T) {
files := []string{
"path/to/file",
"/path/to/file",
"file:///file/to/path",
"http://localhost/path/to/file",
@ -59,6 +60,7 @@ func TestFilterLocalFiles(t *testing.T) {
}
expected := []string{
"path/to/file",
"/path/to/file",
"file:///file/to/path",
}

112
sparkctl/cmd/gcs.go Normal file
View File

@ -0,0 +1,112 @@
/*
Copyright 2017 Google LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cmd
import (
"fmt"
"io"
"path/filepath"
"os"
"cloud.google.com/go/storage"
"golang.org/x/net/context"
)
const rootPath = "spark-app-dependencies"
type gcsUploader struct {
client *storage.Client
handle *storage.BucketHandle
bucket string
path string
}
func newGcsUploader(bucket string, path string, projectID string, ctx context.Context) (*gcsUploader, error) {
client, err := storage.NewClient(ctx)
if err != nil {
return nil, err
}
handle := client.Bucket(bucket)
// Check if the bucket exists.
if _, err := handle.Attrs(ctx); err != nil {
return nil, err
}
uploader := &gcsUploader{
client: client,
handle: handle.UserProject(projectID),
bucket: bucket,
path: path}
return uploader, nil
}
func (g *gcsUploader) upload(ctx context.Context, localFile string, public bool) (string, error) {
fmt.Printf("Uploading local file: %s\n", localFile)
file, err := os.Open(localFile)
if err != nil {
return "", fmt.Errorf("failed to open file %s: %v", localFile, err)
}
object := g.handle.Object(filepath.Join(g.path, filepath.Base(localFile)))
writer := object.NewWriter(ctx)
if _, err = io.Copy(writer, file); err != nil {
return "", fmt.Errorf("failed to copy file %s to GCS: %v", localFile, err)
}
if err = writer.Close(); err != nil {
return "", err
}
objectAttrs, err := object.Attrs(ctx)
if err != nil {
return "", err
}
if public {
if err = object.ACL().Set(ctx, storage.AllUsers, storage.RoleReader); err != nil {
return "", fmt.Errorf("failed to set ACL on GCS object %s: %v", objectAttrs.Name, err)
}
return fmt.Sprintf("https://storage.googleapis.com/%s/%s", objectAttrs.Bucket, objectAttrs.Name), nil
}
return fmt.Sprintf("gs://%s/%s", objectAttrs.Bucket, objectAttrs.Name), nil
}
func uploadToGCS(
bucket string,
appNamespace string,
appName string,
projectID string,
files []string,
public bool) ([]string, error) {
ctx := context.Background()
uploader, err := newGcsUploader(bucket, filepath.Join(rootPath, appNamespace, appName), projectID, ctx)
if err != nil {
return nil, err
}
defer uploader.client.Close()
var uploadedFiles []string
for _, file := range files {
remoteFile, err := uploader.upload(ctx, file, public)
if err != nil {
return nil, err
}
uploadedFiles = append(uploadedFiles, remoteFile)
}
return uploadedFiles, nil
}

View File

@ -38,7 +38,7 @@ var rootCmd = &cobra.Command{
func init() {
rootCmd.PersistentFlags().StringVarP(&Namespace, "namespace", "n", "default",
"The namespace in which the SparkApplication is to be created")
rootCmd.PersistentFlags().StringVarP(&KubeConfig, "kubeconfig", "c", defaultKubeConfig,
rootCmd.PersistentFlags().StringVarP(&KubeConfig, "kubeconfig", "k", defaultKubeConfig,
"The namespace in which the SparkApplication is to be created")
rootCmd.AddCommand(createCmd, deleteCmd, statusCmd, logCommand, listCmd, forwardCmd)
}