// Package helm provides a tracker source for Helm charts repositories.
package helm

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"path"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"github.com/Masterminds/semver/v3"
	"github.com/artifacthub/hub/internal/hub"
	"github.com/artifacthub/hub/internal/license"
	"github.com/artifacthub/hub/internal/pkg"
	"github.com/artifacthub/hub/internal/repo"
	"github.com/artifacthub/hub/internal/tracker/source"
	"github.com/artifacthub/hub/internal/util"
	"github.com/hashicorp/go-multierror"
	"golang.org/x/time/rate"
	"gopkg.in/yaml.v3"
	"helm.sh/helm/v3/pkg/action"
	"helm.sh/helm/v3/pkg/chart"
	"helm.sh/helm/v3/pkg/chart/loader"
	"helm.sh/helm/v3/pkg/chartutil"
	helmrepo "helm.sh/helm/v3/pkg/repo"
)

const (
	// concurrency is the capacity of the limiter channel used by
	// GetPackagesAvailable, that is, the maximum number of chart versions
	// being prepared at the same time.
	concurrency = 10

	// Annotations keys looked up by EnrichPackageFromAnnotations.
	changesAnnotation              = "artifacthub.io/changes"
	crdsAnnotation                 = "artifacthub.io/crds"
	crdsExamplesAnnotation         = "artifacthub.io/crdsExamples"
	imagesAnnotation               = "artifacthub.io/images"
	licenseAnnotation              = "artifacthub.io/license"
	linksAnnotation                = "artifacthub.io/links"
	maintainersAnnotation          = "artifacthub.io/maintainers"
	operatorAnnotation             = "artifacthub.io/operator"
	operatorCapabilitiesAnnotation = "artifacthub.io/operatorCapabilities"
	prereleaseAnnotation           = "artifacthub.io/prerelease"
	recommendationsAnnotation      = "artifacthub.io/recommendations"
	securityUpdatesAnnotation      = "artifacthub.io/containsSecurityUpdates"
	signKeyAnnotation              = "artifacthub.io/signKey"

	// Media types of the layers requested when pulling charts content and
	// provenance files from OCI repositories.
	helmChartContentLayerMediaType    = "application/vnd.cncf.helm.chart.content.v1.tar+gzip"
	helmChartProvenanceLayerMediaType = "application/vnd.cncf.helm.chart.provenance.v1.prov"

	// Keys used to store chart specific information in the package's Data
	// field.
	apiVersionKey   = "apiVersion"
	dependenciesKey = "dependencies"
	kubeVersionKey  = "kubeVersion"
	typeKey         = "type"
)

var (
	// containersImagesRE is a regexp used to extract containers images from
	// kubernetes manifests files.
	containersImagesRE = regexp.MustCompile(`\simage:\s(\S+)`)

	// errInvalidAnnotation indicates that the annotation provided is not valid.
	errInvalidAnnotation = errors.New("invalid annotation")

	// validOperatorCapabilities represents the valid operator capabilities
	// values that can be provided.
	validOperatorCapabilities = []string{
		"basic install",
		"seamless upgrades",
		"full lifecycle",
		"deep insights",
		"auto pilot",
	}
)

// TrackerSource is a hub.TrackerSource implementation for Helm repositories.
type TrackerSource struct {
	i  *hub.TrackerSourceInput // input provided when the source was created
	il hub.HelmIndexLoader     // loads the index file of http(s) repositories
	tg hub.OCITagsGetter       // lists the tags available in OCI repositories
}

// NewTrackerSource creates a new TrackerSource instance.
//
// The options provided are applied in order. Once they have been applied, the
// index loader and the OCI tags getter that are still unset fall back to
// &repo.HelmIndexLoader{} and &repo.OCITagsGetter{} respectively.
func NewTrackerSource(i *hub.TrackerSourceInput, opts ...func(s *TrackerSource)) *TrackerSource {
	s := &TrackerSource{i: i}
	for _, o := range opts {
		o(s)
	}
	if s.il == nil {
		s.il = &repo.HelmIndexLoader{}
	}
	if s.tg == nil {
		s.tg = &repo.OCITagsGetter{}
	}
	return s
}

// GetPackagesAvailable implements the TrackerSource interface.
//
// It prepares a package for each of the chart versions returned by getCharts
// and returns them in a map indexed by the key built with pkg.BuildKey. Chart
// versions that fail to be prepared are reported using warn and left out of
// the result. An error is returned when the charts cannot be obtained or when
// the context is cancelled.
func (s *TrackerSource) GetPackagesAvailable() (map[string]*hub.Package, error) {
	// mu protects packagesAvailable, which is written from the goroutines
	// launched below
	var mu sync.Mutex
	packagesAvailable := make(map[string]*hub.Package)

	// Iterate over charts versions available in the repository
	charts, err := s.getCharts()
	if err != nil {
		return nil, err
	}
	// limiter bounds the number of goroutines running at the same time: a
	// slot is taken before launching each one and released when it finishes
	limiter := make(chan struct{}, concurrency)
	var wg sync.WaitGroup
	for _, chartVersions := range charts {
		for _, chartVersion := range chartVersions {
			// Return ASAP if context is cancelled (goroutines already
			// launched are waited for before returning)
			select {
			case <-s.i.Svc.Ctx.Done():
				wg.Wait()
				return nil, s.i.Svc.Ctx.Err()
			default:
			}

			// Prepare and store package version
			limiter <- struct{}{}
			wg.Add(1)
			go func(chartVersion *helmrepo.ChartVersion) {
				defer func() {
					<-limiter
					wg.Done()
				}()
				p, err := s.preparePackage(chartVersion)
				if err != nil {
					s.warn(chartVersion.Metadata, fmt.Errorf("error preparing package: %w", err))
					return
				}
				mu.Lock()
				packagesAvailable[pkg.BuildKey(p)] = p
				mu.Unlock()
			}(chartVersion)
		}
	}
	wg.Wait()

	return packagesAvailable, nil
}

// getCharts returns the charts available in the repository.
func (s *TrackerSource) getCharts() (map[string][]*helmrepo.ChartVersion, error) { charts := make(map[string][]*helmrepo.ChartVersion) u, _ := url.Parse(s.i.Repository.URL) switch u.Scheme { case "http", "https": // Load repository index file indexFile, _, err := s.il.LoadIndex(s.i.Repository) if err != nil { return nil, fmt.Errorf("error loading repository index file: %w", err) } // Read available charts versions from index file for name, chartVersions := range indexFile.Entries { for _, chartVersion := range chartVersions { charts[name] = append(charts[name], chartVersion) } } case "oci": // Get versions (tags) available in the repository versions, err := s.tg.Tags(s.i.Svc.Ctx, s.i.Repository) if err != nil { return nil, fmt.Errorf("error getting repository available versions: %w", err) } // Prepare chart versions using the list of versions available name := path.Base(s.i.Repository.URL) for _, version := range versions { charts[name] = append(charts[name], &helmrepo.ChartVersion{ Metadata: &chart.Metadata{ Name: name, Version: version, }, URLs: []string{s.i.Repository.URL + ":" + version}, }) } default: return nil, repo.ErrSchemeNotSupported } return charts, nil } // preparePackage prepares a package version using the chart version provided. 
func (s *TrackerSource) preparePackage(chartVersion *helmrepo.ChartVersion) (*hub.Package, error) { // Parse package version md := chartVersion.Metadata sv, err := semver.NewVersion(md.Version) if err != nil { return nil, fmt.Errorf("invalid package version: %w", err) } version := sv.String() // Prepare chart archive url if len(chartVersion.URLs) == 0 { return nil, errors.New("chart version does not contain any url") } chartURL, err := url.Parse(chartVersion.URLs[0]) if err != nil { return nil, fmt.Errorf("invalid chart url %s: %w", chartVersion.URLs[0], err) } if !chartURL.IsAbs() { repoURL, _ := url.Parse(s.i.Repository.URL) chartURL.Scheme = repoURL.Scheme chartURL.Host = repoURL.Host if !strings.HasPrefix(chartURL.Path, "/") { chartURL.Path = path.Join(repoURL.Path, chartURL.Path) } } // Prepare package version p := &hub.Package{ Name: chartVersion.Name, Version: version, Digest: chartVersion.Digest, ContentURL: chartURL.String(), Repository: s.i.Repository, } if !chartVersion.Created.IsZero() { p.TS = chartVersion.Created.Unix() } // If the package version is not registered yet or if it needs to be // registered again, we need to enrich the package with extra information // available in the chart archive, like the readme file, the license, etc. // Otherwise, the minimal version of the package prepared above is enough. 
bypassDigestCheck := s.i.Svc.Cfg.GetBool("tracker.bypassDigestCheck") digest, ok := s.i.PackagesRegistered[pkg.BuildKey(p)] if !ok || chartVersion.Digest != digest || bypassDigestCheck { // Load chart from remote archive chrt, err := LoadChartArchive( s.i.Svc.Ctx, chartURL, &LoadChartArchiveOptions{ HC: s.i.Svc.Hc, GithubToken: s.i.Svc.Cfg.GetString("creds.githubToken"), GithubRL: s.i.Svc.GithubRL, Username: s.i.Repository.AuthUser, Password: s.i.Repository.AuthPass, }, ) if err != nil { return nil, fmt.Errorf("error loading chart (%s): %w", chartURL.String(), err) } md := chrt.Metadata // Validate chart version metadata for known issues and sanitize some strings if err := chrt.Validate(); err != nil { return nil, fmt.Errorf("invalid metadata: %w", err) } // Store logo when available if requested if md.Icon != "" { logoImageID, err := s.i.Svc.Is.DownloadAndSaveImage(s.i.Svc.Ctx, md.Icon) if err == nil { p.LogoURL = md.Icon p.LogoImageID = logoImageID } else { s.warn(md, fmt.Errorf("error getting logo image %s: %w", md.Icon, err)) } } // Check if the chart version is signed (has provenance file) hasProvenanceFile, err := s.chartHasProvenanceFile(chartURL) if err != nil { s.warn(md, fmt.Errorf("error checking provenance file: %w", err)) } p.Signed = hasProvenanceFile // Enrich package with data available in chart archive EnrichPackageFromChart(p, chrt) // Enrich package with information from annotations if err := EnrichPackageFromAnnotations(p, chrt.Metadata.Annotations); err != nil { return nil, fmt.Errorf("error enriching package from annotations: %w", err) } } return p, nil } // chartHasProvenanceFile checks if a chart version has a provenance file. 
func (s *TrackerSource) chartHasProvenanceFile(chartURL *url.URL) (bool, error) { var data []byte switch chartURL.Scheme { case "http", "https": req, _ := http.NewRequest("GET", chartURL.String()+".prov", nil) req = req.WithContext(s.i.Svc.Ctx) if s.i.Repository.AuthUser != "" || s.i.Repository.AuthPass != "" { req.SetBasicAuth(s.i.Repository.AuthUser, s.i.Repository.AuthPass) } resp, err := s.i.Svc.Hc.Do(req) if err != nil { return false, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return false, nil } data, err = io.ReadAll(resp.Body) if err != nil { return false, fmt.Errorf("error reading provenance file: %w", err) } case "oci": var err error _, data, err = util.OCIPullLayer( s.i.Svc.Ctx, strings.TrimPrefix(chartURL.String(), hub.RepositoryOCIPrefix), helmChartProvenanceLayerMediaType, s.i.Repository.AuthUser, s.i.Repository.AuthPass, ) if err != nil { return false, fmt.Errorf("error pulling provenance layer: %w", err) } default: return false, nil } if !bytes.Contains(data, []byte("PGP SIGNATURE")) { return false, errors.New("invalid provenance file") } return true, nil } // warn is a helper that sends the error provided to the errors collector and // logs it as a warning. func (s *TrackerSource) warn(md *chart.Metadata, err error) { err = fmt.Errorf("%w (package: %s version: %s)", err, md.Name, md.Version) s.i.Svc.Logger.Warn().Err(err).Send() if !md.Deprecated { s.i.Svc.Ec.Append(s.i.Repository.RepositoryID, err.Error()) } } // LoadChartArchiveOptions represents some options that can be provided to load // a chart archive from its remote location. type LoadChartArchiveOptions struct { HC hub.HTTPClient Username string Password string GithubToken string GithubRL *rate.Limiter } // LoadChartArchive loads a chart from a remote archive located at the url // provided. 
func LoadChartArchive(ctx context.Context, u *url.URL, o *LoadChartArchiveOptions) (*chart.Chart, error) { var r io.Reader switch u.Scheme { case "http", "https": // Get chart content req, _ := http.NewRequest("GET", u.String(), nil) req = req.WithContext(ctx) req.Header.Set("Accept-Encoding", "*") if u.Host == "github.com" || u.Host == "raw.githubusercontent.com" { // Authenticate and rate limit requests to Github if o.GithubToken != "" { req.Header.Set("Authorization", fmt.Sprintf("token %s", o.GithubToken)) } if o.GithubRL != nil { _ = o.GithubRL.Wait(ctx) } } if o.Username != "" || o.Password != "" { req.SetBasicAuth(o.Username, o.Password) } hc := o.HC if hc == nil { hc = util.SetupHTTPClient(false) } resp, err := hc.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("unexpected status code received: %d", resp.StatusCode) } r = resp.Body case "oci": ref := strings.TrimPrefix(u.String(), hub.RepositoryOCIPrefix) _, data, err := util.OCIPullLayer(ctx, ref, helmChartContentLayerMediaType, o.Username, o.Password) if err != nil { return nil, err } r = bytes.NewReader(data) default: return nil, repo.ErrSchemeNotSupported } // Load chart from reader previously set up chrt, err := loader.LoadArchive(r) if err != nil { return nil, err } return chrt, nil } // EnrichPackageFromChart adds some extra information to the package from the // chart archive. 
func EnrichPackageFromChart(p *hub.Package, chrt *chart.Chart) { md := chrt.Metadata p.Description = md.Description p.Keywords = md.Keywords p.HomeURL = md.Home p.AppVersion = md.AppVersion p.Deprecated = md.Deprecated p.ValuesSchema = chrt.Schema p.Data = map[string]interface{}{} // API version p.Data[apiVersionKey] = chrt.Metadata.APIVersion // Containers images imagesRefs, err := extractContainersImages(chrt) if err == nil && len(imagesRefs) > 0 { containersImages := make([]*hub.ContainerImage, 0, len(imagesRefs)) for _, imageRef := range imagesRefs { containersImages = append(containersImages, &hub.ContainerImage{Image: imageRef}) } if err := pkg.ValidateContainersImages(containersImages); err == nil { p.ContainersImages = containersImages } } // Dependencies dependencies := make([]map[string]string, 0, len(md.Dependencies)) for _, dependency := range md.Dependencies { dependencies = append(dependencies, map[string]string{ "name": dependency.Name, "version": dependency.Version, "repository": dependency.Repository, }) } if len(dependencies) > 0 { p.Data[dependenciesKey] = dependencies } // Kubernetes version p.Data[kubeVersionKey] = chrt.Metadata.KubeVersion // License licenseFile := getFile(chrt, "LICENSE") if licenseFile != nil { p.License = license.Detect(licenseFile.Data) } // Links links := make([]*hub.Link, 0, len(md.Sources)) for _, sourceURL := range md.Sources { links = append(links, &hub.Link{ Name: "source", URL: sourceURL, }) } if len(links) > 0 { p.Links = links } // Maintainers var maintainers []*hub.Maintainer for _, entry := range md.Maintainers { if entry.Email != "" { maintainers = append(maintainers, &hub.Maintainer{ Name: entry.Name, Email: entry.Email, }) } } if len(maintainers) > 0 { p.Maintainers = maintainers } // Operator if strings.Contains(strings.ToLower(md.Name), "operator") { p.IsOperator = true } // Readme readme := getFile(chrt, "README.md") if readme != nil { p.Readme = string(readme.Data) } // Type p.Data[typeKey] = 
chrt.Metadata.Type } // extractContainersImages extracts the containers images references found in // the manifest generated as a result of Helm dry-run install with the default // values. func extractContainersImages(chrt *chart.Chart) ([]string, error) { // Dry-run Helm install install := action.NewInstall(&action.Configuration{ Log: func(string, ...interface{}) {}, }) install.ReleaseName = "release-name" install.DryRun = true install.DisableHooks = true install.Replace = true install.ClientOnly = true install.IncludeCRDs = true install.DependencyUpdate = false release, err := install.Run(chrt, chartutil.Values{}) if err != nil { return nil, err } // Extract containers images from release manifest results := containersImagesRE.FindAllStringSubmatch(release.Manifest, -1) images := make([]string, 0, len(results)) for _, result := range results { image := strings.Trim(result[1], `"'`) if image != "" && !contains(images, image) { images = append(images, image) } } return images, nil } // EnrichPackageFromAnnotations adds some extra information to the package from // the provided annotations. 
func EnrichPackageFromAnnotations(p *hub.Package, annotations map[string]string) error { var errs *multierror.Error // Changes if v, ok := annotations[changesAnnotation]; ok { changes, err := source.ParseChangesAnnotation(v) if err != nil { errs = multierror.Append(errs, err) } else { p.Changes = changes } } // CRDs if v, ok := annotations[crdsAnnotation]; ok { var crds []interface{} if err := yaml.Unmarshal([]byte(v), &crds); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid crds value", errInvalidAnnotation)) } else { p.CRDs = crds } } // CRDs examples if v, ok := annotations[crdsExamplesAnnotation]; ok { var crdsExamples []interface{} if err := yaml.Unmarshal([]byte(v), &crdsExamples); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid crdsExamples value", errInvalidAnnotation)) } else { p.CRDsExamples = crdsExamples } } // Images if v, ok := annotations[imagesAnnotation]; ok { var images []*hub.ContainerImage if err := yaml.Unmarshal([]byte(v), &images); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid images value", errInvalidAnnotation)) } else { if err := pkg.ValidateContainersImages(images); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: %s", errInvalidAnnotation, err.Error())) } else { p.ContainersImages = images } } } // License if v, ok := annotations[licenseAnnotation]; ok && v != "" { p.License = v } // Links if v, ok := annotations[linksAnnotation]; ok { var links []*hub.Link if err := yaml.Unmarshal([]byte(v), &links); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid links value", errInvalidAnnotation)) } else { LL: for _, link := range links { for _, pLink := range p.Links { if link.URL == pLink.URL { pLink.Name = link.Name continue LL } } p.Links = append(p.Links, link) } } } // Maintainers if v, ok := annotations[maintainersAnnotation]; ok { var maintainers []*hub.Maintainer if err := yaml.Unmarshal([]byte(v), &maintainers); err != nil { errs = 
multierror.Append(errs, fmt.Errorf("%w: invalid maintainers value", errInvalidAnnotation)) } else { ML: for _, maintainer := range maintainers { for _, pMaintainer := range p.Maintainers { if maintainer.Email == pMaintainer.Email { pMaintainer.Name = maintainer.Name continue ML } } p.Maintainers = append(p.Maintainers, maintainer) } } } // Operator flag if v, ok := annotations[operatorAnnotation]; ok { isOperator, err := strconv.ParseBool(v) if err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid operator value", errInvalidAnnotation)) } else { p.IsOperator = isOperator } } // Operator capabilities if v, ok := annotations[operatorCapabilitiesAnnotation]; ok { v = strings.ToLower(v) if !contains(validOperatorCapabilities, v) { errs = multierror.Append(errs, fmt.Errorf("%w: invalid operator capabilities value", errInvalidAnnotation)) } else { p.Capabilities = v } } // Prerelease if v, ok := annotations[prereleaseAnnotation]; ok { prerelease, err := strconv.ParseBool(v) if err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid prerelease value", errInvalidAnnotation)) } else { p.Prerelease = prerelease } } // Recommendations if v, ok := annotations[recommendationsAnnotation]; ok { var recommendations []*hub.Recommendation if err := yaml.Unmarshal([]byte(v), &recommendations); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid recommendations value", errInvalidAnnotation)) } else { p.Recommendations = recommendations } } // Security updates if v, ok := annotations[securityUpdatesAnnotation]; ok { containsSecurityUpdates, err := strconv.ParseBool(v) if err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: invalid containsSecurityUpdates value", errInvalidAnnotation)) } else { p.ContainsSecurityUpdates = containsSecurityUpdates } } // Sign key if v, ok := annotations[signKeyAnnotation]; ok { var signKey *hub.SignKey if err := yaml.Unmarshal([]byte(v), &signKey); err != nil { errs = multierror.Append(errs, fmt.Errorf("%w: 
invalid sign key value", errInvalidAnnotation)) } else { if signKey.URL == "" { errs = multierror.Append(errs, fmt.Errorf("%w: sign key url not provided", errInvalidAnnotation)) } else { p.SignKey = signKey } } } return errs.ErrorOrNil() } // getFile returns the file requested from the provided chart. func getFile(chrt *chart.Chart, name string) *chart.File { for _, file := range chrt.Files { if file.Name == name { return file } } return nil } // contains is a helper to check if a list contains the string provided. func contains(l []string, e string) bool { for _, x := range l { if x == e { return true } } return false }