mirror of https://github.com/artifacthub/hub.git
Integrate category classifier model in tracker (#2756)
Signed-off-by: Sergio Castaño Arteaga <tegioz@icloud.com>
This commit is contained in:
parent
815d95f60a
commit
910d2d0b22
|
|
@ -3,4 +3,4 @@ web/build/** linguist-generated
|
|||
__fixtures__/** linguist-generated
|
||||
*.sql linguist-detectable=true
|
||||
*.sql linguist-language=sql
|
||||
ml/categories/model/** linguist-generated
|
||||
ml/category/model/** linguist-generated
|
||||
|
|
|
|||
|
|
@ -13,6 +13,12 @@ jobs:
|
|||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: 1.19
|
||||
- name: Install TensorFlow C library
|
||||
run: |
|
||||
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
|
||||
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
|
||||
sudo tar -C /usr/local -xzf ${FILENAME}
|
||||
sudo ldconfig /usr/local/lib
|
||||
- name: Run golangci-lint
|
||||
uses: golangci/golangci-lint-action@v3
|
||||
with:
|
||||
|
|
@ -98,6 +104,12 @@ jobs:
|
|||
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-
|
||||
- name: Install TensorFlow C library
|
||||
run: |
|
||||
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
|
||||
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
|
||||
sudo tar -C /usr/local -xzf ${FILENAME}
|
||||
sudo ldconfig /usr/local/lib
|
||||
- name: Run backend tests
|
||||
run: go test -cover -race -v -mod=readonly ./...
|
||||
|
||||
|
|
@ -171,6 +183,12 @@ jobs:
|
|||
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-
|
||||
- name: Install TensorFlow C library
|
||||
run: |
|
||||
FILENAME=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
|
||||
wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}
|
||||
sudo tar -C /usr/local -xzf ${FILENAME}
|
||||
sudo ldconfig /usr/local/lib
|
||||
- name: Build hub
|
||||
working-directory: ./cmd/hub
|
||||
run: go build -v
|
||||
|
|
|
|||
|
|
@ -19,4 +19,4 @@ docs/www/content/topics/*
|
|||
docs/www/content/topics/annotations/*
|
||||
!docs/www/content/topics/annotations/_index.md
|
||||
dist
|
||||
ml/categories/data/generated
|
||||
ml/category/data/generated
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ apiVersion: v2
|
|||
name: artifact-hub
|
||||
description: Artifact Hub is a web-based application that enables finding, installing, and publishing Kubernetes packages.
|
||||
type: application
|
||||
version: 1.12.1-3
|
||||
version: 1.12.1-4
|
||||
appVersion: 1.12.0
|
||||
kubeVersion: ">= 1.19.0-0"
|
||||
home: https://artifacthub.io
|
||||
|
|
|
|||
|
|
@ -28,3 +28,4 @@ stringData:
|
|||
repositoriesNames: {{ .Values.tracker.repositoriesNames }}
|
||||
repositoriesKinds: {{ .Values.tracker.repositoriesKinds }}
|
||||
bypassDigestCheck: {{ .Values.tracker.bypassDigestCheck }}
|
||||
categoryModelPath: ./ml/category/model
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /scanner .
|
|||
# Trivy installer
|
||||
FROM alpine:3.17.1 AS trivy-installer
|
||||
RUN apk --no-cache add curl
|
||||
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/master/contrib/install.sh | sh -s -- -b /usr/local/bin v0.36.1
|
||||
RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.36.1
|
||||
|
||||
# Final stage
|
||||
FROM alpine:3.17.1
|
||||
|
|
|
|||
|
|
@ -1,22 +1,39 @@
|
|||
# Build tracker
|
||||
FROM golang:1.19.5-alpine3.17 AS builder
|
||||
FROM golang:1.19.5-bullseye AS builder
|
||||
WORKDIR /tmp
|
||||
ENV LIBTENSORFLOW_TGZ libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
|
||||
RUN wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/$LIBTENSORFLOW_TGZ
|
||||
RUN tar -C /usr/local -xzf $LIBTENSORFLOW_TGZ
|
||||
RUN ldconfig /usr/local/lib
|
||||
WORKDIR /go/src/github.com/artifacthub/hub
|
||||
COPY go.* ./
|
||||
COPY cmd/tracker cmd/tracker
|
||||
COPY internal internal
|
||||
WORKDIR /go/src/github.com/artifacthub/hub/cmd/tracker
|
||||
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o /tracker .
|
||||
RUN CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o /tracker .
|
||||
|
||||
# OPM installer
|
||||
FROM golang:1.17-alpine3.16 AS opm-installer
|
||||
RUN apk --no-cache add build-base
|
||||
FROM golang:1.17-bullseye AS opm-installer
|
||||
RUN GO111MODULE=on go get github.com/operator-framework/operator-registry/cmd/opm@v1.26.2
|
||||
|
||||
# Final stage
|
||||
FROM alpine:3.17.1
|
||||
RUN apk --no-cache add ca-certificates && addgroup -S tracker -g 1000 && adduser -S tracker -u 1000 -G tracker
|
||||
FROM debian:bullseye-slim
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y ca-certificates \
|
||||
&& groupadd -g 1000 tracker \
|
||||
&& useradd -u 1000 -g tracker tracker
|
||||
WORKDIR /tmp
|
||||
ENV LIBTENSORFLOW_TGZ libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz
|
||||
RUN apt-get install -y wget \
|
||||
&& export LIBTENSORFLOW_TGZ=libtensorflow-cpu-linux-x86_64-2.11.0.tar.gz \
|
||||
&& wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/$LIBTENSORFLOW_TGZ \
|
||||
&& tar -C /usr/local -xzf $LIBTENSORFLOW_TGZ \
|
||||
&& rm $LIBTENSORFLOW_TGZ \
|
||||
&& apt-get remove -y wget \
|
||||
&& ldconfig /usr/local/lib
|
||||
USER 1000
|
||||
WORKDIR /home/tracker
|
||||
COPY ml ./ml
|
||||
COPY --from=builder /tracker ./
|
||||
COPY --from=opm-installer /go/bin/opm /usr/local/bin
|
||||
CMD ["./tracker"]
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ func main() {
|
|||
}
|
||||
ec := repo.NewErrorsCollector(rm, repo.Tracker)
|
||||
op := oci.NewPuller(cfg)
|
||||
pcc := tracker.NewPackageCategoryClassifierML(cfg.GetString("tracker.categoryModelPath"))
|
||||
svc := &hub.TrackerServices{
|
||||
Ctx: ctx,
|
||||
Cfg: cfg,
|
||||
|
|
@ -82,6 +83,7 @@ func main() {
|
|||
Op: op,
|
||||
Is: is,
|
||||
Sc: oci.NewSignatureChecker(cfg, op),
|
||||
Pcc: pcc,
|
||||
SetupTrackerSource: tracker.SetupSource,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ begin
|
|||
is_operator,
|
||||
channels,
|
||||
default_channel,
|
||||
package_category_id,
|
||||
repository_id
|
||||
) values (
|
||||
v_name,
|
||||
|
|
@ -75,6 +76,7 @@ begin
|
|||
(p_pkg->>'is_operator')::boolean,
|
||||
nullif(p_pkg->'channels', 'null'),
|
||||
nullif(p_pkg->>'default_channel', ''),
|
||||
nullif((p_pkg->>'category')::int, 0),
|
||||
v_repository_id
|
||||
)
|
||||
on conflict (repository_id, name) do update
|
||||
|
|
@ -85,7 +87,8 @@ begin
|
|||
tsdoc = generate_package_tsdoc(v_name, v_alternative_name, v_display_name, v_description, v_keywords, v_ts_repository, v_ts_publisher),
|
||||
is_operator = excluded.is_operator,
|
||||
channels = excluded.channels,
|
||||
default_channel = excluded.default_channel
|
||||
default_channel = excluded.default_channel,
|
||||
package_category_id = excluded.package_category_id
|
||||
where is_latest(
|
||||
v_repository_kind_id,
|
||||
v_version,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,21 @@
|
|||
-- Lookup table with the set of categories a package can belong to.
create table if not exists package_category (
    package_category_id integer primary key,
    name text not null check (name <> ''),
    display_name text not null check (display_name <> '')
);

-- Seed the available categories. NOTE(review): these ids are referenced from
-- code (PackageCategory constants) -- keep both in sync.
insert into package_category (package_category_id, name, display_name) values
    (1, 'ai-machine-learning', 'AI / Machine learning'),
    (2, 'database', 'Database'),
    (3, 'integration-delivery', 'Integration and delivery'),
    (4, 'monitoring-logging', 'Monitoring and logging'),
    (5, 'networking', 'Networking'),
    (6, 'security', 'Security'),
    (7, 'storage', 'Storage'),
    (8, 'streaming-messaging', 'Streaming and messaging');

-- Packages may optionally be assigned a category (null means none assigned).
alter table package add column package_category_id integer;

---- create above / drop below ----

alter table package drop column package_category_id;
drop table if exists package_category;
|
||||
|
|
@ -116,6 +116,7 @@ select register_package('
|
|||
"url": "https://key.url"
|
||||
},
|
||||
"relative_path": "path1/path2",
|
||||
"category": 1,
|
||||
"repository": {
|
||||
"repository_id": "00000000-0000-0000-0000-000000000001"
|
||||
}
|
||||
|
|
@ -130,6 +131,7 @@ select results_eq(
|
|||
is_operator,
|
||||
channels,
|
||||
default_channel,
|
||||
package_category_id,
|
||||
repository_id
|
||||
from package
|
||||
where name='package1'
|
||||
|
|
@ -151,6 +153,7 @@ select results_eq(
|
|||
}
|
||||
]'::jsonb,
|
||||
'stable',
|
||||
1,
|
||||
'00000000-0000-0000-0000-000000000001'::uuid
|
||||
)
|
||||
$$,
|
||||
|
|
@ -306,6 +309,7 @@ select register_package('
|
|||
"email": "email1"
|
||||
}
|
||||
],
|
||||
"category": 2,
|
||||
"repository": {
|
||||
"repository_id": "00000000-0000-0000-0000-000000000001"
|
||||
}
|
||||
|
|
@ -313,12 +317,19 @@ select register_package('
|
|||
');
|
||||
select results_eq(
|
||||
$$
|
||||
select is_operator from package where name = 'package1'
|
||||
select
|
||||
is_operator,
|
||||
package_category_id
|
||||
from package
|
||||
where name = 'package1'
|
||||
$$,
|
||||
$$
|
||||
values (false)
|
||||
values (
|
||||
false,
|
||||
2
|
||||
)
|
||||
$$,
|
||||
'is_operator flag should have been updated'
|
||||
'is_operator flag and category should have been updated'
|
||||
);
|
||||
select results_eq(
|
||||
$$
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
-- Start transaction and plan tests
|
||||
begin;
|
||||
select plan(185);
|
||||
select plan(189);
|
||||
|
||||
-- Check default_text_search_config is correct
|
||||
select results_eq(
|
||||
|
|
@ -28,6 +28,7 @@ select has_table('notification');
|
|||
select has_table('opt_out');
|
||||
select has_table('organization');
|
||||
select has_table('package');
|
||||
select has_table('package_category');
|
||||
select has_table('package_views');
|
||||
select has_table('package__maintainer');
|
||||
select has_table('password_reset_code');
|
||||
|
|
@ -139,8 +140,14 @@ select columns_are('package', array[
|
|||
'channels',
|
||||
'default_channel',
|
||||
'created_at',
|
||||
'package_category_id',
|
||||
'repository_id'
|
||||
]);
|
||||
select columns_are('package_category', array[
|
||||
'package_category_id',
|
||||
'name',
|
||||
'display_name'
|
||||
]);
|
||||
select columns_are('package_views', array[
|
||||
'package_id',
|
||||
'version',
|
||||
|
|
@ -339,6 +346,9 @@ select indexes_are('package', array[
|
|||
'package_repository_id_idx',
|
||||
'package_repository_id_name_key'
|
||||
]);
|
||||
select indexes_are('package_category', array[
|
||||
'package_category_pkey'
|
||||
]);
|
||||
select indexes_are('package_views', array[
|
||||
'package_views_package_id_version_day_key'
|
||||
]);
|
||||
|
|
@ -504,6 +514,22 @@ select has_function('get_webhooks_subscribed_to_package');
|
|||
select has_function('update_webhook');
|
||||
select has_function('user_has_access_to_webhook');
|
||||
|
||||
-- Check package categories exist
select results_eq(
    'select * from package_category',
    $$ values
        (1, 'ai-machine-learning', 'AI / Machine learning'),
        (2, 'database', 'Database'),
        (3, 'integration-delivery', 'Integration and delivery'),
        (4, 'monitoring-logging', 'Monitoring and logging'),
        (5, 'networking', 'Networking'),
        (6, 'security', 'Security'),
        (7, 'storage', 'Storage'),
        (8, 'streaming-messaging', 'Streaming and messaging')
    $$,
    -- Fixed copy-paste error: this check verifies package categories, not
    -- event kinds.
    'Package categories should exist'
);
|
||||
|
||||
-- Check repository kinds exist
|
||||
select results_eq(
|
||||
'select * from repository_kind',
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ The `hub_server` alias runs the `hub` cmd, one of the two processes of the Artif
|
|||
|
||||
### Tracker
|
||||
|
||||
The `tracker` is another backend cmd in charge of indexing registered repositories metadata. On production deployments, it is usually run periodically using a `cronjob` on Kubernetes. Locally while developing, you can just run it as often as you need as any other CLI tool. The tracker requires the [OPM cli tool](https://github.com/operator-framework/operator-registry/releases) to be installed and available in your PATH.
|
||||
The `tracker` is another backend cmd in charge of indexing registered repositories metadata. On production deployments, it is usually run periodically using a `cronjob` on Kubernetes. Locally while developing, you can just run it as often as you need as any other CLI tool. The tracker requires the [OPM cli tool](https://github.com/operator-framework/operator-registry/releases) to be installed and available in your PATH, and the [TensorFlow C library](https://www.tensorflow.org/install/lang_c), so please make sure it's available before proceeding.
|
||||
|
||||
If you opened the url suggested before, you probably noticed there were no packages listed yet. This happened because no repositories had been indexed yet. If you used the configuration file suggested for Tern, some sample repositories should have been registered in the database owned by the `demo` user. To index them, we need to run the `tracker`.
|
||||
|
||||
|
|
@ -167,6 +167,7 @@ tracker:
|
|||
repositoriesNames: []
|
||||
repositoriesKinds: []
|
||||
bypassDigestCheck: false
|
||||
categoryModelPath: ../../ml/category/model
|
||||
images:
|
||||
store: pg
|
||||
```
|
||||
|
|
|
|||
2
go.mod
2
go.mod
|
|
@ -8,6 +8,8 @@ require (
|
|||
github.com/coreos/go-oidc v2.2.1+incompatible
|
||||
github.com/disintegration/imaging v1.6.2
|
||||
github.com/domodwyer/mailyak v3.1.1+incompatible
|
||||
github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e
|
||||
github.com/galeone/tfgo v0.0.0-20221023090852-d89a5c7e31e1
|
||||
github.com/ghodss/yaml v1.0.0
|
||||
github.com/go-chi/chi/v5 v5.0.8
|
||||
github.com/go-enry/go-license-detector/v4 v4.3.0
|
||||
|
|
|
|||
4
go.sum
4
go.sum
|
|
@ -332,6 +332,10 @@ github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3
|
|||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
||||
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
||||
github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e h1:9+2AEFZymTi25FIIcDwuzcOPH04z9+fV6XeLiGORPDI=
|
||||
github.com/galeone/tensorflow/tensorflow/go v0.0.0-20221023090153-6b7fa0680c3e/go.mod h1:TelZuq26kz2jysARBwOrTv16629hyUsHmIoj54QqyFo=
|
||||
github.com/galeone/tfgo v0.0.0-20221023090852-d89a5c7e31e1 h1:fsU+Je1A2kNu7e1LUAyOUrzXk50/2yP1tFGqKyNkOsI=
|
||||
github.com/galeone/tfgo v0.0.0-20221023090852-d89a5c7e31e1/go.mod h1:3YgYBeIX42t83uP27Bd4bSMxTnQhSbxl0pYSkCDB1tc=
|
||||
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
|
||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ type Package struct {
|
|||
Name string `json:"name"`
|
||||
NormalizedName string `json:"normalized_name" hash:"ignore"`
|
||||
AlternativeName string `json:"alternative_name"`
|
||||
Category PackageCategory `json:"category"`
|
||||
LogoURL string `json:"logo_url"`
|
||||
LogoImageID string `json:"logo_image_id" hash:"ignore"`
|
||||
IsOperator bool `json:"is_operator"`
|
||||
|
|
@ -129,6 +130,27 @@ func (p *Package) SetAutoGeneratedDigest() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// PackageCategory represents the category of a given package.
type PackageCategory int64

// Known package categories. The numeric values are stored in the database
// (package.package_category_id) and seeded in the package_category table, so
// both must be kept in sync. UnknownCategory is the zero value and means no
// category has been (or could be) assigned.
const (
	UnknownCategory PackageCategory = 0
	AIMachineLearning PackageCategory = 1
	Database PackageCategory = 2
	IntegrationDelivery PackageCategory = 3
	MonitoringLogging PackageCategory = 4
	Networking PackageCategory = 5
	Security PackageCategory = 6
	Storage PackageCategory = 7
	StreamingMessaging PackageCategory = 8
)

// PackageCategoryClassifier describes the methods a PackageCategoryClassifier
// implementation must provide.
type PackageCategoryClassifier interface {
	// Predict returns the predicted category for the package provided.
	Predict(p *Package) PackageCategory
}
|
||||
|
||||
// PackageManager describes the methods a PackageManager implementation must
|
||||
// provide.
|
||||
type PackageManager interface {
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ type HelmIndexLoader interface {
|
|||
}
|
||||
|
||||
// OLMOCIExporter describes the methods an OLMOCIExporter implementation must
|
||||
// must provide.
|
||||
// provide.
|
||||
type OLMOCIExporter interface {
|
||||
ExportRepository(ctx context.Context, r *Repository) (tmpDir string, err error)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ type TrackerServices struct {
|
|||
Op OCIPuller
|
||||
Is img.Store
|
||||
Sc OCISignatureChecker
|
||||
Pcc PackageCategoryClassifier
|
||||
SetupTrackerSource TrackerSourceLoader
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,75 @@
|
|||
package tracker
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/artifacthub/hub/internal/hub"
|
||||
tf "github.com/galeone/tensorflow/tensorflow/go"
|
||||
tg "github.com/galeone/tfgo"
|
||||
)
|
||||
|
||||
// PackageCategoryClassifierML classifies packages by category using a ML model.
type PackageCategoryClassifierML struct {
	// TensorFlow saved model loaded at construction time; used by Predict.
	model *tg.Model
}
|
||||
|
||||
// NewPackageCategoryClassifierML creates a new PackageCategoryClassifierML
// instance, loading the saved model from the path provided.
func NewPackageCategoryClassifierML(modelPath string) *PackageCategoryClassifierML {
	// Quiet TensorFlow C library logging: level 2 filters out INFO and
	// WARNING messages, leaving only errors.
	os.Setenv("TF_CPP_MIN_LOG_LEVEL", "2")

	return &PackageCategoryClassifierML{
		// NOTE: tg.LoadModel panics if the model at modelPath cannot be
		// loaded, so construction fails loudly on a bad path.
		model: tg.LoadModel(modelPath, []string{"serve"}, nil),
	}
}
|
||||
|
||||
// Predict returns the predicted category according to the model for the
// package provided. The prediction is based on the package's keywords. If
// anything goes wrong at any step, UnknownCategory is returned.
func (c *PackageCategoryClassifierML) Predict(p *hub.Package) hub.PackageCategory {
	defer func() {
		// model.Exec panics on error. If this happens, the predicted category
		// will be unknown (the zero value of the unnamed result, which is
		// UnknownCategory).
		_ = recover()
	}()

	// The prediction is based on the keywords, so they are required to proceed
	if p == nil || len(p.Keywords) == 0 {
		return hub.UnknownCategory
	}

	// Prepare input tensor: a single sample containing the comma separated,
	// lowercased list of the package's keywords.
	keywords := strings.ToLower(strings.Join(p.Keywords, ","))
	input, err := tf.NewTensor([][]string{{keywords}})
	if err != nil {
		return hub.UnknownCategory
	}

	// Get prediction from model. The op names correspond to the model's
	// default serving signature ("serving_default").
	results := c.model.Exec([]tf.Output{
		c.model.Op("StatefulPartitionedCall", 0),
	}, map[tf.Output]*tf.Tensor{
		c.model.Op("serving_default_input_1", 0): input,
	})
	// Expect a single result holding one row of per-category scores.
	var prediction []float32
	if len(results) == 1 {
		v, ok := results[0].Value().([][]float32)
		if ok && len(v) == 1 {
			prediction = v[0]
		}
	}
	if prediction == nil {
		return hub.UnknownCategory
	}

	// Return corresponding category from prediction: the index of the highest
	// score, shifted by one. NOTE(review): max starts at 0, so this assumes
	// scores are non-negative (e.g. softmax output); if all scores were
	// negative the first category would be returned -- confirm against the
	// model's output layer.
	var max float32
	var maxIndex int
	for i, v := range prediction {
		if v > max {
			max = v
			maxIndex = i
		}
	}
	return hub.PackageCategory(maxIndex + 1) // Package categories start at 1
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
package tracker
|
||||
|
||||
import (
|
||||
"github.com/artifacthub/hub/internal/hub"
|
||||
"github.com/stretchr/testify/mock"
|
||||
)
|
||||
|
||||
// PackageCategoryClassifierMock is a mock implementation of the
// PackageCategoryClassifier interface.
type PackageCategoryClassifierMock struct {
	// Embedded testify mock used to record and assert expected calls.
	mock.Mock
}
|
||||
|
||||
// Predict implements the PackageCategoryClassifier interface.
|
||||
func (m *PackageCategoryClassifierMock) Predict(p *hub.Package) hub.PackageCategory {
|
||||
args := m.Called(p)
|
||||
category, _ := args.Get(0).(hub.PackageCategory)
|
||||
return category
|
||||
}
|
||||
|
|
@ -109,6 +109,9 @@ func (t *Tracker) Run() error {
|
|||
continue
|
||||
}
|
||||
|
||||
// Set package category from ML model prediction
|
||||
p.Category = t.svc.Pcc.Predict(p)
|
||||
|
||||
// Register package
|
||||
t.logger.Debug().Str("name", p.Name).Str("v", p.Version).Msg("registering package")
|
||||
if err := t.svc.Pm.Register(t.svc.Ctx, p); err != nil {
|
||||
|
|
|
|||
|
|
@ -196,10 +196,12 @@ func TestTracker(t *testing.T) {
|
|||
sw.ec.On("Init", r1.RepositoryID)
|
||||
sw.rm.On("GetMetadata", r1, "").Return(nil, nil)
|
||||
sw.rm.On("GetPackagesDigest", sw.svc.Ctx, r1.RepositoryID).Return(nil, nil)
|
||||
p := source.ClonePackage(p1v1)
|
||||
sw.src.On("GetPackagesAvailable").Return(map[string]*hub.Package{
|
||||
pkg.BuildKey(p1v1): p1v1,
|
||||
pkg.BuildKey(p1v1): p,
|
||||
}, nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p1v1).Return(tests.ErrFake)
|
||||
sw.pcc.On("Predict", p).Return(hub.UnknownCategory)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p).Return(tests.ErrFake)
|
||||
expectedErr := "error registering package pkg1 version 1.0.0: fake error for tests"
|
||||
sw.ec.On("Append", r1.RepositoryID, expectedErr).Return()
|
||||
|
||||
|
|
@ -287,10 +289,12 @@ func TestTracker(t *testing.T) {
|
|||
sw.ec.On("Init", r1.RepositoryID)
|
||||
sw.rm.On("GetMetadata", r1, "").Return(nil, fmt.Errorf("error: %w", repo.ErrMetadataNotFound))
|
||||
sw.rm.On("GetPackagesDigest", sw.svc.Ctx, r1.RepositoryID).Return(nil, nil)
|
||||
p := source.ClonePackage(p1v1)
|
||||
sw.src.On("GetPackagesAvailable").Return(map[string]*hub.Package{
|
||||
pkg.BuildKey(p1v1): p1v1,
|
||||
pkg.BuildKey(p): p,
|
||||
}, nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p1v1).Return(nil)
|
||||
sw.pcc.On("Predict", p).Return(hub.UnknownCategory)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p).Return(nil)
|
||||
|
||||
// Run test and check expectations
|
||||
err := New(sw.svc, r1, zerolog.Nop()).Run()
|
||||
|
|
@ -311,10 +315,12 @@ func TestTracker(t *testing.T) {
|
|||
sw.rm.On("GetPackagesDigest", sw.svc.Ctx, r1.RepositoryID).Return(map[string]string{
|
||||
pkg.BuildKey(p1v1): "new digest",
|
||||
}, nil)
|
||||
p := source.ClonePackage(p1v1)
|
||||
sw.src.On("GetPackagesAvailable").Return(map[string]*hub.Package{
|
||||
pkg.BuildKey(p1v1): p1v1,
|
||||
pkg.BuildKey(p): p,
|
||||
}, nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p1v1).Return(nil)
|
||||
sw.pcc.On("Predict", p).Return(hub.UnknownCategory)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p).Return(nil)
|
||||
|
||||
// Run test and check expectations
|
||||
err := New(sw.svc, r1, zerolog.Nop()).Run()
|
||||
|
|
@ -331,12 +337,16 @@ func TestTracker(t *testing.T) {
|
|||
sw.ec.On("Init", r1.RepositoryID)
|
||||
sw.rm.On("GetMetadata", r1, "").Return(nil, nil)
|
||||
sw.rm.On("GetPackagesDigest", sw.svc.Ctx, r1.RepositoryID).Return(nil, nil)
|
||||
p1 := source.ClonePackage(p1v1)
|
||||
p2 := source.ClonePackage(p2v1)
|
||||
sw.src.On("GetPackagesAvailable").Return(map[string]*hub.Package{
|
||||
pkg.BuildKey(p1v1): p1v1,
|
||||
pkg.BuildKey(p2v1): p2v1,
|
||||
pkg.BuildKey(p1): p1,
|
||||
pkg.BuildKey(p2): p2,
|
||||
}, nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p1v1).Return(nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p2v1).Return(nil)
|
||||
sw.pcc.On("Predict", p1).Return(hub.UnknownCategory)
|
||||
sw.pcc.On("Predict", p2).Return(hub.UnknownCategory)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p1).Return(nil)
|
||||
sw.pm.On("Register", sw.svc.Ctx, p2).Return(nil)
|
||||
|
||||
// Run test and check expectations
|
||||
err := New(sw.svc, r1, zerolog.Nop()).Run()
|
||||
|
|
@ -492,6 +502,7 @@ type servicesWrapper struct {
|
|||
ec *repo.ErrorsCollectorMock
|
||||
hc *tests.HTTPClientMock
|
||||
is *img.StoreMock
|
||||
pcc *PackageCategoryClassifierMock
|
||||
src *source.Mock
|
||||
svc *hub.TrackerServices
|
||||
}
|
||||
|
|
@ -505,6 +516,7 @@ func newServicesWrapper() *servicesWrapper {
|
|||
ec := &repo.ErrorsCollectorMock{}
|
||||
hc := &tests.HTTPClientMock{}
|
||||
is := &img.StoreMock{}
|
||||
pcc := &PackageCategoryClassifierMock{}
|
||||
src := &source.Mock{}
|
||||
|
||||
// Setup tracker services using mocks
|
||||
|
|
@ -518,6 +530,7 @@ func newServicesWrapper() *servicesWrapper {
|
|||
Ec: ec,
|
||||
Hc: hc,
|
||||
Is: is,
|
||||
Pcc: pcc,
|
||||
SetupTrackerSource: func(i *hub.TrackerSourceInput) hub.TrackerSource {
|
||||
return src
|
||||
},
|
||||
|
|
@ -532,6 +545,7 @@ func newServicesWrapper() *servicesWrapper {
|
|||
ec: ec,
|
||||
hc: hc,
|
||||
is: is,
|
||||
pcc: pcc,
|
||||
src: src,
|
||||
svc: svc,
|
||||
}
|
||||
|
|
@ -545,5 +559,6 @@ func (sw *servicesWrapper) assertExpectations(t *testing.T) {
|
|||
sw.ec.AssertExpectations(t)
|
||||
sw.hc.AssertExpectations(t)
|
||||
sw.is.AssertExpectations(t)
|
||||
sw.pcc.AssertExpectations(t)
|
||||
sw.src.AssertExpectations(t)
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -119,7 +119,6 @@ database;keyvalue,in-memory,database,serverless,raft
|
|||
database;kubernetes-dbaas,operator,database-as-a-service,dbaas,stored procedures,kubernetes operator,go,dbms,database management systems,stored procedures,automation,provisioning,databases
|
||||
database;kubernetes,database,opendj,ldap
|
||||
database;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,couchbase,big data,nosql,memcached,couchdb
|
||||
database;kubernetes,kubectl,plugin
|
||||
database;ldap,openldap,iam-stack,high availability
|
||||
database;low-code,database,cluster
|
||||
database;mariadb-operator,mariadb,kubernetes,database,database
|
||||
|
|
@ -574,7 +573,6 @@ monitoring-logging;kubernetes,cloudwatch,monitoring
|
|||
monitoring-logging;kubernetes,cncf,networking,cni,security,open vswitch,ovs,antrea,observability,flow visibility
|
||||
monitoring-logging;kubernetes,cncf,networking,cni,security,open vswitch,ovs,antrea,observability,flow visibility,flow aggregator
|
||||
monitoring-logging;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,logging,efk,elastic,fluend,kibana
|
||||
monitoring-logging;kubernetes,kubectl,plugin
|
||||
monitoring-logging;kubernetes,metrics-server,metrics
|
||||
monitoring-logging;kubernetes,monitoring-mixin,portefaix
|
||||
monitoring-logging;kubernetes,monitoring,slack
|
||||
|
|
@ -866,7 +864,6 @@ networking;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,istio,kia
|
|||
networking;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,openshift,router,haproxy
|
||||
networking;kubernetes,ingress,nginx,controller
|
||||
networking;kubernetes,istio,operator,service,mesh
|
||||
networking;kubernetes,kubectl,plugin
|
||||
networking;kubernetes,network,calico,calico-node,bgp,plugin,cni,node,felix
|
||||
networking;kubernetes,operator,multi-tenancy,multi-tenant,multitenancy,multitenant,namespace,proxy
|
||||
networking;kusk-gateway,kusk-gateway-api,api,openapi
|
||||
|
|
@ -1054,7 +1051,6 @@ security;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,security,si
|
|||
security;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,security,vault,secret management
|
||||
security;kubernetes,helm,startx,cluster-chart,cluster,infrastructure,security,vault,vault-config,secret management
|
||||
security;kubernetes,kube-hunter,security
|
||||
security;kubernetes,kubectl,plugin
|
||||
security;kubernetes,nirmata,kyverno,aws,adapter,policy
|
||||
security;kubernetes,nirmata,policy agent,validating webhook,admissions controller
|
||||
security;kubernetes,oauth,authentication,google,github
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue