Merge pull request #1336 from aleksandra-malinowska/gce-api-metrics

Add metrics for GCP API usage
This commit is contained in:
k8s-ci-robot 2018-11-06 07:32:55 -08:00 committed by GitHub
commit 08d2bfd1f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 114 additions and 0 deletions

View File

@ -95,10 +95,12 @@ func NewCustomAutoscalingGceClientV1(client *http.Client, projectId, serverUrl s
}
// FetchMachineType looks up a single machine type by name in the given zone
// of the client's project. The request is recorded through registerRequest
// (resource "machine_types", verb "get") before the API call is issued.
//
// It returns whatever the underlying MachineTypes.Get call returns: the
// machine type on success, or the API error unchanged.
func (client *autoscalingGceClientV1) FetchMachineType(zone, machineType string) (*gce.MachineType, error) {
	registerRequest("machine_types", "get")
	getCall := client.gceService.MachineTypes.Get(client.projectId, zone, machineType)
	return getCall.Do()
}
func (client *autoscalingGceClientV1) FetchMachineTypes(zone string) ([]*gce.MachineType, error) {
registerRequest("machine_types", "list")
machines, err := client.gceService.MachineTypes.List(client.projectId, zone).Do()
if err != nil {
return nil, err
@ -107,6 +109,7 @@ func (client *autoscalingGceClientV1) FetchMachineTypes(zone string) ([]*gce.Mac
}
func (client *autoscalingGceClientV1) FetchMigTargetSize(migRef GceRef) (int64, error) {
registerRequest("instance_group_managers", "get")
igm, err := client.gceService.InstanceGroupManagers.Get(migRef.Project, migRef.Zone, migRef.Name).Do()
if err != nil {
return 0, err
@ -115,6 +118,7 @@ func (client *autoscalingGceClientV1) FetchMigTargetSize(migRef GceRef) (int64,
}
func (client *autoscalingGceClientV1) FetchMigBasename(migRef GceRef) (string, error) {
registerRequest("instance_group_managers", "get")
igm, err := client.gceService.InstanceGroupManagers.Get(migRef.Project, migRef.Zone, migRef.Name).Do()
if err != nil {
return "", err
@ -123,6 +127,7 @@ func (client *autoscalingGceClientV1) FetchMigBasename(migRef GceRef) (string, e
}
func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error {
registerRequest("instance_group_managers", "resize")
op, err := client.gceService.InstanceGroupManagers.Resize(migRef.Project, migRef.Zone, migRef.Name, size).Do()
if err != nil {
return err
@ -133,6 +138,7 @@ func (client *autoscalingGceClientV1) ResizeMig(migRef GceRef, size int64) error
func (client *autoscalingGceClientV1) waitForOp(operation *gce.Operation, project, zone string) error {
for start := time.Now(); time.Since(start) < client.operationWaitTimeout; time.Sleep(client.operationPollInterval) {
glog.V(4).Infof("Waiting for operation %s %s %s", project, zone, operation.Name)
registerRequest("zone_operations", "get")
if op, err := client.gceService.ZoneOperations.Get(project, zone, operation.Name).Do(); err == nil {
glog.V(4).Infof("Operation %s %s %s status: %s", project, zone, operation.Name, op.Status)
if op.Status == "DONE" {
@ -152,6 +158,7 @@ func (client *autoscalingGceClientV1) DeleteInstances(migRef GceRef, instances [
for _, i := range instances {
req.Instances = append(req.Instances, GenerateInstanceUrl(*i))
}
registerRequest("instance_group_managers", "delete_instances")
op, err := client.gceService.InstanceGroupManagers.DeleteInstances(migRef.Project, migRef.Zone, migRef.Name, &req).Do()
if err != nil {
return err
@ -160,6 +167,7 @@ func (client *autoscalingGceClientV1) DeleteInstances(migRef GceRef, instances [
}
func (client *autoscalingGceClientV1) FetchMigInstances(migRef GceRef) ([]GceRef, error) {
registerRequest("instance_group_managers", "list_managed_instances")
instances, err := client.gceService.InstanceGroupManagers.ListManagedInstances(migRef.Project, migRef.Zone, migRef.Name).Do()
if err != nil {
glog.V(4).Infof("Failed MIG info request for %s %s %s: %v", migRef.Project, migRef.Zone, migRef.Name, err)
@ -177,6 +185,7 @@ func (client *autoscalingGceClientV1) FetchMigInstances(migRef GceRef) ([]GceRef
}
func (client *autoscalingGceClientV1) FetchZones(region string) ([]string, error) {
registerRequest("regions", "get")
r, err := client.gceService.Regions.Get(client.projectId, region).Do()
if err != nil {
return nil, fmt.Errorf("cannot get zones for GCE region %s: %v", region, err)
@ -189,6 +198,7 @@ func (client *autoscalingGceClientV1) FetchZones(region string) ([]string, error
}
func (client *autoscalingGceClientV1) FetchMigTemplate(migRef GceRef) (*gce.InstanceTemplate, error) {
registerRequest("instance_group_managers", "get")
igm, err := client.gceService.InstanceGroupManagers.Get(migRef.Project, migRef.Zone, migRef.Name).Do()
if err != nil {
return nil, err
@ -198,12 +208,14 @@ func (client *autoscalingGceClientV1) FetchMigTemplate(migRef GceRef) (*gce.Inst
return nil, err
}
_, templateName := path.Split(templateUrl.EscapedPath())
registerRequest("instance_templates", "get")
return client.gceService.InstanceTemplates.Get(migRef.Project, templateName).Do()
}
func (client *autoscalingGceClientV1) FetchMigsWithName(zone string, name *regexp.Regexp) ([]string, error) {
filter := fmt.Sprintf("name eq %s", name)
links := make([]string, 0)
registerRequest("instance_groups", "list")
req := client.gceService.InstanceGroups.List(client.projectId, zone).Filter(filter)
if err := req.Pages(context.TODO(), func(page *gce.InstanceGroupList) error {
for _, ig := range page.Items {

View File

@ -338,5 +338,7 @@ func BuildGCE(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
if err != nil {
glog.Fatalf("Failed to create GCE cloud provider: %v", err)
}
// Register GCE API usage metrics.
RegisterMetrics()
return provider
}

View File

@ -0,0 +1,46 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gce
import (
"github.com/prometheus/client_golang/prometheus"
)
// caNamespace is the Prometheus namespace applied to the metrics declared in
// this file.
const (
caNamespace = "cluster_autoscaler"
)
var (
/**** Metrics related to GCE API usage ****/
// requestCounter is a counter vector for GCE API requests. It is declared
// under the caNamespace namespace with the name "gce_request_count" and is
// partitioned by two labels, in this order: "resource" and "verb".
// Callers of WithLabelValues must pass the values in that same order.
requestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: caNamespace,
Name: "gce_request_count",
Help: "Counter of GCE API requests for each verb and API resource.",
}, []string{"resource", "verb"},
)
)
// RegisterMetrics registers all GCE metrics.
//
// Currently that is only requestCounter, which is handed to
// prometheus.MustRegister.
// NOTE(review): MustRegister panics when registration fails (for example if
// the same collector is registered twice), so this is presumably meant to be
// called once per process — confirm against callers.
func RegisterMetrics() {
prometheus.MustRegister(requestCounter)
}
// registerRequest records a single GCE API request by bumping the
// requestCounter series identified by the given resource and verb labels.
func registerRequest(resource string, verb string) {
	series := requestCounter.WithLabelValues(resource, verb)
	series.Inc()
}

View File

@ -52,6 +52,7 @@ func NewAutoscalingGkeClientV1(client *http.Client, projectId, location, cluster
}
func (m *autoscalingGkeClientV1) GetCluster() (Cluster, error) {
registerRequest("clusters", "get")
clusterResponse, err := m.gkeService.Projects.Locations.Clusters.Get(m.clusterPath).Do()
if err != nil {
return Cluster{}, err

View File

@ -74,6 +74,7 @@ func NewAutoscalingGkeClientV1beta1(client *http.Client, projectId, location, cl
}
func (m *autoscalingGkeClientV1beta1) GetCluster() (Cluster, error) {
registerRequest("clusters", "get")
clusterResponse, err := m.gkeBetaService.Projects.Locations.Clusters.Get(m.clusterPath).Do()
if err != nil {
return Cluster{}, err
@ -126,6 +127,7 @@ func buildResourceLimiter(cluster *gke_api_beta.Cluster) *cloudprovider.Resource
}
func (m *autoscalingGkeClientV1beta1) DeleteNodePool(toBeRemoved string) error {
registerRequest("node_pools", "delete")
deleteOp, err := m.gkeBetaService.Projects.Locations.Clusters.NodePools.Delete(
fmt.Sprintf(m.nodePoolPath, toBeRemoved)).Do()
if err != nil {
@ -200,6 +202,7 @@ func (m *autoscalingGkeClientV1beta1) CreateNodePool(mig *GkeMig) error {
Autoscaling: &autoscaling,
},
}
registerRequest("node_pools", "create")
createOp, err := m.gkeBetaService.Projects.Locations.Clusters.NodePools.Create(
m.clusterPath, &createRequest).Do()
if err != nil {
@ -211,6 +214,7 @@ func (m *autoscalingGkeClientV1beta1) CreateNodePool(mig *GkeMig) error {
func (m *autoscalingGkeClientV1beta1) waitForGkeOp(op *gke_api_beta.Operation) error {
for start := time.Now(); time.Since(start) < m.operationWaitTimeout; time.Sleep(m.operationPollInterval) {
glog.V(4).Infof("Waiting for operation %s %s", op.TargetLink, op.Name)
registerRequest("operations", "get")
if op, err := m.gkeBetaService.Projects.Locations.Operations.Get(
fmt.Sprintf(m.operationPath, op.Name)).Do(); err == nil {
glog.V(4).Infof("Operation %s %s status: %s", op.TargetLink, op.Name, op.Status)

View File

@ -444,5 +444,8 @@ func BuildGKE(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
if err != nil {
glog.Fatalf("Failed to create GKE cloud provider: %v", err)
}
// Register GKE & GCE API usage metrics.
registerMetrics()
gce.RegisterMetrics()
return provider
}

View File

@ -0,0 +1,46 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gke
import (
"github.com/prometheus/client_golang/prometheus"
)
// caNamespace is the Prometheus namespace applied to the metrics declared in
// this file.
const (
caNamespace = "cluster_autoscaler"
)
var (
/**** Metrics related to GKE API usage ****/
// requestCounter is a counter vector for GKE API requests. It is declared
// under the caNamespace namespace with the name "gke_request_count" and is
// partitioned by two labels, in this order: "resource" and "verb".
// Callers of WithLabelValues must pass the values in that same order.
requestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: caNamespace,
Name: "gke_request_count",
Help: "Counter of GKE API requests for each verb and API resource.",
}, []string{"resource", "verb"},
)
)
// registerMetrics registers all GKE metrics.
//
// Currently that is only requestCounter, which is handed to
// prometheus.MustRegister.
// NOTE(review): MustRegister panics when registration fails (for example if
// the same collector is registered twice), so this is presumably meant to be
// called once per process — confirm against callers.
func registerMetrics() {
prometheus.MustRegister(requestCounter)
}
// registerRequest records a single GKE API request by bumping the
// requestCounter series identified by the given resource and verb labels.
func registerRequest(resource string, verb string) {
	series := requestCounter.WithLabelValues(resource, verb)
	series.Inc()
}