#!/usr/bin/env python3

# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Output devstats repo_groups.sql based on subproject definitions in sigs.yaml

This is likely missing a few repos because:
- some repos lack an owner (eg: kubernetes/kubernetes)
- it doesn't enumerate all repos from all kubernetes-owned orgs
- it ignores the fact that committees can own repos, only grouping by sig

The sql generated is NOT intended to overwrite/replace the file that lives at
github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql, but instead aid a
human in doing some manual updates to the file. Future improvements to this
script could eliminate that part of the process, but it's where we are today.
"""

import argparse
import json
import re
import sys

try:
    import ruamel.yaml as yaml
except ImportError:
    # Only main() needs the YAML parser; keeping the import failure soft lets
    # the SQL-generation helpers be imported (and tested) without it. main()
    # reports the missing dependency with a clear message.
    yaml = None

# One "update" statement per repo group: first placeholder is the group
# name, second is the comma-separated list of quoted repo names.
repo_group_sql_template = """
update gha_repos set repo_group = '{}' where name in (
{}
);
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
-- Add repository groups
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_footer = """
-- All other unknown repositories should have 'Other' repository group
-- update gha_repos set repo_group = 'Other' where repo_group is null;

-- By default alias is the newest repo name for given repo ID
update
  gha_repos r
set
  alias = coalesce((
    select e.dup_repo_name
    from
      gha_events e
    where
      e.repo_id = r.id
    order by
      e.created_at desc
    limit 1
  ), name)
;

update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';

select
  repo_group,
  count(*) as number_of_repos
from
  gha_repos
where
  repo_group is not null
group by
  repo_group
order by
  number_of_repos desc,
  repo_group asc;
"""

special_case_groups = [{
    # the main repo has no single owner and has gone by many names
    'name': 'Kubernetes',
    'repos': [
        'kubernetes/kubernetes',
        'GoogleCloudPlatform/kubernetes',
        'kubernetes',
        'kubernetes/'
    ]
}]

# devstats isn't aware of repo renames or migrations; we need to keep
# old repo names in its sql groups present for historical purposes;
#
# when reconciling deletions from repo_groups.sql by this script, use
# github.com/kubernetes/org issues to determine why; renamed, migrated,
# or used-and-retired repos belong here; unused/deleted repos do not
renamed_repos = {
    'sig-architecture': [
        'kubernetes/contrib',
    ],
    'sig-api-machinery': [
        'kubernetes-incubator/apiserver-builder',
    ],
    'sig-cluster-lifecycle': [
        'kubernetes-incubator/kubespray',
    ],
    'sig-multicluster': [
        'kubernetes-sigs/federation-v2',
    ],
    'sig-node': [
        'kubernetes-incubator/node-feature-discovery',
    ],
    'sig-pm': [
        'kubernetes/features',
    ],
    'sig-service-catalog': [
        'kubernetes-incubator/service-catalog',
    ]
}

# Rewrites a raw.githubusercontent.com URL "<org>/<repo>/<branch>/<path>" to
# "<org>/<repo>/<path>". The branch segment is matched generically so that
# OWNERS files on "main" (or any other single-segment branch) are recognised,
# not only those on "master".
_RAW_OWNERS_URL_RE = re.compile(
    r"https://raw\.githubusercontent\.com/([^/]+/[^/]+)/[^/]+/(.*)")


def _sql_escape(value):
    """Return str(value) with single quotes doubled for a SQL string literal."""
    return str(value).replace("'", "''")


def repos_from_k8s_group(k8s_group):
    """Returns a sorted list of org/repos given a kubernetes community group.

    A repo counts as owned by the group when one of the group's subprojects
    lists the repo's top-level OWNERS file (org/repo/OWNERS). OWNERS files
    nested deeper in a repo, and entries too short to name one, are ignored.
    A missing or null 'subprojects' / 'owners' value is treated as empty.
    """
    repos = set()
    for sp in k8s_group.get('subprojects') or []:
        for uri in sp.get('owners') or []:
            owners_path = _RAW_OWNERS_URL_RE.sub(r"\1/\2", uri)
            path_parts = owners_path.split('/')
            # org/repo is owned by k8s_group if org/repo/OWNERS is in one of
            # their subprojects
            if len(path_parts) >= 3 and path_parts[2] == 'OWNERS':
                repos.add('/'.join(path_parts[0:2]))
    return sorted(repos)


def k8s_group_name(k8s_group):
    """Returns the devstats repo group name for a community group.

    The group's 'dir' prefix decides the form: "SIG <name>" for sig-*,
    "<name> Committee" for committee-*, otherwise "UNKNOWN <dir>".
    """
    group_dir = k8s_group.get('dir', '')
    if group_dir.startswith('sig-'):
        return "SIG " + k8s_group['name']
    if group_dir.startswith('committee-'):
        return k8s_group['name'] + " Committee"
    return "UNKNOWN " + group_dir


def write_repo_groups_template(name, repos, fp):
    """Writes one repo-group update statement to fp; no-op if repos is empty.

    Single quotes in the group name and repo names are doubled so the
    generated SQL string literals stay well-formed.
    """
    if not repos:
        return
    quoted_repos = ',\n'.join(
        "  '{}'".format(_sql_escape(r)) for r in repos)
    fp.write(repo_group_sql_template.format(_sql_escape(name), quoted_repos))


def write_repo_groups_sql(k8s_groups, fp):
    """Writes the complete repo_groups.sql to fp.

    Output order: header, the special-case groups, one statement per sig and
    then per committee (current repos merged with the historical names in
    renamed_repos), and finally the footer. A missing or null 'sigs' or
    'committees' section is treated as empty.
    """
    fp.write(repo_groups_sql_header)
    for g in special_case_groups:
        write_repo_groups_template(g['name'], g['repos'], fp)
    for group_type in ['sigs', 'committees']:
        for g in k8s_groups.get(group_type) or []:
            repos = set(repos_from_k8s_group(g))
            repos |= set(renamed_repos.get(g.get('dir', ''), []))
            write_repo_groups_template(k8s_group_name(g), sorted(repos), fp)
    fp.write(repo_groups_sql_footer)


def main(sigs_yaml, repo_groups_sql):
    """Loads sigs_yaml and writes the generated SQL.

    Output goes to the path repo_groups_sql, or to stdout when it is None.
    Exits with an error message if ruamel.yaml is not installed.
    """
    if yaml is None:
        sys.exit('error: the ruamel.yaml package is required '
                 '(pip install ruamel.yaml)')
    with open(sigs_yaml, encoding='utf-8') as fp:
        # yaml.round_trip_load() is deprecated and raises on
        # ruamel.yaml >= 0.18; the YAML() object is the supported API.
        # An empty file loads as None, so fall back to an empty mapping.
        k8s_groups = yaml.YAML(typ='rt').load(fp) or {}

    if repo_groups_sql is not None:
        with open(repo_groups_sql, 'w', encoding='utf-8') as fp:
            write_repo_groups_sql(k8s_groups, fp)
    else:
        write_repo_groups_sql(k8s_groups, sys.stdout)


if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(
        description='Generate a repo_groups.sql intended for github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql')
    PARSER.add_argument(
        '--sigs-yaml',
        default='./sigs.yaml',
        help='Path to sigs.yaml')
    PARSER.add_argument(
        '--repo-groups-sql',
        help='Path to output repo_groups.sql if provided')
    ARGS = PARSER.parse_args()

    main(ARGS.sigs_yaml, ARGS.repo_groups_sql)