# Post-patch content of hack/generate-devstats-repo-sql.py for the hunk
# "@@ -34,50 +34,153 @@".
# NOTE(review): the file's first 33 lines are outside this hunk; presumably
# they bind the `argparse` and `yaml` names used below -- confirm.
import json
import re
import sys

# One "update" statement per repo group: the first placeholder is the group
# name, the second is the comma-separated list of quoted repo names.
repo_group_sql_template = """
update gha_repos set repo_group = '{}' where name in (
{}
);
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
-- Add repository groups
"""

# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
# if this differs, consider cncf the authoritative source and update this
repo_groups_sql_footer = """
-- All other unknown repositories should have 'Other' repository group
-- update gha_repos set repo_group = 'Other' where repo_group is null;

-- By default alias is the newest repo name for given repo ID
update
  gha_repos r
set
  alias = coalesce((
    select e.dup_repo_name
    from
      gha_events e
    where
      e.repo_id = r.id
    order by
      e.created_at desc
    limit 1
  ), name)
;

update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';

select
  repo_group,
  count(*) as number_of_repos
from
  gha_repos
where
  repo_group is not null
group by
  repo_group
order by
  number_of_repos desc,
  repo_group asc;

"""

special_case_groups = [{
    # the main repo has no single owner and has gone by many names
    'name': 'Kubernetes',
    'repos': [
        'kubernetes/kubernetes',
        'GoogleCloudPlatform/kubernetes',
        'kubernetes',
        'kubernetes/'
    ]
}]

# devstats isn't aware of repo renames or migrations; we need to keep
# old repo names in its sql groups present for historical purposes;
#
# when reconciling deletions from repo_groups.sql by this script, use
# github.com/kubernetes/org issues to determine why; renamed, migrated,
# or used-and-retired repos belong here; unused/deleted repos do not
renamed_repos = {
    'sig-architecture': [
        'kubernetes/contrib',
    ],
    'sig-api-machinery': [
        'kubernetes-incubator/apiserver-builder',
    ],
    'sig-cluster-lifecycle': [
        'kubernetes-incubator/kubespray',
    ],
    'sig-multicluster': [
        'kubernetes-sigs/federation-v2',
    ],
    'sig-node': [
        'kubernetes-incubator/node-feature-discovery',
    ],
    'sig-pm': [
        'kubernetes/features',
    ],
    'sig-service-catalog': [
        'kubernetes-incubator/service-catalog',
    ]
}

# Rewrites a raw.githubusercontent.com URL on the master branch to
# "<org>/<repo>/<path inside the repo>"; compiled once instead of per URI.
_RAW_OWNERS_URL = re.compile(
    r"https://raw.githubusercontent.com/(.*)/master/(.*)")


def _sql_escape(value):
    """Return value as text that is safe inside a single-quoted SQL string.

    Single quotes are doubled, which is the standard SQL way to embed a
    quote in a string constant; text without quotes is returned unchanged.
    """
    return str(value).replace("'", "''")


def repos_from_k8s_group(k8s_group):
    """Returns a list of org/repos given a kubernetes community group

    Args:
        k8s_group: mapping for one group; its optional 'subprojects' entry
            is a list of mappings, each with an optional 'owners' list of
            OWNERS file URLs.

    Returns:
        Sorted list of unique 'org/repo' strings. A missing or null
        'subprojects' or 'owners' entry contributes nothing.
    """
    repos = set()
    for subproject in k8s_group.get('subprojects') or []:
        for uri in subproject.get('owners') or []:
            owners_path = _RAW_OWNERS_URL.sub(r"\1/\2", uri)
            path_parts = owners_path.split('/')
            # org/repo is owned by k8s_group if org/repo/OWNERS is in one of
            # their subprojects; entries with fewer than three path
            # components cannot name a top-level OWNERS file and are skipped
            # instead of raising IndexError
            if len(path_parts) > 2 and path_parts[2] == 'OWNERS':
                repos.add('/'.join(path_parts[:2]))
    return sorted(repos)


def k8s_group_name(k8s_group):
    """Returns the devstats repo group name for a community group

    The group's 'dir' prefix selects the form: 'sig-' gives 'SIG <name>',
    'committee-' gives '<name> Committee', and anything else (including a
    missing or null 'dir') gives 'UNKNOWN <dir>'.
    """
    group_dir = k8s_group.get('dir') or ''
    if group_dir.startswith('sig-'):
        return "SIG " + k8s_group['name']
    if group_dir.startswith('committee-'):
        return k8s_group['name'] + " Committee"
    return "UNKNOWN " + group_dir


def write_repo_groups_template(name, repos, fp):
    """Writes one repo group update statement to fp

    Args:
        name: repo group name to assign.
        repos: repo names belonging to the group, written in the order
            given; nothing is written when this is empty.
        fp: writable text file object.
    """
    if not repos:
        return
    # escape quotes so a name containing ' still yields valid SQL
    quoted_repos = ',\n'.join(
        " '{}'".format(_sql_escape(repo)) for repo in repos)
    fp.write(repo_group_sql_template.format(_sql_escape(name), quoted_repos))


def write_repo_groups_sql(k8s_groups, fp):
    """Writes the complete repo_groups.sql content to fp

    Output order: header, the special case groups, one statement per group
    under 'sigs' and then 'committees', footer. Each group's repos are the
    union of those found through its subprojects and any renamed_repos
    entries for its 'dir', sorted.

    Args:
        k8s_groups: mapping with optional 'sigs' and 'committees' lists; a
            missing or null list is treated as empty.
        fp: writable text file object.
    """
    fp.write(repo_groups_sql_header)
    for group in special_case_groups:
        write_repo_groups_template(group['name'], group['repos'], fp)
    for group_type in ['sigs', 'committees']:
        for group in k8s_groups.get(group_type) or []:
            # .get keeps this consistent with k8s_group_name, which also
            # tolerates a group without 'dir'
            historical = renamed_repos.get(group.get('dir'), [])
            repos = set(repos_from_k8s_group(group)) | set(historical)
            write_repo_groups_template(
                k8s_group_name(group), sorted(repos), fp)
    fp.write(repo_groups_sql_footer)


def main(sigs_yaml, repo_groups_sql):
    """Loads sigs_yaml and writes the generated sql

    Args:
        sigs_yaml: path of the yaml file to read.
        repo_groups_sql: path of the file to write, or None to write to
            stdout.
    """
    # NOTE(review): `yaml` is bound outside the visible hunk;
    # round_trip_load looks like the ruamel.yaml API -- confirm.
    with open(sigs_yaml) as fp:
        k8s_groups = yaml.round_trip_load(fp)

    if repo_groups_sql is not None:
        with open(repo_groups_sql, 'w') as fp:
            write_repo_groups_sql(k8s_groups, fp)
    else:
        write_repo_groups_sql(k8s_groups, sys.stdout)


if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(
        description='Generate a repo_groups.sql intended for github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql')
    # NOTE(review): the visible source stops partway through this call; only
    # the two arguments shown there are reproduced. Any further options,
    # argument parsing and the call to main() lie beyond the visible text
    # and must be carried over from the full file.
    PARSER.add_argument(
        '--sigs-yaml',
        default='./sigs.yaml',
    )