Add more details to generate-devstats-repo-sql.py
There was still a lot of manual massaging of the script's output that needed to happen before it was usable as a PR to cncf/devstats. I have now tried to encode as much of that knowledge as possible in code and comments here, to allow the output of the script to overwrite the existing file.
- add header/footer
- add comment pointing to this script
- add special case group for "Kubernetes"
- add support for committees that own code
- (manually) keep track of old repo names for history
This commit is contained in:
parent
f784eb4ab8
commit
7bd7f2c344
|
@ -34,50 +34,153 @@ import json
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
update_gha_repos_template = """
|
repo_group_sql_template = """
|
||||||
update gha_repos set repo_group = 'SIG {}' where name in (
|
update gha_repos set repo_group = '{}' where name in (
|
||||||
{}
|
{}
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def repos_from_sig(sig):
|
# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
|
||||||
"""Returns a list of org/repos given a sig"""
|
# if this differs, consider cncf the authoritative source and update this
|
||||||
|
repo_groups_sql_header = """-- generated by github.com/kubernetes/community/hack/generate-devstats-repo-sql.py
|
||||||
|
-- Add repository groups
|
||||||
|
"""
|
||||||
|
|
||||||
|
# copied from github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql,
|
||||||
|
# if this differs, consider cncf the authoritative source and update this
|
||||||
|
repo_groups_sql_footer = """
|
||||||
|
-- All other unknown repositories should have 'Other' repository group
|
||||||
|
-- update gha_repos set repo_group = 'Other' where repo_group is null;
|
||||||
|
|
||||||
|
-- By default alias is the newest repo name for given repo ID
|
||||||
|
update
|
||||||
|
gha_repos r
|
||||||
|
set
|
||||||
|
alias = coalesce((
|
||||||
|
select e.dup_repo_name
|
||||||
|
from
|
||||||
|
gha_events e
|
||||||
|
where
|
||||||
|
e.repo_id = r.id
|
||||||
|
order by
|
||||||
|
e.created_at desc
|
||||||
|
limit 1
|
||||||
|
), name)
|
||||||
|
;
|
||||||
|
|
||||||
|
update gha_repos set alias = 'kubernetes/kubernetes' where name like '%kubernetes' or name = 'kubernetes/';
|
||||||
|
|
||||||
|
select
|
||||||
|
repo_group,
|
||||||
|
count(*) as number_of_repos
|
||||||
|
from
|
||||||
|
gha_repos
|
||||||
|
where
|
||||||
|
repo_group is not null
|
||||||
|
group by
|
||||||
|
repo_group
|
||||||
|
order by
|
||||||
|
number_of_repos desc,
|
||||||
|
repo_group asc;
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
special_case_groups = [{
|
||||||
|
# the main repo has no single owner and has gone by many names
|
||||||
|
'name': 'Kubernetes',
|
||||||
|
'repos': [
|
||||||
|
'kubernetes/kubernetes',
|
||||||
|
'GoogleCloudPlatform/kubernetes',
|
||||||
|
'kubernetes',
|
||||||
|
'kubernetes/'
|
||||||
|
]
|
||||||
|
}]
|
||||||
|
|
||||||
|
# devstats isn't aware of repo renames or migrations; we need to keep
|
||||||
|
# old repo names present in its sql groups for historical purposes;
|
||||||
|
#
|
||||||
|
# when reconciling deletions from repo_groups.sql by this script, use
|
||||||
|
# github.com/kubernetes/org issues to determine why; renamed, migrated,
|
||||||
|
# or used-and-retired repos belong here; unused/deleted repos do not
|
||||||
|
renamed_repos = {
|
||||||
|
'sig-architecture': [
|
||||||
|
'kubernetes/contrib',
|
||||||
|
],
|
||||||
|
'sig-api-machinery': [
|
||||||
|
'kubernetes-incubator/apiserver-builder',
|
||||||
|
],
|
||||||
|
'sig-cluster-lifecycle': [
|
||||||
|
'kubernetes-incubator/kubespray',
|
||||||
|
],
|
||||||
|
'sig-multicluster': [
|
||||||
|
'kubernetes-sigs/federation-v2',
|
||||||
|
],
|
||||||
|
'sig-node': [
|
||||||
|
'kubernetes-incubator/node-feature-discovery',
|
||||||
|
],
|
||||||
|
'sig-pm': [
|
||||||
|
'kubernetes/features',
|
||||||
|
],
|
||||||
|
'sig-service-catalog': [
|
||||||
|
'kubernetes-incubator/service-catalog',
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
def repos_from_k8s_group(k8s_group):
|
||||||
|
"""Returns a list of org/repos given a kubernetes community group"""
|
||||||
repos = {}
|
repos = {}
|
||||||
subprojects = sig.get('subprojects', [])
|
subprojects = k8s_group.get('subprojects', [])
|
||||||
if subprojects is None:
|
if subprojects is None:
|
||||||
subprojects = []
|
subprojects = []
|
||||||
for sp in subprojects:
|
for sp in subprojects:
|
||||||
for uri in sp['owners']:
|
for uri in sp['owners']:
|
||||||
owners_path = re.sub(r"https://raw.githubusercontent.com/(.*)/master/(.*)",r"\1/\2",uri)
|
owners_path = re.sub(r"https://raw.githubusercontent.com/(.*)/master/(.*)",r"\1/\2",uri)
|
||||||
path_parts = owners_path.split('/')
|
path_parts = owners_path.split('/')
|
||||||
# org/repo is owned by sig if org/repo/OWNERS is in one of their subprojects
|
# org/repo is owned by k8s_group if org/repo/OWNERS is in one of their subprojects
|
||||||
if path_parts[2] == 'OWNERS':
|
if path_parts[2] == 'OWNERS':
|
||||||
repo = '/'.join(path_parts[0:2])
|
repo = '/'.join(path_parts[0:2])
|
||||||
repos[repo] = True
|
repos[repo] = True
|
||||||
return sorted(repos.keys())
|
return sorted(repos.keys())
|
||||||
|
|
||||||
def write_repo_groups_sql(sigs, fp):
|
def k8s_group_name(k8s_group):
|
||||||
for sig in sigs['sigs']:
|
group_dir = k8s_group.get('dir', '')
|
||||||
repos = repos_from_sig(sig)
|
if group_dir.startswith('sig-'):
|
||||||
if len(repos):
|
return "SIG " + k8s_group['name']
|
||||||
fp.write(
|
if group_dir.startswith('committee-'):
|
||||||
update_gha_repos_template.format(
|
return k8s_group['name'] + " Committee"
|
||||||
sig['name'],
|
return "UNKNOWN " + group_dir
|
||||||
',\n'.join([' \'{}\''.format(r) for r in repos])))
|
|
||||||
|
def write_repo_groups_template(name, repos, fp):
|
||||||
|
if len(repos):
|
||||||
|
fp.write(
|
||||||
|
repo_group_sql_template.format(
|
||||||
|
name,
|
||||||
|
',\n'.join([' \'{}\''.format(r) for r in repos])))
|
||||||
|
|
||||||
|
def write_repo_groups_sql(k8s_groups, fp):
|
||||||
|
fp.write(repo_groups_sql_header)
|
||||||
|
for g in special_case_groups:
|
||||||
|
write_repo_groups_template(g['name'], g['repos'], fp)
|
||||||
|
for group_type in ['sigs', 'committees']:
|
||||||
|
for g in k8s_groups[group_type]:
|
||||||
|
repos = set(repos_from_k8s_group(g)) | set(renamed_repos.get(g['dir'],[]))
|
||||||
|
repos = sorted(list(repos))
|
||||||
|
write_repo_groups_template(k8s_group_name(g), repos, fp)
|
||||||
|
fp.write(repo_groups_sql_footer)
|
||||||
|
|
||||||
def main(sigs_yaml, repo_groups_sql):
|
def main(sigs_yaml, repo_groups_sql):
|
||||||
with open(sigs_yaml) as fp:
|
with open(sigs_yaml) as fp:
|
||||||
sigs = yaml.round_trip_load(fp)
|
k8s_groups = yaml.round_trip_load(fp)
|
||||||
|
|
||||||
if repo_groups_sql is not None:
|
if repo_groups_sql is not None:
|
||||||
with open(repo_groups_sql, 'w') as fp:
|
with open(repo_groups_sql, 'w') as fp:
|
||||||
write_repo_groups_sql(sigs, fp)
|
write_repo_groups_sql(k8s_groups, fp)
|
||||||
else:
|
else:
|
||||||
write_repo_groups_sql(sigs, sys.stdout)
|
write_repo_groups_sql(k8s_groups, sys.stdout)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
PARSER = argparse.ArgumentParser(
|
PARSER = argparse.ArgumentParser(
|
||||||
description='Do things with sigs.yaml')
|
description='Generate a repo_groups.sql intended for github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql')
|
||||||
PARSER.add_argument(
|
PARSER.add_argument(
|
||||||
'--sigs-yaml',
|
'--sigs-yaml',
|
||||||
default='./sigs.yaml',
|
default='./sigs.yaml',
|
||||||
|
|
Loading…
Reference in New Issue