#!/usr/bin/env python3

# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Output devstats repo_groups.sql based on subproject definitions in sigs.yaml

This is likely missing a few repos because:
  - some repos lack an owner (eg: kubernetes/kubernetes)
  - it doesn't enumerate all repos from all kubernetes-owned orgs
  - it ignores the fact that committees can own repos, only grouping by sig

The sql generated is NOT intended to overwrite/replace the file that lives at
github.com/cncf/devstats/scripts/kubernetes/repo_groups.sql, but instead aid a
human in doing some manual updates to the file. Future improvements to this
script could eliminate that part of the process, but it's where we are today.
"""

import argparse
import json
import re
import sys

try:
    import ruamel.yaml as yaml
except ImportError:
    # Only main() needs the YAML parser; deferring the failure keeps the pure
    # helpers below importable without the third-party dependency.
    yaml = None

update_gha_repos_template = """
update gha_repos set repo_group = 'SIG {}' where name in (
{}
);
"""

# Raw-content URL of a file in a GitHub repo:
#   https://raw.githubusercontent.com/<org>/<repo>/<branch>/<path>
# The branch is matched as a single path segment so that repos whose default
# branch is not "master" (e.g. "main") are recognized as well.
_OWNERS_URL_RE = re.compile(
    r"https://raw\.githubusercontent\.com/"
    r"(?P<org>[^/]+)/(?P<repo>[^/]+)/(?P<branch>[^/]+)/(?P<path>.+)")


def _sql_escape(value):
    """Return str(value) with single quotes doubled for use inside a SQL string literal."""
    return str(value).replace("'", "''")


def repos_from_sig(sig):
    """Return the sorted list of org/repo names owned by a sig.

    A repo counts as owned by the sig when one of the sig's subprojects lists
    the repo's top-level OWNERS file (a raw.githubusercontent.com URL) among
    its 'owners'. OWNERS files deeper inside a repo do not make the whole repo
    owned by the sig.

    Args:
        sig: mapping for one sig entry; 'subprojects' may be absent or None,
            and each subproject's 'owners' may be absent or None.

    Returns:
        Sorted list of unique 'org/repo' strings (possibly empty).
    """
    repos = set()
    for subproject in sig.get('subprojects') or []:
        for uri in subproject.get('owners') or []:
            match = _OWNERS_URL_RE.fullmatch(uri)
            # Entries that are not raw GitHub URLs, or that point below the
            # repo root, are skipped instead of raising.
            if match is None or match.group('path') != 'OWNERS':
                continue
            repos.add('{}/{}'.format(match.group('org'), match.group('repo')))
    return sorted(repos)


def write_repo_groups_sql(sigs, fp):
    """Write one 'update gha_repos' statement per sig that owns repos.

    Args:
        sigs: mapping with a 'sigs' key holding the list of sig entries; each
            entry needs a 'name' and is passed to repos_from_sig().
        fp: writable text stream that receives the SQL.

    Sigs that own no repos produce no output. Sig and repo names are escaped
    so an embedded single quote cannot terminate the SQL string literal.
    """
    for sig in sigs['sigs']:
        repos = repos_from_sig(sig)
        if not repos:
            continue
        name_list = ',\n'.join(
            " '{}'".format(_sql_escape(repo)) for repo in repos)
        fp.write(
            update_gha_repos_template.format(
                _sql_escape(sig['name']), name_list))


def main(sigs_yaml, repo_groups_sql):
    """Load sigs.yaml and emit the repo-group SQL.

    Args:
        sigs_yaml: path of the sigs.yaml file to read.
        repo_groups_sql: path of the SQL file to write, or None to write to
            stdout.

    Raises:
        ImportError: if ruamel.yaml is not installed.
    """
    if yaml is None:
        raise ImportError('ruamel.yaml is required to read {}'.format(sigs_yaml))

    with open(sigs_yaml, encoding='utf-8') as fp:
        # The module-level round_trip_load() helper is deprecated and gone
        # from current ruamel.yaml releases; the YAML class is the supported
        # API. The data is only read, so plain dicts/lists from the safe
        # loader are sufficient.
        sigs = yaml.YAML(typ='safe').load(fp)

    if repo_groups_sql is not None:
        with open(repo_groups_sql, 'w', encoding='utf-8') as fp:
            write_repo_groups_sql(sigs, fp)
    else:
        write_repo_groups_sql(sigs, sys.stdout)


if __name__ == '__main__':
    PARSER = argparse.ArgumentParser(
        description='Do things with sigs.yaml')
    PARSER.add_argument(
        '--sigs-yaml',
        default='./sigs.yaml',
        help='Path to sigs.yaml')
    PARSER.add_argument(
        '--repo-groups-sql',
        help='Path to output repo_groups.sql if provided')
    ARGS = PARSER.parse_args()

    main(ARGS.sigs_yaml, ARGS.repo_groups_sql)