Merge 73f5754c47 into fbb2da7313
This commit is contained in:
commit
9db5d15401
|
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright 2025 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Assuming fix-boilerplate is in dev/tools and headers.py is in dev/tools/shared
|
||||
script_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
# Add dev/tools to sys.path to allow importing shared.headers
|
||||
sys.path.append(script_dir)
|
||||
|
||||
from shared import headers
|
||||
|
||||
def main():
|
||||
# Find the repo root from the script's location
|
||||
repo_root = os.path.abspath(os.path.join(script_dir, '..', '..'))
|
||||
|
||||
# Excludes from the original fix-boilerplate script
|
||||
excludes = [
|
||||
'_archived/**',
|
||||
'databases/**',
|
||||
'web/**',
|
||||
]
|
||||
|
||||
print(f"Scanning for license headers in {repo_root}")
|
||||
headers.apply_headers_to_tree(repo_root, excludes=excludes)
|
||||
print("Done.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1,266 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright 2025 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import fnmatch
|
||||
import argparse
|
||||
import datetime
|
||||
|
||||
# The license header to apply
|
||||
APACHE_HEADER = """Copyright {year} The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
# Mapping of file extensions to their comment syntax
|
||||
# (line_prefix, block_start, block_end)
|
||||
COMMENT_STYLES = {
|
||||
".go": ("// ", None, None),
|
||||
".sh": ("# ", None, None),
|
||||
".py": ("# ", None, None),
|
||||
".js": ("// ", None, None),
|
||||
".ts": ("// ", None, None),
|
||||
".java": ("// ", None, None),
|
||||
".scala": ("// ", None, None),
|
||||
".c": ("// ", None, None),
|
||||
".h": ("// ", None, None),
|
||||
".cpp": ("// ", None, None),
|
||||
".tf": ("# ", None, None),
|
||||
# Block comments for file types that support them
|
||||
".css": (None, "/*", " */"),
|
||||
".xml": (None, "<!--", "-->"),
|
||||
".html": (None, "<!--", "-->"),
|
||||
}
|
||||
|
||||
# Default glob patterns to exclude, relative to the root directory
|
||||
DEFAULT_EXCLUDES = [
|
||||
".git/**",
|
||||
".idea/**",
|
||||
"__pycache__/**",
|
||||
"node_modules/**",
|
||||
"vendor/**",
|
||||
"**/*.yaml",
|
||||
"**/*.yml",
|
||||
"**/LICENSE",
|
||||
"**/*.md",
|
||||
"**/OWNERS",
|
||||
"**/SECURITY_CONTACTS",
|
||||
"go.mod",
|
||||
"go.sum",
|
||||
"*.json",
|
||||
"*.pyc",
|
||||
"*.so",
|
||||
"*.o",
|
||||
"*.a",
|
||||
"*.dll",
|
||||
"*.exe",
|
||||
"*.jar",
|
||||
"*.class",
|
||||
"*.zip",
|
||||
"*.tar.gz",
|
||||
"*.tgz",
|
||||
"*.rar",
|
||||
"*.7z",
|
||||
"*.log",
|
||||
"*.sum",
|
||||
"*.DS_Store",
|
||||
]
|
||||
|
||||
def file_extension_magic(file_path):
|
||||
"""Tries to determine the file type, as encoded by a typical extension."""
|
||||
# Default to the file extension
|
||||
_, ext = os.path.splitext(file_path)
|
||||
if ext:
|
||||
return ext
|
||||
# Look for a shebang line
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
# Read the first 4k of the file, which should be enough for any header.
|
||||
try:
|
||||
content = f.read(4096)
|
||||
except UnicodeDecodeError:
|
||||
# Likely a binary file
|
||||
return None
|
||||
# First line is shebang (e.g., #!/usr/bin/env python)
|
||||
first_line = content.split('\n', 1)[0]
|
||||
if first_line.startswith("#!"):
|
||||
if "python" in first_line:
|
||||
return ".py"
|
||||
if "bash" in first_line or "sh" in first_line:
|
||||
return ".sh"
|
||||
print((f"unknown shebang in {file_path}: {first_line}"))
|
||||
return None
|
||||
|
||||
def get_comment_style(file_extension):
|
||||
"""Gets the comment style for a file based on its extension."""
|
||||
return COMMENT_STYLES.get(file_extension)
|
||||
|
||||
def format_header(header_text, style):
|
||||
"""Formats the header text with the correct comment style."""
|
||||
line_prefix, block_start, block_end = style
|
||||
|
||||
# Add a space for line prefixes if they don't have one
|
||||
if line_prefix and not line_prefix.endswith(' '):
|
||||
line_prefix += ' '
|
||||
|
||||
header_lines = header_text.strip().split('\n')
|
||||
|
||||
if line_prefix:
|
||||
# Handle empty lines in header correctly
|
||||
formatted_lines = [f"{line_prefix}{line}".rstrip() if line else line_prefix.rstrip() for line in header_lines]
|
||||
return '\n'.join(formatted_lines) + '\n\n'
|
||||
|
||||
if block_start and block_end:
|
||||
# Handle block comments
|
||||
formatted_header = f"{block_start}\n"
|
||||
formatted_header += '\n'.join(f" {line}".rstrip() if line else "" for line in header_lines)
|
||||
formatted_header += f"\n{block_end}\n\n"
|
||||
return formatted_header
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def has_license_header(file_path):
|
||||
"""Checks if a file already has an Apache license header."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
# Read the first 4k of the file, which should be enough for any header.
|
||||
content = f.read(4096)
|
||||
if not "Licensed under the Apache License, Version 2.0" in content:
|
||||
return False
|
||||
if not "The Kubernetes Authors" in content:
|
||||
return False
|
||||
return True
|
||||
except Exception as e:
|
||||
# print(f"Could not read file {file_path}: {e}")
|
||||
return True # Skip file on error
|
||||
|
||||
|
||||
def apply_license_header(file_path, header_text, dry_run=False):
|
||||
"""Applies the license header to a single file if it doesn't have one."""
|
||||
|
||||
file_extension = file_extension_magic(file_path)
|
||||
if not file_extension:
|
||||
# print(f"Skipping (unknown file type): {file_path}")
|
||||
return
|
||||
|
||||
|
||||
if has_license_header(file_path):
|
||||
# print(f"Skipping (header exists): {file_path}")
|
||||
return
|
||||
|
||||
style = get_comment_style(file_extension)
|
||||
if not style:
|
||||
# print(f"Skipping (unsupported extension): {file_path}")
|
||||
return
|
||||
|
||||
formatted_header = format_header(header_text, style)
|
||||
if not formatted_header:
|
||||
# print(f"Skipping (could not format header): {file_path}")
|
||||
return
|
||||
|
||||
print(f"Applying header to: {file_path}")
|
||||
if not dry_run:
|
||||
try:
|
||||
with open(file_path, 'r+', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
f.seek(0, 0)
|
||||
# Handle shebangs (e.g., #!/usr/bin/env python)
|
||||
if content.startswith("#!"):
|
||||
lines = content.split('\n', 1)
|
||||
shebang = lines[0]
|
||||
rest_of_content = lines[1] if len(lines) > 1 else ""
|
||||
f.write(shebang + '\n' + formatted_header + rest_of_content)
|
||||
else:
|
||||
f.write(formatted_header + content)
|
||||
except Exception as e:
|
||||
print(f"Could not write to file {file_path}: {e}")
|
||||
|
||||
|
||||
def _match_path_parts(path_parts, pattern_parts):
|
||||
"""Recursively matches path components against pattern components."""
|
||||
if not pattern_parts:
|
||||
return not path_parts
|
||||
if not path_parts:
|
||||
return pattern_parts == ['**'] or all(p == '' for p in pattern_parts)
|
||||
|
||||
p_part = pattern_parts[0]
|
||||
if p_part == '**':
|
||||
if len(pattern_parts) == 1:
|
||||
return True # `/**` at the end matches everything remaining
|
||||
# `/**/` can match zero or more directories.
|
||||
for i in range(len(path_parts) + 1):
|
||||
if _match_path_parts(path_parts[i:], pattern_parts[1:]):
|
||||
return True
|
||||
return False
|
||||
else:
|
||||
if fnmatch.fnmatch(path_parts[0], p_part):
|
||||
return _match_path_parts(path_parts[1:], pattern_parts[1:])
|
||||
return False
|
||||
|
||||
def is_path_excluded(relative_path, exclude_patterns):
|
||||
"""Checks if a relative path matches any of the .gitignore-style exclude patterns."""
|
||||
relative_path = relative_path.replace(os.path.sep, '/')
|
||||
path_parts = relative_path.split('/')
|
||||
|
||||
for pattern in exclude_patterns:
|
||||
pattern = pattern.replace(os.path.sep, '/')
|
||||
if '/' not in pattern:
|
||||
# If no slash, match against any component of the path
|
||||
if any(fnmatch.fnmatch(part, pattern) for part in path_parts):
|
||||
return True
|
||||
else:
|
||||
# If slash is present, match from the root
|
||||
pattern_parts = pattern.split('/')
|
||||
if _match_path_parts(path_parts, pattern_parts):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def apply_headers_to_tree(root_dir, excludes=None, dry_run=False):
|
||||
"""
|
||||
Applies headers to all files in a repository, respecting excludes.
|
||||
"""
|
||||
year = datetime.datetime.now().year
|
||||
header_text = APACHE_HEADER.format(year=year)
|
||||
|
||||
all_excludes = DEFAULT_EXCLUDES + (excludes or [])
|
||||
print(f"Excluding patterns: {all_excludes}")
|
||||
|
||||
for root, dirs, files in os.walk(root_dir, topdown=True):
|
||||
rel_root = os.path.relpath(root, root_dir)
|
||||
if rel_root == '.':
|
||||
rel_root = ''
|
||||
|
||||
# Filter dirs in-place so os.walk doesn't recurse into them
|
||||
dirs[:] = [d for d in dirs if not is_path_excluded(os.path.join(rel_root, d), all_excludes)]
|
||||
|
||||
for file in files:
|
||||
rel_path = os.path.join(rel_root, file)
|
||||
if is_path_excluded(rel_path, all_excludes):
|
||||
continue
|
||||
|
||||
full_path = os.path.join(root, file)
|
||||
apply_license_header(full_path, header_text, dry_run)
|
||||
Loading…
Reference in New Issue