267 lines
8.9 KiB
Python
Executable File
267 lines
8.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
# Copyright 2025 The Kubernetes Authors.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import os
|
|
import fnmatch
|
|
import argparse
|
|
import datetime
|
|
|
|
# The license header to apply
# `{year}` is a str.format placeholder that apply_headers_to_tree fills with
# the current year. The text carries no comment markers of its own;
# format_header adds them per file type.
APACHE_HEADER = """Copyright {year} The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
|
|
|
|
# Mapping of file extensions to their comment syntax
# (line_prefix, block_start, block_end)
# Each entry sets either line_prefix or the block_start/block_end pair and
# leaves the unused members as None; format_header checks line_prefix first.
# Extensions missing from this table are skipped by apply_license_header.
COMMENT_STYLES = {
    ".go": ("// ", None, None),
    ".sh": ("# ", None, None),
    ".py": ("# ", None, None),
    ".js": ("// ", None, None),
    ".ts": ("// ", None, None),
    ".java": ("// ", None, None),
    ".scala": ("// ", None, None),
    ".c": ("// ", None, None),
    ".h": ("// ", None, None),
    ".cpp": ("// ", None, None),
    ".tf": ("# ", None, None),
    # Block comments for file types that support them
    ".css": (None, "/*", " */"),
    ".xml": (None, "<!--", "-->"),
    ".html": (None, "<!--", "-->"),
}
|
|
|
|
# Default glob patterns to exclude, relative to the root directory
# As interpreted by is_path_excluded: a pattern containing "/" is matched
# from the root, with "**" standing for zero or more directories; a pattern
# without "/" is matched against every individual component of the path.
DEFAULT_EXCLUDES = [
    ".git/**",
    ".idea/**",
    "__pycache__/**",
    "node_modules/**",
    "vendor/**",
    "**/*.yaml",
    "**/*.yml",
    "**/LICENSE",
    "**/*.md",
    "**/OWNERS",
    "**/SECURITY_CONTACTS",
    "go.mod",
    "go.sum",
    "*.json",
    "*.pyc",
    "*.so",
    "*.o",
    "*.a",
    "*.dll",
    "*.exe",
    "*.jar",
    "*.class",
    "*.zip",
    "*.tar.gz",
    "*.tgz",
    "*.rar",
    "*.7z",
    "*.log",
    "*.sum",
    "*.DS_Store",
]
|
|
|
|
def file_extension_magic(file_path):
    """Tries to determine the file type, as encoded by a typical extension.

    The extension of ``file_path`` is returned as-is when it has one and the
    file is not opened. Otherwise the first line is inspected for a shebang
    and the interpreter it names is mapped to an extension.

    Args:
        file_path: Path of the file to classify.

    Returns:
        The extension including the leading dot (e.g. ``".py"``), or ``None``
        when the file has no extension and is unreadable, is not valid UTF-8,
        has no shebang, or names an unrecognized interpreter.
    """
    # Default to the file extension
    _, ext = os.path.splitext(file_path)
    if ext:
        return ext

    # Look for a shebang line
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read the first 4k of the file, which should be enough for any header.
            content = f.read(4096)
    except UnicodeDecodeError:
        # Likely a binary file
        return None
    except OSError:
        # Missing or unreadable (broken symlink, permissions, ...): treat the
        # type as unknown so one bad entry does not abort the whole run.
        return None

    # First line is shebang (e.g., #!/usr/bin/env python)
    first_line = content.split('\n', 1)[0]
    if not first_line.startswith("#!"):
        return None

    tokens = first_line[2:].split()
    # `#!/usr/bin/env [-S] [VAR=value] python3` names the real interpreter
    # after `env`, its options and any variable assignments.
    if tokens and tokens[0].rsplit('/', 1)[-1] == "env":
        tokens = [t for t in tokens[1:] if not t.startswith('-') and '=' not in t]

    # Only the interpreter's own name is examined. Testing the whole line
    # would let a directory such as /opt/share/ or /home/josh/ look like "sh".
    interpreter = tokens[0].rsplit('/', 1)[-1] if tokens else ""
    if "python" in interpreter:
        return ".py"
    if "sh" in interpreter:  # sh, bash, zsh, dash, ksh, ...
        return ".sh"

    print((f"unknown shebang in {file_path}: {first_line}"))
    return None
|
|
|
|
def get_comment_style(file_extension):
    """Looks up the comment syntax registered for a file extension.

    Returns the ``(line_prefix, block_start, block_end)`` tuple stored in
    ``COMMENT_STYLES`` for ``file_extension``, or ``None`` when the extension
    has no entry.
    """
    if file_extension in COMMENT_STYLES:
        return COMMENT_STYLES[file_extension]
    return None
|
|
|
|
def format_header(header_text, style):
    """Renders the header text as a comment in the given style.

    Args:
        header_text: Plain license text; surrounding whitespace is stripped.
        style: ``(line_prefix, block_start, block_end)`` tuple. A truthy
            ``line_prefix`` takes precedence over the block delimiters.

    Returns:
        The commented header followed by one blank line, or ``None`` when the
        style provides neither a line prefix nor both block delimiters.
    """
    prefix, opener, closer = style
    body = header_text.strip().split('\n')

    if prefix:
        # Guarantee a space between the comment marker and the text.
        if not prefix.endswith(' '):
            prefix = prefix + ' '
        # rstrip() leaves a bare marker, with no trailing blank, on empty lines.
        commented = [f"{prefix}{text}".rstrip() for text in body]
        return '\n'.join(commented) + '\n\n'

    if opener and closer:
        # Block comment: delimiters on their own lines, text indented by one
        # space, empty lines kept completely empty.
        indented = '\n'.join(f" {text}".rstrip() for text in body)
        return f"{opener}\n{indented}\n{closer}\n\n"

    return None
|
|
|
|
|
|
def has_license_header(file_path):
    """Reports whether a file already carries the Kubernetes Apache header.

    Only the first 4096 characters are examined. The header counts as present
    when both the Apache 2.0 licence sentence and the Kubernetes Authors
    attribution occur in that prefix.

    Returns:
        ``True`` when both markers are found, ``False`` when either is
        missing. ``True`` is also returned when the file cannot be read, so
        that callers skip it.
    """
    required_markers = (
        "Licensed under the Apache License, Version 2.0",
        "The Kubernetes Authors",
    )
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            # The first 4k of the file should be enough for any header.
            head = handle.read(4096)
    except Exception:
        return True  # Skip file on error
    return all(marker in head for marker in required_markers)
|
|
|
|
|
|
def apply_license_header(file_path, header_text, dry_run=False):
    """Applies the license header to a single file if it doesn't have one.

    The file is skipped when its type cannot be determined, when it already
    has a header, or when no comment style is registered for its type.
    Otherwise the header is inserted at the top of the file, after a leading
    shebang line or XML declaration if there is one.

    Args:
        file_path: Path of the file to update in place.
        header_text: Plain license text, without comment markers.
        dry_run: When true, only report the file that would be changed.

    Returns:
        None. Write failures are reported on stdout and not raised.
    """
    file_extension = file_extension_magic(file_path)
    if not file_extension:
        return

    if has_license_header(file_path):
        return

    style = get_comment_style(file_extension)
    if not style:
        return

    formatted_header = format_header(header_text, style)
    if not formatted_header:
        return

    print(f"Applying header to: {file_path}")
    if dry_run:
        return

    try:
        # newline='' turns off newline translation in both directions, so the
        # file's existing line endings are written back untouched instead of
        # being rewritten to the platform default.
        with open(file_path, 'r+', encoding='utf-8', newline='') as f:
            content = f.read()

            # Emit the header in the file's own line-ending convention.
            eol = '\r\n' if '\r\n' in content else '\n'
            if eol != '\n':
                formatted_header = formatted_header.replace('\n', eol)

            first_line, sep, remainder = content.partition('\n')
            # A shebang (e.g., #!/usr/bin/env python) must stay on line one to
            # keep working, and nothing, not even a comment, may precede an
            # XML declaration, so the header goes below either of them.
            if first_line.startswith(("#!", "<?xml")):
                new_content = first_line + (sep or eol) + formatted_header + remainder
            else:
                new_content = formatted_header + content

            # The new content is strictly longer than the old, so overwriting
            # from the start needs no truncate.
            f.seek(0, 0)
            f.write(new_content)
    except Exception as e:
        print(f"Could not write to file {file_path}: {e}")
|
|
|
|
|
|
def _match_path_parts(path_parts, pattern_parts):
|
|
"""Recursively matches path components against pattern components."""
|
|
if not pattern_parts:
|
|
return not path_parts
|
|
if not path_parts:
|
|
return pattern_parts == ['**'] or all(p == '' for p in pattern_parts)
|
|
|
|
p_part = pattern_parts[0]
|
|
if p_part == '**':
|
|
if len(pattern_parts) == 1:
|
|
return True # `/**` at the end matches everything remaining
|
|
# `/**/` can match zero or more directories.
|
|
for i in range(len(path_parts) + 1):
|
|
if _match_path_parts(path_parts[i:], pattern_parts[1:]):
|
|
return True
|
|
return False
|
|
else:
|
|
if fnmatch.fnmatch(path_parts[0], p_part):
|
|
return _match_path_parts(path_parts[1:], pattern_parts[1:])
|
|
return False
|
|
|
|
def is_path_excluded(relative_path, exclude_patterns):
    """Checks if a relative path matches any of the .gitignore-style exclude patterns.

    Args:
        relative_path: Path relative to the scan root; OS separators are
            normalized to ``/`` before matching.
        exclude_patterns: Iterable of glob patterns.

    Returns:
        ``True`` as soon as one pattern matches, otherwise ``False``.
    """
    components = relative_path.replace(os.path.sep, '/').split('/')

    for raw_pattern in exclude_patterns:
        glob = raw_pattern.replace(os.path.sep, '/')
        if '/' in glob:
            # If slash is present, match from the root
            matched = _match_path_parts(components, glob.split('/'))
        else:
            # If no slash, match against any component of the path
            matched = any(fnmatch.fnmatch(name, glob) for name in components)
        if matched:
            return True
    return False
|
|
|
|
|
|
def apply_headers_to_tree(root_dir, excludes=None, dry_run=False):
    """
    Applies headers to all files in a repository, respecting excludes.

    Walks ``root_dir`` top-down and calls ``apply_license_header`` on every
    file whose root-relative path is not excluded. ``excludes`` is appended
    to ``DEFAULT_EXCLUDES``, and the header is stamped with the current year.
    """
    header_text = APACHE_HEADER.format(year=datetime.datetime.now().year)

    all_excludes = DEFAULT_EXCLUDES + (excludes or [])
    print(f"Excluding patterns: {all_excludes}")

    for current_dir, subdirs, filenames in os.walk(root_dir, topdown=True):
        rel_dir = os.path.relpath(current_dir, root_dir)
        # Use '' for the root itself so joined paths carry no leading './'.
        rel_dir = '' if rel_dir == '.' else rel_dir

        # Prune excluded directories in place so os.walk never descends
        # into them.
        subdirs[:] = [
            name
            for name in subdirs
            if not is_path_excluded(os.path.join(rel_dir, name), all_excludes)
        ]

        for filename in filenames:
            if not is_path_excluded(os.path.join(rel_dir, filename), all_excludes):
                apply_license_header(
                    os.path.join(current_dir, filename), header_text, dry_run
                )
|