#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# Mirror of https://github.com/vllm-project/vllm.git
"""Check that only allow-listed files import ``pickle`` or ``cloudpickle``.

Walks the repository, skips paths matched by ``.gitignore``, and reports
every Python file outside ``ALLOWED_FILES`` that contains a matching import.

Exit status: 0 when clean, 1 when offending files are found, 2 when the
required ``pathspec`` dependency is missing. Pass ``--test-regex`` to run
the built-in self-test of the import-detection pattern instead.
"""
import os
import sys

import regex as re

# pathspec is a hard requirement; fail early with an actionable message
# rather than with a bare traceback.
try:
    import pathspec
except ImportError:
    print(
        "ERROR: The 'pathspec' library is required. "
        "Install it with 'pip install pathspec'.",
        file=sys.stderr)
    sys.exit(2)


# List of files (relative to repo root, '/'-separated) that are allowed to
# import pickle or cloudpickle.
#
# STOP AND READ BEFORE YOU ADD ANYTHING ELSE TO THIS LIST:
# The pickle and cloudpickle modules are known to be unsafe when deserializing
# data from potentially untrusted parties. They have resulted in multiple CVEs
# for vLLM and numerous vulnerabilities in the Python ecosystem more broadly.
# Before adding new uses of pickle/cloudpickle, please consider safer
# alternatives like msgpack or pydantic that are already in use in vLLM. Only
# add to this list if absolutely necessary and after careful security review.
#
# Each path is listed exactly once; a duplicated entry is easy to miss when
# an exemption is later removed.
ALLOWED_FILES = {
    # pickle
    'vllm/multimodal/hasher.py',
    'tests/test_utils.py',
    'tests/tokenization/test_cached_tokenizer.py',
    'tests/model_executor/test_guided_processors.py',
    'vllm/distributed/utils.py',
    'vllm/distributed/parallel_state.py',
    'vllm/distributed/device_communicators/custom_all_reduce_utils.py',
    'vllm/distributed/device_communicators/shm_broadcast.py',
    'benchmarks/kernels/graph_machete_bench.py',
    'benchmarks/kernels/benchmark_lora.py',
    'benchmarks/kernels/benchmark_machete.py',
    'benchmarks/fused_kernels/layernorm_rms_benchmarks.py',
    'benchmarks/cutlass_benchmarks/w8a8_benchmarks.py',
    'benchmarks/cutlass_benchmarks/sparse_benchmarks.py',
    # cloudpickle
    'vllm/worker/worker_base.py',
    'vllm/executor/mp_distributed_executor.py',
    'vllm/executor/ray_distributed_executor.py',
    'vllm/entrypoints/llm.py',
    'tests/utils.py',
    # pickle and cloudpickle
    'vllm/utils/__init__.py',
    'vllm/v1/serial_utils.py',
    'vllm/v1/executor/multiproc_executor.py',
    'vllm/transformers_utils/config.py',
    'vllm/model_executor/models/registry.py',
    'vllm/engine/multiprocessing/client.py',
    'vllm/engine/multiprocessing/engine.py',
}


# Matches a source line that imports pickle or cloudpickle:
#   * ``import ...`` where pickle/cloudpickle appears anywhere in the
#     comma-separated module list (``import os, pickle``,
#     ``import pickle, os``, ``import cloudpickle as cp``). The trailing
#     ``\b`` keeps look-alike names such as ``pickleas`` or ``pickle_utils``
#     from matching.
#   * ``from pickle import ...`` / ``from cloudpickle import ...``, including
#     submodules such as ``from cloudpickle.cloudpickle import dumps``.
# Only the start of the line is inspected, so comments and string literals
# that merely mention an import are not flagged.
PICKLE_RE = re.compile(
    r"^\s*(import\s+(?:[\w.]+(?:\s+as\s+\w+)?\s*,\s*)*(pickle|cloudpickle)\b"
    r"|from\s+(pickle|cloudpickle)(?:\.[\w.]+)?\s+import\b)")


def is_python_file(path: str) -> bool:
    """Return True if *path* names a Python source file.

    The decision is purely textual: the name must end with ``.py``.
    The file system is not consulted.
    """
    return path.endswith('.py')


def scan_file(path: str) -> bool:
    """Return True if any line of the file at *path* matches ``PICKLE_RE``.

    The file is read line by line and scanning stops at the first match.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # errors='replace' keeps one file that is not valid UTF-8 from aborting
    # the whole scan with UnicodeDecodeError. The import statements being
    # searched for are plain ASCII, so replaced bytes cannot hide a match.
    with open(path, encoding='utf-8', errors='replace') as f:
        return any(PICKLE_RE.match(line) for line in f)


def load_gitignore(repo_root: str):
    """Build a ``pathspec.PathSpec`` from ``<repo_root>/.gitignore``.

    A missing ``.gitignore`` is treated as an empty pattern list. The
    ``.git/`` directory is always added to the patterns so repository
    metadata is never scanned.

    Args:
        repo_root: Directory expected to contain the ``.gitignore`` file.

    Returns:
        The ``PathSpec`` compiled from the collected 'gitwildmatch' patterns.
    """
    gitignore_path = os.path.join(repo_root, '.gitignore')
    # Open directly instead of checking existence first: one system call,
    # and no window in which the file can disappear between check and open.
    try:
        with open(gitignore_path, encoding='utf-8') as f:
            patterns = f.read().splitlines()
    except FileNotFoundError:
        patterns = []
    # Always ignore .git directory
    patterns.append('.git/')
    return pathspec.PathSpec.from_lines('gitwildmatch', patterns)


def main():
    """Scan the repository and exit non-zero on disallowed pickle imports.

    The repository root is taken to be the parent of the directory that
    contains this script. Every ``.py`` file under it that is not matched by
    the gitignore spec and not listed in ``ALLOWED_FILES`` is scanned.

    Exits with status 1 after printing the offending paths if any are found,
    otherwise with status 0.
    """
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    spec = load_gitignore(repo_root)
    bad_files = []
    for dirpath, _, filenames in os.walk(repo_root):
        for filename in filenames:
            if not is_python_file(filename):
                continue
            abs_path = os.path.join(dirpath, filename)
            # ALLOWED_FILES is written with forward slashes, so normalise
            # the OS-specific separator. Otherwise no allow-listed file
            # would ever be recognised on Windows.
            rel_path = os.path.relpath(abs_path,
                                       repo_root).replace(os.sep, '/')
            # Skip ignored files
            if spec.match_file(rel_path):
                continue
            # Check the allow-list first so exempt files are never opened.
            if rel_path not in ALLOWED_FILES and scan_file(abs_path):
                bad_files.append(rel_path)
    if bad_files:
        print("\nERROR: The following files import 'pickle' or 'cloudpickle' "
              "but are not in the allowed list:")
        # os.walk order depends on the file system; sort for a stable report.
        for f in sorted(bad_files):
            print(f" {f}")
        print("\nIf this is intentional, update the allowed list in "
              "tools/check_pickle_imports.py.")
        sys.exit(1)
    sys.exit(0)


def test_regex():
    """Self-test ``PICKLE_RE`` against known matching and non-matching lines.

    Prints a confirmation message when every case behaves as expected.

    Raises:
        AssertionError: on the first case whose match result differs from
            the expectation.
    """
    test_cases = [
        # Should match
        ("import pickle", True),
        ("import cloudpickle", True),
        ("import pickle as pkl", True),
        ("import cloudpickle as cpkl", True),
        ("from pickle import *", True),
        ("from cloudpickle import dumps", True),
        ("from pickle import dumps, loads", True),
        ("from cloudpickle import (dumps, loads)", True),
        (" import pickle", True),
        ("\timport cloudpickle", True),
        ("from pickle import loads", True),
        # Should not match
        ("import somethingelse", False),
        ("from somethingelse import pickle", False),
        ("# import pickle", False),
        ("print('import pickle')", False),
        ("import pickleas as asdf", False),
    ]
    for i, (line, should_match) in enumerate(test_cases):
        result = bool(PICKLE_RE.match(line))
        # Raise explicitly instead of using `assert`: assert statements are
        # removed under `python -O`, which would make this self-test pass
        # without checking anything.
        if result != should_match:
            raise AssertionError(
                f"Test case {i} failed: '{line}' "
                f"(expected {should_match}, got {result})")
    print("All regex tests passed.")


# Script entry point: `--test-regex` runs the pattern self-test; any other
# invocation runs the repository scan.
if __name__ == '__main__':
    if '--test-regex' in sys.argv:
        test_regex()
    else:
        main()