Use standalone_compile by default in torch >= 2.8.0 (#18846)

Signed-off-by: rzou <zou3519@gmail.com>
Richard Zou 2025-05-29 18:41:58 -04:00 committed by GitHub
parent 64eaf5fe05
commit a521ef06e5
3 changed files with 12 additions and 9 deletions

View File

@@ -16,7 +16,7 @@ import vllm.envs as envs
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                  InductorAdaptor, InductorStandaloneAdaptor)
@@ -29,7 +29,8 @@ logger = init_logger(__name__)
 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.8.0"):
             logger.info("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:
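For reference, the selection logic after this hunk reduces to roughly the sketch below. Only the lines shown in the hunk are certain; the fallback to InductorAdaptor, the absolute import paths, and the omission of the non-Inductor branch are assumptions made to keep the snippet self-contained.

import vllm.envs as envs
from vllm.compilation.compiler_interface import (CompilerInterface,
                                                 InductorAdaptor,
                                                 InductorStandaloneAdaptor)
from vllm.config import CompilationConfig
from vllm.utils import is_torch_equal_or_newer


def make_compiler_sketch(
        compilation_config: CompilationConfig) -> CompilerInterface:
    # Standalone compile is selected only when the feature flag is on AND the
    # installed torch is new enough (>= 2.8.0) to provide standalone compile.
    if compilation_config.use_inductor:
        if (envs.VLLM_USE_STANDALONE_COMPILE
                and is_torch_equal_or_newer("2.8.0")):
            return InductorStandaloneAdaptor()
        # Assumed fallback: older torch, or the flag explicitly switched off.
        return InductorAdaptor()
    # The non-Inductor branch is omitted from this sketch.
    raise NotImplementedError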

View File

@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
     This is not on by default yet, but we plan to turn it on by default for
     PyTorch 2.8.
-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
     """
     name = "inductor_standalone"
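As a usage note, the flag is plain environment configuration; a minimal sketch of opting back out (the import order is deliberate so the variable is set before vllm evaluates it, and the parsing follows the environment-variable hunk further below):

import os

# Disable standalone compile even on torch >= 2.8 by setting the flag to
# anything other than "1" before vllm reads it; leaving it unset keeps the
# new default (enabled).
os.environ["VLLM_USE_STANDALONE_COMPILE"] = "0"

import vllm.envs as envs

print(envs.VLLM_USE_STANDALONE_COMPILE)  # False with the setting above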

View File

@@ -143,10 +143,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
 def get_vllm_port() -> Optional[int]:
     """Get the port from VLLM_PORT environment variable.
     Returns:
         The port number as an integer if VLLM_PORT is set, None otherwise.
     Raises:
         ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
     """
@@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     lambda: bool(
         os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
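The rename also flips the semantics of the parsing: the old variable defaulted to "0" and acted as an opt-in (!= "0"), while the new one defaults to "1" and acts as an opt-out (== "1"). A self-contained sketch of that difference, reusing the two entries purely for illustration:

import os
from typing import Any, Callable

# Same lazy-lambda pattern as the table above: each value is parsed only when
# its entry is called, so results reflect the environment at call time.
environment_variables: dict[str, Callable[[], Any]] = {
    # Old behaviour: opt-in, default "0" (disabled unless explicitly set).
    "VLLM_TEST_STANDALONE_COMPILE":
    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
    # New behaviour: opt-out, default "1" (enabled); any other value disables.
    "VLLM_USE_STANDALONE_COMPILE":
    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
}

if __name__ == "__main__":
    # With neither variable set, the old flag reads False, the new one True.
    print(environment_variables["VLLM_TEST_STANDALONE_COMPILE"]())  # False
    print(environment_variables["VLLM_USE_STANDALONE_COMPILE"]())   # True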
@@ -892,7 +894,7 @@ def compute_hash() -> str:
         "VLLM_USE_TRITON_AWQ",
         "VLLM_DP_RANK",
         "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
     ]
     for key in environment_variables_to_hash:
         if key in environment_variables:
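The list above is iterated by compute_hash(), which presumably folds the selected variables into a cache/config hash, so the rename also changes which variable can invalidate that hash. A minimal illustrative sketch of the idea (hashlib-based, not vllm's actual implementation):

import hashlib
import os

# Hypothetical subset of the variables listed in the hunk above.
environment_variables_to_hash = [
    "VLLM_USE_TRITON_AWQ",
    "VLLM_DP_RANK",
    "VLLM_DP_SIZE",
    "VLLM_USE_STANDALONE_COMPILE",
]


def compute_env_hash() -> str:
    """Illustrative: fold the current values of the listed variables into one
    hex digest so any change to them produces a different key."""
    hasher = hashlib.sha256()
    for key in environment_variables_to_hash:
        hasher.update(key.encode())
        hasher.update(os.environ.get(key, "").encode())
    return hasher.hexdigest()


if __name__ == "__main__":
    print(compute_env_hash())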