mirror of https://github.com/vllm-project/vllm.git
Use standalone_compile by default in torch >= 2.8.0 (#18846)
Signed-off-by: rzou <zou3519@gmail.com>
This commit is contained in:
parent
64eaf5fe05
commit
a521ef06e5
|
@ -16,7 +16,7 @@ import vllm.envs as envs
|
|||
from vllm.config import CompilationConfig, VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import resolve_obj_by_qualname
|
||||
from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
|
||||
|
||||
from .compiler_interface import (CompilerInterface, EagerAdaptor,
|
||||
InductorAdaptor, InductorStandaloneAdaptor)
|
||||
|
@ -29,7 +29,8 @@ logger = init_logger(__name__)
|
|||
|
||||
def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
|
||||
if compilation_config.use_inductor:
|
||||
if envs.VLLM_TEST_STANDALONE_COMPILE:
|
||||
if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
|
||||
"2.8.0"):
|
||||
logger.info("Using InductorStandaloneAdaptor")
|
||||
return InductorStandaloneAdaptor()
|
||||
else:
|
||||
|
|
|
@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
|
|||
This is on by default for PyTorch >= 2.8; on older versions the flag
|
||||
is ignored.
|
||||
|
||||
Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
|
||||
Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
|
||||
"""
|
||||
name = "inductor_standalone"
|
||||
|
||||
|
|
14
vllm/envs.py
14
vllm/envs.py
|
@ -143,10 +143,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
|
|||
|
||||
def get_vllm_port() -> Optional[int]:
|
||||
"""Get the port from VLLM_PORT environment variable.
|
||||
|
||||
|
||||
Returns:
|
||||
The port number as an integer if VLLM_PORT is set, None otherwise.
|
||||
|
||||
|
||||
Raises:
|
||||
ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
|
||||
"""
|
||||
|
@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||
lambda: bool(
|
||||
os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
|
||||
|
||||
# Internal flag to enable/disable Inductor standalone compile
|
||||
"VLLM_TEST_STANDALONE_COMPILE":
|
||||
lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
|
||||
# Feature flag to enable/disable Inductor standalone compile.
|
||||
# In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
|
||||
# enabled by default.
|
||||
"VLLM_USE_STANDALONE_COMPILE":
|
||||
lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
|
||||
|
||||
# local rank of the process in the distributed setting, used to determine
|
||||
# the GPU device id
|
||||
|
@ -892,7 +894,7 @@ def compute_hash() -> str:
|
|||
"VLLM_USE_TRITON_AWQ",
|
||||
"VLLM_DP_RANK",
|
||||
"VLLM_DP_SIZE",
|
||||
"VLLM_TEST_STANDALONE_COMPILE",
|
||||
"VLLM_USE_STANDALONE_COMPILE",
|
||||
]
|
||||
for key in environment_variables_to_hash:
|
||||
if key in environment_variables:
|
||||
|
|
Loading…
Reference in New Issue