diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f8d511a4a..0f09da04ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -436,6 +436,8 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") MIN_VERSION 12.0 ARCHS "${MACHETE_ARCHS}" GEN_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/csrc/quantization/machete/generate.py" + GEN_PYTHONPATH_PREPEND + "${CMAKE_CURRENT_SOURCE_DIR}/csrc/cutlass_extensions/:${CUTLASS_DIR}/python/" GEN_GLOB "csrc/quantization/machete/generated/*.cu" SRCS "csrc/quantization/machete/machete_pytorch.cu" VERSION_MSG diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 37e8710a3b..3b9815d70c 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -40,16 +40,17 @@ function (run_python OUT EXPR ERR_MSG) endfunction() # Generate CUDA sources via a Python script with caching based on script hash -function(generate_cuda_sources NAME SCRIPT GLOB OUT_SRCS) +function(generate_cuda_sources NAME SCRIPT PYTHONPATH_PREPEND GLOB OUT_SRCS) string(TOUPPER "${NAME}" _UPPER_NAME) set(_CACHE_VAR "${_UPPER_NAME}_GEN_SCRIPT_HASH") file(MD5 "${SCRIPT}" _GEN_HASH) message(STATUS "${NAME} generation script hash: ${_GEN_HASH}") message(STATUS "Last run ${NAME} generation script hash: $CACHE{${_CACHE_VAR}}") if(NOT DEFINED CACHE{${_CACHE_VAR}} OR NOT $CACHE{${_CACHE_VAR}} STREQUAL "${_GEN_HASH}") + message(STATUS "Running ${NAME} generation script: ${SCRIPT}, with PYTHONPATH prepend: ${PYTHONPATH_PREPEND}") execute_process( COMMAND ${CMAKE_COMMAND} -E env - PYTHONPATH=$ENV{PYTHONPATH} + PYTHONPATH=${PYTHONPATH_PREPEND}:$ENV{PYTHONPATH} ${Python_EXECUTABLE} "${SCRIPT}" RESULT_VARIABLE _GEN_RESULT OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${NAME}_generation.log" @@ -126,6 +127,7 @@ endfunction() # [VERSION_MSG [ ...]] # [NO_ARCH_MSG [ ...]] # [GEN_SCRIPT ] +# [GEN_PYTHONPATH_PREPEND ] # [GEN_GLOB ] # This will check if `CMAKE_CUDA_COMPILER_VERSION` is greater than or equal # to `MIN_VERSION` and the `cuda_archs_loose_intersection` of `ARCHS` and @@ -137,10 +139,11 @@ endfunction() # 3) 
append the flags in `FLAGS` to the global `VLLM_GPU_FLAGS` variable. # # This will also run GEN_SCRIPT (if supplied and the hash of the script does not -# match the latest in the cmake cache), before globbing sources matching -# GEN_GLOB and appending them alongside SRCS (with the gencodes set) +# match the latest in the cmake cache), with GEN_PYTHONPATH_PREPEND prepended to +# the PYTHONPATH when calling it, before globbing sources matching GEN_GLOB +# and appending them alongside SRCS (with the gencodes set) macro(optional_cuda_sources) - set(oneValueArgs NAME MIN_VERSION GEN_SCRIPT GEN_GLOB OUT_SRCS_VAR) + set(oneValueArgs NAME MIN_VERSION GEN_SCRIPT GEN_PYTHONPATH_PREPEND GEN_GLOB OUT_SRCS_VAR) set(multiValueArgs ARCHS SRCS FLAGS VERSION_MSG NO_ARCH_MSG) cmake_parse_arguments(OCS "" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) if(NOT OCS_NAME) @@ -161,7 +164,8 @@ macro(optional_cuda_sources) set(_OCS_SRCS ${OCS_SRCS}) # Generate sources if a script is provided if(OCS_GEN_SCRIPT AND OCS_GEN_GLOB) - generate_cuda_sources(${OCS_NAME} "${OCS_GEN_SCRIPT}" "${OCS_GEN_GLOB}" _OCS_GEN_SRCS) + generate_cuda_sources( + ${OCS_NAME} "${OCS_GEN_SCRIPT}" "${OCS_GEN_PYTHONPATH_PREPEND}" "${OCS_GEN_GLOB}" _OCS_GEN_SRCS) list(APPEND _OCS_SRCS ${_OCS_GEN_SRCS}) endif() set_gencode_flags_for_srcs(SRCS "${_OCS_SRCS}" CUDA_ARCHS "${_OCS_ARCHS}")