mirror of https://github.com/vllm-project/vllm.git
100 lines
3.0 KiB
Python
100 lines
3.0 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
import pytest
|
|
from PIL import Image
|
|
|
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
|
from vllm.multimodal.parse import ImageSize
|
|
from vllm.multimodal.processing import BaseMultiModalProcessor
|
|
|
|
from ....conftest import ImageTestAssets
|
|
from ...utils import build_model_context
|
|
|
|
|
|
@pytest.mark.parametrize("model_id", ["MiniMaxAI/MiniMax-VL-01"])
|
|
@pytest.mark.parametrize("num_imgs", [1, 2])
|
|
def test_processor_override(
|
|
image_assets: ImageTestAssets,
|
|
model_id: str,
|
|
num_imgs: int,
|
|
):
|
|
ctx = build_model_context(
|
|
model_id,
|
|
mm_processor_kwargs=None,
|
|
limit_mm_per_prompt={"image": num_imgs},
|
|
)
|
|
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
|
|
prompt = "<image>" * num_imgs
|
|
image = Image.new("RGB", size=(364, 364))
|
|
mm_data = {"image": [image] * num_imgs}
|
|
|
|
processed_inputs = processor.apply(prompt, mm_data, {})
|
|
image_placeholders = processed_inputs["mm_placeholders"]["image"]
|
|
|
|
assert len(image_placeholders) == num_imgs
|
|
|
|
|
|
def _validate_image_prompt_replacements_one(
|
|
processor: BaseMultiModalProcessor,
|
|
num_imgs: int,
|
|
failed_size_excs: list[tuple[ImageSize, Exception]],
|
|
image_size: ImageSize,
|
|
) -> None:
|
|
prompt = "<image>" * num_imgs
|
|
image = Image.new("RGB", size=image_size)
|
|
mm_data = {"image": [image] * num_imgs}
|
|
|
|
try:
|
|
processed_inputs = processor.apply(prompt, mm_data, {})
|
|
|
|
image_placeholders = processed_inputs["mm_placeholders"]["image"]
|
|
assert len(image_placeholders) == num_imgs
|
|
|
|
except Exception as exc:
|
|
failed_size_excs.append((image_size, exc))
|
|
|
|
|
|
def _test_image_prompt_replacements(
|
|
processor,
|
|
*,
|
|
num_imgs: int,
|
|
image_sizes: list[ImageSize],
|
|
) -> None:
|
|
|
|
failed_size_excs = list[tuple[ImageSize, Exception]]()
|
|
|
|
for size in image_sizes:
|
|
_validate_image_prompt_replacements_one(processor, num_imgs,
|
|
failed_size_excs, size)
|
|
|
|
if failed_size_excs:
|
|
msg = "Found failing image sizes:" \
|
|
+ "\n========\n".join(f"[{size}]\n{exc}"
|
|
for size, exc in failed_size_excs)
|
|
raise AssertionError(msg)
|
|
|
|
|
|
@pytest.mark.parametrize("model_id", ["MiniMaxAI/MiniMax-VL-01"])
|
|
@pytest.mark.parametrize("num_imgs", [1, 2])
|
|
def test_processor_prompt_replacements_regression(model_id, num_imgs):
|
|
ctx = build_model_context(
|
|
model_id,
|
|
mm_processor_kwargs=None,
|
|
limit_mm_per_prompt={"image": num_imgs},
|
|
)
|
|
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
|
|
|
|
image_ratios = [(171, 152), (184, 161), (198, 176), (333, 296), (369, 328),
|
|
(488, 183), (2560, 1669)]
|
|
image_sizes = [
|
|
size for w, h in image_ratios
|
|
for size in [ImageSize(w, h), ImageSize(h, w)]
|
|
]
|
|
|
|
_test_image_prompt_replacements(
|
|
processor,
|
|
num_imgs=num_imgs,
|
|
image_sizes=image_sizes,
|
|
)
|