// Source: mirror of https://github.com/dragonflyoss/api.git
// (Protocol Buffer, 1760 lines, 47 KiB)
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

syntax = "proto3";

package inference;

//@@.. cpp:namespace:: inference

// Provides the ModelConfig message referenced by ModelConfigResponse.
import "pkg/apis/inference/model_config.proto";

option go_package = "d7y.io/api/v2/pkg/apis/inference;inference";

//@@
//@@.. cpp:var:: service GRPCInferenceService
//@@
//@@   Inference Server GRPC endpoints.
//@@
service GRPCInferenceService
{
  //@@  .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns
  //@@       (ServerLiveResponse)
  //@@
  //@@     Check liveness of the inference server.
  //@@
  rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}

  //@@  .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns
  //@@       (ServerReadyResponse)
  //@@
  //@@     Check readiness of the inference server.
  //@@
  rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}

  //@@  .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns
  //@@       (ModelReadyResponse)
  //@@
  //@@     Check readiness of a model in the inference server.
  //@@
  rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}

  //@@  .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns
  //@@       (ServerMetadataResponse)
  //@@
  //@@     Get server metadata.
  //@@
  rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}

  //@@  .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns
  //@@       (ModelMetadataResponse)
  //@@
  //@@     Get model metadata.
  //@@
  rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}

  //@@  .. cpp:var:: rpc ModelInfer(ModelInferRequest) returns
  //@@       (ModelInferResponse)
  //@@
  //@@     Perform inference using a specific model.
  //@@
  rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}

  //@@  .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns
  //@@       (stream ModelStreamInferResponse)
  //@@
  //@@     Perform streaming inference.
  //@@
  rpc ModelStreamInfer(stream ModelInferRequest)
      returns (stream ModelStreamInferResponse) {}

  //@@  .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns
  //@@       (ModelConfigResponse)
  //@@
  //@@     Get model configuration.
  //@@
  rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {}

  //@@  .. cpp:var:: rpc ModelStatistics(
  //@@                     ModelStatisticsRequest)
  //@@                   returns (ModelStatisticsResponse)
  //@@
  //@@     Get the cumulative inference statistics for a model.
  //@@
  rpc ModelStatistics(ModelStatisticsRequest)
      returns (ModelStatisticsResponse) {}

  //@@  .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns
  //@@       (RepositoryIndexResponse)
  //@@
  //@@     Get the index of model repository contents.
  //@@
  rpc RepositoryIndex(RepositoryIndexRequest)
      returns (RepositoryIndexResponse) {}

  //@@  .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns
  //@@       (RepositoryModelLoadResponse)
  //@@
  //@@     Load or reload a model from a repository.
  //@@
  rpc RepositoryModelLoad(RepositoryModelLoadRequest)
      returns (RepositoryModelLoadResponse) {}

  //@@  .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
  //@@       returns (RepositoryModelUnloadResponse)
  //@@
  //@@     Unload a model.
  //@@
  rpc RepositoryModelUnload(RepositoryModelUnloadRequest)
      returns (RepositoryModelUnloadResponse) {}

  //@@  .. cpp:var:: rpc SystemSharedMemoryStatus(
  //@@                     SystemSharedMemoryStatusRequest)
  //@@                   returns (SystemSharedMemoryStatusResponse)
  //@@
  //@@     Get the status of all registered system-shared-memory regions.
  //@@
  rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest)
      returns (SystemSharedMemoryStatusResponse) {}

  //@@  .. cpp:var:: rpc SystemSharedMemoryRegister(
  //@@                     SystemSharedMemoryRegisterRequest)
  //@@                   returns (SystemSharedMemoryRegisterResponse)
  //@@
  //@@     Register a system-shared-memory region.
  //@@
  rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest)
      returns (SystemSharedMemoryRegisterResponse) {}

  //@@  .. cpp:var:: rpc SystemSharedMemoryUnregister(
  //@@                     SystemSharedMemoryUnregisterRequest)
  //@@                   returns (SystemSharedMemoryUnregisterResponse)
  //@@
  //@@     Unregister a system-shared-memory region.
  //@@
  rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest)
      returns (SystemSharedMemoryUnregisterResponse) {}

  //@@  .. cpp:var:: rpc CudaSharedMemoryStatus(
  //@@                     CudaSharedMemoryStatusRequest)
  //@@                   returns (CudaSharedMemoryStatusResponse)
  //@@
  //@@     Get the status of all registered CUDA-shared-memory regions.
  //@@
  rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest)
      returns (CudaSharedMemoryStatusResponse) {}

  //@@  .. cpp:var:: rpc CudaSharedMemoryRegister(
  //@@                     CudaSharedMemoryRegisterRequest)
  //@@                   returns (CudaSharedMemoryRegisterResponse)
  //@@
  //@@     Register a CUDA-shared-memory region.
  //@@
  rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest)
      returns (CudaSharedMemoryRegisterResponse) {}

  //@@  .. cpp:var:: rpc CudaSharedMemoryUnregister(
  //@@                     CudaSharedMemoryUnregisterRequest)
  //@@                   returns (CudaSharedMemoryUnregisterResponse)
  //@@
  //@@     Unregister a CUDA-shared-memory region.
  //@@
  rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest)
      returns (CudaSharedMemoryUnregisterResponse) {}

  //@@  .. cpp:var:: rpc TraceSetting(TraceSettingRequest)
  //@@                   returns (TraceSettingResponse)
  //@@
  //@@     Update and get the trace setting of the Triton server.
  //@@
  rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse) {}

  //@@  .. cpp:var:: rpc LogSettings(LogSettingsRequest)
  //@@                   returns (LogSettingsResponse)
  //@@
  //@@     Update and get the log settings of the Triton server.
  //@@
  rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse) {}
}

//@@
//@@.. cpp:var:: message ServerLiveRequest
//@@
//@@   Request message for ServerLive. Carries no fields.
//@@
message ServerLiveRequest {}

//@@
//@@.. cpp:var:: message ServerLiveResponse
//@@
//@@   Response message for ServerLive.
//@@
message ServerLiveResponse
{
  //@@
  //@@  .. cpp:var:: bool live
  //@@
  //@@     True if the inference server is live, false if not live.
  //@@
  bool live = 1;
}

//@@
//@@.. cpp:var:: message ServerReadyRequest
//@@
//@@   Request message for ServerReady. Carries no fields.
//@@
message ServerReadyRequest {}

//@@
//@@.. cpp:var:: message ServerReadyResponse
//@@
//@@   Response message for ServerReady.
//@@
message ServerReadyResponse
{
  //@@
  //@@  .. cpp:var:: bool ready
  //@@
  //@@     True if the inference server is ready, false if not ready.
  //@@
  bool ready = 1;
}

//@@
//@@.. cpp:var:: message ModelReadyRequest
//@@
//@@   Request message for ModelReady.
//@@
message ModelReadyRequest
{
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the model to check for readiness.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string version
  //@@
  //@@     The version of the model to check for readiness. If not given the
  //@@     server will choose a version based on the model and internal policy.
  //@@
  string version = 2;
}

//@@
//@@.. cpp:var:: message ModelReadyResponse
//@@
//@@   Response message for ModelReady.
//@@
message ModelReadyResponse
{
  //@@
  //@@  .. cpp:var:: bool ready
  //@@
  //@@     True if the model is ready, false if not ready.
  //@@
  bool ready = 1;
}

//@@
//@@.. cpp:var:: message ServerMetadataRequest
//@@
//@@   Request message for ServerMetadata. Carries no fields.
//@@
message ServerMetadataRequest {}

//@@
//@@.. cpp:var:: message ServerMetadataResponse
//@@
//@@   Response message for ServerMetadata.
//@@
message ServerMetadataResponse
{
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The server name.
  //@@
  string name = 1;

  //@@
  //@@  .. cpp:var:: string version
  //@@
  //@@     The server version.
  //@@
  string version = 2;

  //@@
  //@@  .. cpp:var:: string extensions (repeated)
  //@@
  //@@     The extensions supported by the server.
  //@@
  repeated string extensions = 3;
}

//@@
//@@.. cpp:var:: message ModelMetadataRequest
//@@
//@@   Request message for ModelMetadata.
//@@
message ModelMetadataRequest
{
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the model.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string version
  //@@
  //@@     The version of the model whose metadata is requested. If not
  //@@     given the server will choose a version based on the
  //@@     model and internal policy.
  //@@
  string version = 2;
}

//@@
//@@.. cpp:var:: message ModelMetadataResponse
//@@
//@@   Response message for ModelMetadata.
//@@
message ModelMetadataResponse
{
  //@@
  //@@  .. cpp:var:: message TensorMetadata
  //@@
  //@@     Metadata for a tensor.
  //@@
  message TensorMetadata
  {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@
    //@@    .. cpp:var:: string datatype
    //@@
    //@@       The tensor data type.
    //@@
    string datatype = 2;

    //@@
    //@@    .. cpp:var:: int64 shape (repeated)
    //@@
    //@@       The tensor shape. A variable-size dimension is represented
    //@@       by a -1 value.
    //@@
    repeated int64 shape = 3;
  }

  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The model name.
  //@@
  string name = 1;

  //@@
  //@@  .. cpp:var:: string versions (repeated)
  //@@
  //@@     The versions of the model.
  //@@
  repeated string versions = 2;

  //@@
  //@@  .. cpp:var:: string platform
  //@@
  //@@     The model's platform.
  //@@
  string platform = 3;

  //@@
  //@@  .. cpp:var:: TensorMetadata inputs (repeated)
  //@@
  //@@     The model's inputs.
  //@@
  repeated TensorMetadata inputs = 4;

  //@@
  //@@  .. cpp:var:: TensorMetadata outputs (repeated)
  //@@
  //@@     The model's outputs.
  //@@
  repeated TensorMetadata outputs = 5;
}

//@@
//@@.. cpp:var:: message InferParameter
//@@
//@@   An inference parameter value.
//@@
message InferParameter
{
  //@@  .. cpp:var:: oneof parameter_choice
  //@@
  //@@     The parameter value can be a string, an int64,
  //@@     a uint64, a double, or a boolean.
  //@@
  //@@     Note: double and uint64 are currently
  //@@           placeholders for future use and
  //@@           are not supported for custom parameters.
  //@@
  oneof parameter_choice
  {
    //@@    .. cpp:var:: bool bool_param
    //@@
    //@@       A boolean parameter value.
    //@@
    bool bool_param = 1;

    //@@    .. cpp:var:: int64 int64_param
    //@@
    //@@       An int64 parameter value.
    //@@
    int64 int64_param = 2;

    //@@    .. cpp:var:: string string_param
    //@@
    //@@       A string parameter value.
    //@@
    string string_param = 3;

    //@@    .. cpp:var:: double double_param
    //@@
    //@@       A double parameter value.
    //@@
    //@@       Not supported for custom parameters.
    //@@
    double double_param = 4;

    //@@    .. cpp:var:: uint64 uint64_param
    //@@
    //@@       A uint64 parameter value.
    //@@
    //@@       Not supported for custom parameters.
    //@@
    uint64 uint64_param = 5;
  }
}

//@@
//@@.. cpp:var:: message InferTensorContents
//@@
//@@   The data contained in a tensor represented by the repeated type
//@@   that matches the tensor's data type. Protobuf oneof is not used
//@@   because oneofs cannot contain repeated fields.
//@@
message InferTensorContents
{
  //@@
  //@@  .. cpp:var:: bool bool_contents (repeated)
  //@@
  //@@     Representation for BOOL data type. The size must match what is
  //@@     expected by the tensor's shape. The contents must be the flattened,
  //@@     one-dimensional, row-major order of the tensor elements.
  //@@
  repeated bool bool_contents = 1;

  //@@
  //@@  .. cpp:var:: int32 int_contents (repeated)
  //@@
  //@@     Representation for INT8, INT16, and INT32 data types. The size
  //@@     must match what is expected by the tensor's shape. The contents
  //@@     must be the flattened, one-dimensional, row-major order of the
  //@@     tensor elements.
  //@@
  repeated int32 int_contents = 2;

  //@@
  //@@  .. cpp:var:: int64 int64_contents (repeated)
  //@@
  //@@     Representation for INT64 data types. The size must match what
  //@@     is expected by the tensor's shape. The contents must be the
  //@@     flattened, one-dimensional, row-major order of the tensor elements.
  //@@
  repeated int64 int64_contents = 3;

  //@@
  //@@  .. cpp:var:: uint32 uint_contents (repeated)
  //@@
  //@@     Representation for UINT8, UINT16, and UINT32 data types. The size
  //@@     must match what is expected by the tensor's shape. The contents
  //@@     must be the flattened, one-dimensional, row-major order of the
  //@@     tensor elements.
  //@@
  repeated uint32 uint_contents = 4;

  //@@
  //@@  .. cpp:var:: uint64 uint64_contents (repeated)
  //@@
  //@@     Representation for UINT64 data types. The size must match what
  //@@     is expected by the tensor's shape. The contents must be the
  //@@     flattened, one-dimensional, row-major order of the tensor elements.
  //@@
  repeated uint64 uint64_contents = 5;

  //@@
  //@@  .. cpp:var:: float fp32_contents (repeated)
  //@@
  //@@     Representation for FP32 data type. The size must match what is
  //@@     expected by the tensor's shape. The contents must be the flattened,
  //@@     one-dimensional, row-major order of the tensor elements.
  //@@
  repeated float fp32_contents = 6;

  //@@
  //@@  .. cpp:var:: double fp64_contents (repeated)
  //@@
  //@@     Representation for FP64 data type. The size must match what is
  //@@     expected by the tensor's shape. The contents must be the flattened,
  //@@     one-dimensional, row-major order of the tensor elements.
  //@@
  repeated double fp64_contents = 7;

  //@@
  //@@  .. cpp:var:: bytes bytes_contents (repeated)
  //@@
  //@@     Representation for BYTES data type. The size must match what is
  //@@     expected by the tensor's shape. The contents must be the flattened,
  //@@     one-dimensional, row-major order of the tensor elements.
  //@@
  repeated bytes bytes_contents = 8;
}

//@@
//@@.. cpp:var:: message ModelInferRequest
//@@
//@@   Request message for ModelInfer.
//@@
message ModelInferRequest
{
  //@@
  //@@  .. cpp:var:: message InferInputTensor
  //@@
  //@@     An input tensor for an inference request.
  //@@
  message InferInputTensor
  {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@
    //@@    .. cpp:var:: string datatype
    //@@
    //@@       The tensor data type.
    //@@
    string datatype = 2;

    //@@
    //@@    .. cpp:var:: int64 shape (repeated)
    //@@
    //@@       The tensor shape.
    //@@
    repeated int64 shape = 3;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional inference input tensor parameters.
    //@@
    map<string, InferParameter> parameters = 4;

    //@@    .. cpp:var:: InferTensorContents contents
    //@@
    //@@       The tensor contents using a data-type format. This field
    //@@       must not be specified if tensor contents are being specified
    //@@       in ModelInferRequest.raw_input_contents.
    //@@
    InferTensorContents contents = 5;
  }

  //@@
  //@@  .. cpp:var:: message InferRequestedOutputTensor
  //@@
  //@@     An output tensor requested for an inference request.
  //@@
  message InferRequestedOutputTensor
  {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional requested output tensor parameters.
    //@@
    map<string, InferParameter> parameters = 2;
  }

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model to use for inferencing.
  //@@
  string model_name = 1;

  //@@  .. cpp:var:: string model_version
  //@@
  //@@     The version of the model to use for inference. If not
  //@@     given the latest/most-recent version of the model is used.
  //@@
  string model_version = 2;

  //@@  .. cpp:var:: string id
  //@@
  //@@     Optional identifier for the request. If specified will be
  //@@     returned in the response.
  //@@
  string id = 3;

  //@@  .. cpp:var:: map<string,InferParameter> parameters
  //@@
  //@@     Optional inference parameters.
  //@@
  map<string, InferParameter> parameters = 4;

  //@@
  //@@  .. cpp:var:: InferInputTensor inputs (repeated)
  //@@
  //@@     The input tensors for the inference.
  //@@
  repeated InferInputTensor inputs = 5;

  //@@
  //@@  .. cpp:var:: InferRequestedOutputTensor outputs (repeated)
  //@@
  //@@     The requested output tensors for the inference. Optional, if not
  //@@     specified all outputs specified in the model config will be
  //@@     returned.
  //@@
  repeated InferRequestedOutputTensor outputs = 6;

  //@@
  //@@  .. cpp:var:: bytes raw_input_contents (repeated)
  //@@
  //@@     The data contained in an input tensor can be represented in
  //@@     "raw" bytes form or in the repeated type that matches the
  //@@     tensor's data type. Using the "raw" bytes form will
  //@@     typically allow higher performance due to the way protobuf
  //@@     allocation and reuse interacts with GRPC. For example, see
  //@@     https://github.com/grpc/grpc/issues/23231.
  //@@
  //@@     To use the raw representation 'raw_input_contents' must be
  //@@     initialized with data for each tensor in the same order as
  //@@     'inputs'. For each tensor, the size of this content must
  //@@     match what is expected by the tensor's shape and data
  //@@     type. The raw data must be the flattened, one-dimensional,
  //@@     row-major order of the tensor elements without any stride
  //@@     or padding between the elements. Note that the FP16 and BF16 data
  //@@     types must be represented as raw content as there is no
  //@@     specific data type for a 16-bit float type.
  //@@
  //@@     If this field is specified then InferInputTensor::contents
  //@@     must not be specified for any input tensor.
  //@@
  repeated bytes raw_input_contents = 7;
}

//@@
//@@.. cpp:var:: message ModelInferResponse
//@@
//@@   Response message for ModelInfer.
//@@
message ModelInferResponse
{
  //@@
  //@@  .. cpp:var:: message InferOutputTensor
  //@@
  //@@     An output tensor returned for an inference request.
  //@@
  message InferOutputTensor
  {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The tensor name.
    //@@
    string name = 1;

    //@@
    //@@    .. cpp:var:: string datatype
    //@@
    //@@       The tensor data type.
    //@@
    string datatype = 2;

    //@@
    //@@    .. cpp:var:: int64 shape (repeated)
    //@@
    //@@       The tensor shape.
    //@@
    repeated int64 shape = 3;

    //@@    .. cpp:var:: map<string,InferParameter> parameters
    //@@
    //@@       Optional output tensor parameters.
    //@@
    map<string, InferParameter> parameters = 4;

    //@@    .. cpp:var:: InferTensorContents contents
    //@@
    //@@       The tensor contents using a data-type format. This field
    //@@       must not be specified if tensor contents are being specified
    //@@       in ModelInferResponse.raw_output_contents.
    //@@
    InferTensorContents contents = 5;
  }

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model used for inference.
  //@@
  string model_name = 1;

  //@@  .. cpp:var:: string model_version
  //@@
  //@@     The version of the model used for inference.
  //@@
  string model_version = 2;

  //@@  .. cpp:var:: string id
  //@@
  //@@     The id of the inference request if one was specified.
  //@@
  string id = 3;

  //@@  .. cpp:var:: map<string,InferParameter> parameters
  //@@
  //@@     Optional inference response parameters.
  //@@
  map<string, InferParameter> parameters = 4;

  //@@
  //@@  .. cpp:var:: InferOutputTensor outputs (repeated)
  //@@
  //@@     The output tensors holding inference results.
  //@@
  repeated InferOutputTensor outputs = 5;

  //@@
  //@@  .. cpp:var:: bytes raw_output_contents (repeated)
  //@@
  //@@     The data contained in an output tensor can be represented in
  //@@     "raw" bytes form or in the repeated type that matches the
  //@@     tensor's data type. Using the "raw" bytes form will
  //@@     typically allow higher performance due to the way protobuf
  //@@     allocation and reuse interacts with GRPC. For example, see
  //@@     https://github.com/grpc/grpc/issues/23231.
  //@@
  //@@     To use the raw representation 'raw_output_contents' must be
  //@@     initialized with data for each tensor in the same order as
  //@@     'outputs'. For each tensor, the size of this content must
  //@@     match what is expected by the tensor's shape and data
  //@@     type. The raw data must be the flattened, one-dimensional,
  //@@     row-major order of the tensor elements without any stride
  //@@     or padding between the elements. Note that the FP16 and BF16 data
  //@@     types must be represented as raw content as there is no
  //@@     specific data type for a 16-bit float type.
  //@@
  //@@     If this field is specified then InferOutputTensor::contents
  //@@     must not be specified for any output tensor.
  //@@
  repeated bytes raw_output_contents = 6;
}

//@@
//@@.. cpp:var:: message ModelStreamInferResponse
//@@
//@@   Response message for ModelStreamInfer.
//@@
message ModelStreamInferResponse
{
  //@@
  //@@  .. cpp:var:: string error_message
  //@@
  //@@     The message describing the error. The empty message
  //@@     indicates the inference was successful without errors.
  //@@
  string error_message = 1;

  //@@
  //@@  .. cpp:var:: ModelInferResponse infer_response
  //@@
  //@@     Holds the results of the request.
  //@@
  ModelInferResponse infer_response = 2;
}

//@@
//@@.. cpp:var:: message ModelConfigRequest
//@@
//@@   Request message for ModelConfig.
//@@
message ModelConfigRequest
{
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the model.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string version
  //@@
  //@@     The version of the model. If not given the model version
  //@@     is selected automatically based on the version policy.
  //@@
  string version = 2;
}

//@@
//@@.. cpp:var:: message ModelConfigResponse
//@@
//@@   Response message for ModelConfig.
//@@
message ModelConfigResponse
{
  //@@
  //@@  .. cpp:var:: ModelConfig config
  //@@
  //@@     The model configuration.
  //@@
  ModelConfig config = 1;
}

//@@
//@@.. cpp:var:: message ModelStatisticsRequest
//@@
//@@   Request message for ModelStatistics.
//@@
message ModelStatisticsRequest
{
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the model. If not given returns statistics for
  //@@     all models.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string version
  //@@
  //@@     The version of the model. If not given returns statistics for
  //@@     all model versions.
  //@@
  string version = 2;
}

//@@
//@@.. cpp:var:: message StatisticDuration
//@@
//@@   Statistic recording a cumulative duration metric.
//@@
message StatisticDuration
{
  //@@  .. cpp:var:: uint64 count
  //@@
  //@@     Cumulative number of times this metric occurred.
  //@@
  uint64 count = 1;

  //@@  .. cpp:var:: uint64 ns
  //@@
  //@@     Total collected duration of this metric in nanoseconds.
  //@@
  uint64 ns = 2;
}

//@@
//@@.. cpp:var:: message InferStatistics
//@@
//@@   Inference statistics.
//@@
message InferStatistics
{
  //@@  .. cpp:var:: StatisticDuration success
  //@@
  //@@     Cumulative count and duration for successful inference
  //@@     request. The "success" count and cumulative duration includes
  //@@     cache hits.
  //@@
  StatisticDuration success = 1;

  //@@  .. cpp:var:: StatisticDuration fail
  //@@
  //@@     Cumulative count and duration for failed inference
  //@@     request.
  //@@
  StatisticDuration fail = 2;

  //@@  .. cpp:var:: StatisticDuration queue
  //@@
  //@@     The count and cumulative duration that inference requests wait in
  //@@     scheduling or other queues. The "queue" count and cumulative
  //@@     duration includes cache hits.
  //@@
  StatisticDuration queue = 3;

  //@@  .. cpp:var:: StatisticDuration compute_input
  //@@
  //@@     The count and cumulative duration to prepare input tensor data as
  //@@     required by the model framework / backend. For example, this duration
  //@@     should include the time to copy input tensor data to the GPU.
  //@@     The "compute_input" count and cumulative duration do not account for
  //@@     requests that were a cache hit. See the "cache_hit" field for more
  //@@     info.
  //@@
  StatisticDuration compute_input = 4;

  //@@  .. cpp:var:: StatisticDuration compute_infer
  //@@
  //@@     The count and cumulative duration to execute the model.
  //@@     The "compute_infer" count and cumulative duration do not account for
  //@@     requests that were a cache hit. See the "cache_hit" field for more
  //@@     info.
  //@@
  StatisticDuration compute_infer = 5;

  //@@  .. cpp:var:: StatisticDuration compute_output
  //@@
  //@@     The count and cumulative duration to extract output tensor data
  //@@     produced by the model framework / backend. For example, this duration
  //@@     should include the time to copy output tensor data from the GPU.
  //@@     The "compute_output" count and cumulative duration do not account for
  //@@     requests that were a cache hit. See the "cache_hit" field for more
  //@@     info.
  //@@
  StatisticDuration compute_output = 6;

  //@@  .. cpp:var:: StatisticDuration cache_hit
  //@@
  //@@     The count of response cache hits and cumulative duration to lookup
  //@@     and extract output tensor data from the Response Cache on a cache
  //@@     hit. For example, this duration should include the time to copy
  //@@     output tensor data from the Response Cache to the response object.
  //@@     On cache hits, triton does not need to go to the model/backend
  //@@     for the output tensor data, so the "compute_input", "compute_infer",
  //@@     and "compute_output" fields are not updated. Assuming the response
  //@@     cache is enabled for a given model, a cache hit occurs for a
  //@@     request to that model when the request metadata (model name,
  //@@     model version, model inputs) hashes to an existing entry in the
  //@@     cache. On a cache miss, the request hash and response output tensor
  //@@     data is added to the cache. See response cache docs for more info:
  //@@
  //@@     https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
  //@@
  StatisticDuration cache_hit = 7;

  //@@  .. cpp:var:: StatisticDuration cache_miss
  //@@
  //@@     The count of response cache misses and cumulative duration to lookup
  //@@     and insert output tensor data from the computed response to the
  //@@     cache.
  //@@     For example, this duration should include the time to copy
  //@@     output tensor data from the response object to the Response Cache.
  //@@     Assuming the response cache is enabled for a given model, a cache
  //@@     miss occurs for a request to that model when the request metadata
  //@@     does NOT hash to an existing entry in the cache. See the response
  //@@     cache docs for more info:
  //@@
  //@@     https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md
  //@@
  StatisticDuration cache_miss = 8;
}

//@@
//@@.. cpp:var:: message InferBatchStatistics
//@@
//@@   Inference batch statistics.
//@@
message InferBatchStatistics
{
  //@@  .. cpp:var:: uint64 batch_size
  //@@
  //@@     The size of the batch.
  //@@
  uint64 batch_size = 1;

  //@@  .. cpp:var:: StatisticDuration compute_input
  //@@
  //@@     The count and cumulative duration to prepare input tensor data as
  //@@     required by the model framework / backend with the given batch size.
  //@@     For example, this duration should include the time to copy input
  //@@     tensor data to the GPU.
  //@@
  StatisticDuration compute_input = 2;

  //@@  .. cpp:var:: StatisticDuration compute_infer
  //@@
  //@@     The count and cumulative duration to execute the model with the given
  //@@     batch size.
  //@@
  StatisticDuration compute_infer = 3;

  //@@  .. cpp:var:: StatisticDuration compute_output
  //@@
  //@@     The count and cumulative duration to extract output tensor data
  //@@     produced by the model framework / backend with the given batch size.
  //@@     For example, this duration should include the time to copy output
  //@@     tensor data from the GPU.
  //@@
  StatisticDuration compute_output = 4;
}

//@@
//@@.. cpp:var:: message MemoryUsage
//@@
//@@   Memory usage.
//@@
message MemoryUsage
{
  //@@  .. cpp:var:: string type
  //@@
  //@@     The type of memory, the value can be "CPU", "CPU_PINNED", "GPU".
  //@@
  string type = 1;

  //@@  .. cpp:var:: int64 id
  //@@
  //@@     The id of the memory, typically used with "type" to identify
  //@@     a device that hosts the memory.
  //@@
  int64 id = 2;

  //@@  .. cpp:var:: uint64 byte_size
  //@@
  //@@     The byte size of the memory.
  //@@
  uint64 byte_size = 3;
}

//@@
//@@.. cpp:var:: message ModelStatistics
//@@
//@@   Statistics for a specific model and version.
//@@
message ModelStatistics {
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the model.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string version
  //@@
  //@@     The version of the model.
  //@@
  string version = 2;

  //@@  .. cpp:var:: uint64 last_inference
  //@@
  //@@     The timestamp of the last inference request made for this model,
  //@@     as milliseconds since the epoch.
  //@@
  uint64 last_inference = 3;

  //@@  .. cpp:var:: uint64 inference_count
  //@@
  //@@     The cumulative count of successful inference requests made for this
  //@@     model. Each inference in a batched request is counted as an
  //@@     individual inference. For example, if a client sends a single
  //@@     inference request with batch size 64, "inference_count" will be
  //@@     incremented by 64. Similarly, if a client sends 64 individual
  //@@     requests each with batch size 1, "inference_count" will be
  //@@     incremented by 64. The "inference_count" value DOES NOT include
  //@@     cache hits.
  //@@
  uint64 inference_count = 4;

  //@@  .. cpp:var:: uint64 execution_count
  //@@
  //@@     The cumulative count of the number of successful inference
  //@@     executions performed for the model. When dynamic batching is
  //@@     enabled, a single model execution can perform inferencing for more
  //@@     than one inference request. For example, if a client sends 64
  //@@     individual requests each with batch size 1 and the dynamic batcher
  //@@     batches them into a single large batch for model execution, then
  //@@     "execution_count" will be incremented by 1. If, on the other hand,
  //@@     the dynamic batcher is not enabled, so that each of the 64
  //@@     individual requests is executed independently, then
  //@@     "execution_count" will be incremented by 64. The "execution_count"
  //@@     value DOES NOT include cache hits.
  //@@
  uint64 execution_count = 5;

  //@@  .. cpp:var:: InferStatistics inference_stats
  //@@
  //@@     The aggregate statistics for the model/version.
  //@@
  InferStatistics inference_stats = 6;

  //@@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
  //@@
  //@@     The aggregate statistics for each different batch size that is
  //@@     executed in the model. The batch statistics indicate how many
  //@@     actual model executions were performed and show differences due to
  //@@     different batch size (for example, larger batches typically take
  //@@     longer to compute).
  //@@
  repeated InferBatchStatistics batch_stats = 7;

  //@@  .. cpp:var:: MemoryUsage memory_usage (repeated)
  //@@
  //@@     The memory usage detected during model loading, which may be used
  //@@     to estimate the memory to be released once the model is unloaded.
  //@@     Note that the estimation is inferred from the profiling tools and
  //@@     the framework's memory schema, therefore it is advised to perform
  //@@     experiments to understand the scenarios in which the reported
  //@@     memory usage can be relied on. As a starting point, the GPU memory
  //@@     usage for models in ONNX Runtime backend and TensorRT backend is
  //@@     usually aligned.
  //@@
  repeated MemoryUsage memory_usage = 8;
}
|
|
|
|
//@@
//@@.. cpp:var:: message ModelStatisticsResponse
//@@
//@@   The response returned by ModelStatistics.
//@@
message ModelStatisticsResponse {
  //@@  .. cpp:var:: ModelStatistics model_stats (repeated)
  //@@
  //@@     One statistics entry for each requested model.
  //@@
  repeated ModelStatistics model_stats = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message ModelRepositoryParameter
//@@
//@@   A model repository parameter value.
//@@
message ModelRepositoryParameter {
  //@@  .. cpp:var:: oneof parameter_choice
  //@@
  //@@     The parameter value can be a boolean, an int64, a string or
  //@@     bytes.
  //@@
  oneof parameter_choice {
    //@@    .. cpp:var:: bool bool_param
    //@@
    //@@       A boolean parameter value.
    //@@
    bool bool_param = 1;

    //@@    .. cpp:var:: int64 int64_param
    //@@
    //@@       An int64 parameter value.
    //@@
    int64 int64_param = 2;

    //@@    .. cpp:var:: string string_param
    //@@
    //@@       A string parameter value.
    //@@
    string string_param = 3;

    //@@    .. cpp:var:: bytes bytes_param
    //@@
    //@@       A bytes parameter value.
    //@@
    bytes bytes_param = 4;
  }
}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryIndexRequest
//@@
//@@   The request sent to RepositoryIndex.
//@@
message RepositoryIndexRequest {
  //@@  .. cpp:var:: string repository_name
  //@@
  //@@     The name of the repository. When empty, the index is returned
  //@@     for all repositories.
  //@@
  string repository_name = 1;

  //@@  .. cpp:var:: bool ready
  //@@
  //@@     When true, return only models currently ready for inferencing.
  //@@
  bool ready = 2;
}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryIndexResponse
//@@
//@@   The response returned by RepositoryIndex.
//@@
message RepositoryIndexResponse {
  //@@
  //@@  .. cpp:var:: message ModelIndex
  //@@
  //@@     A single model's entry in the index.
  //@@
  message ModelIndex {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The model's name.
    //@@
    string name = 1;

    //@@    .. cpp:var:: string version
    //@@
    //@@       The model's version.
    //@@
    string version = 2;

    //@@
    //@@    .. cpp:var:: string state
    //@@
    //@@       The model's state.
    //@@
    string state = 3;

    //@@
    //@@    .. cpp:var:: string reason
    //@@
    //@@       Why the model is in the given state, if a reason is available.
    //@@
    string reason = 4;
  }

  //@@
  //@@  .. cpp:var:: ModelIndex models (repeated)
  //@@
  //@@     The index entries, one per model.
  //@@
  repeated ModelIndex models = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryModelLoadRequest
//@@
//@@   Request message for RepositoryModelLoad.
//@@
message RepositoryModelLoadRequest {
  //@@  .. cpp:var:: string repository_name
  //@@
  //@@     The name of the repository to load from. If empty the model
  //@@     is loaded from any repository.
  //@@
  string repository_name = 1;

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model to load, or reload.
  //@@
  string model_name = 2;

  //@@  .. cpp:var:: map<string,ModelRepositoryParameter> parameters
  //@@
  //@@     Optional model repository request parameters.
  //@@
  map<string, ModelRepositoryParameter> parameters = 3;
}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryModelLoadResponse
//@@
//@@   The response returned by RepositoryModelLoad. It declares no fields.
//@@
message RepositoryModelLoadResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryModelUnloadRequest
//@@
//@@   Request message for RepositoryModelUnload.
//@@
message RepositoryModelUnloadRequest {
  //@@  .. cpp:var:: string repository_name
  //@@
  //@@     The name of the repository from which the model was originally
  //@@     loaded. If empty the repository is not considered.
  //@@
  string repository_name = 1;

  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model to unload.
  //@@
  string model_name = 2;

  //@@  .. cpp:var:: map<string,ModelRepositoryParameter> parameters
  //@@
  //@@     Optional model repository request parameters.
  //@@
  map<string, ModelRepositoryParameter> parameters = 3;
}
|
|
|
|
//@@
//@@.. cpp:var:: message RepositoryModelUnloadResponse
//@@
//@@   The response returned by RepositoryModelUnload. It declares no
//@@   fields.
//@@
message RepositoryModelUnloadResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusRequest
//@@
//@@   The request sent to SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the region whose status is requested. When empty,
  //@@     the status is returned for all registered regions.
  //@@
  string name = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryStatusResponse
//@@
//@@   Response message for SystemSharedMemoryStatus.
//@@
message SystemSharedMemoryStatusResponse {
  //@@
  //@@  .. cpp:var:: message RegionStatus
  //@@
  //@@     Status for a shared memory region.
  //@@
  message RegionStatus {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The name for the shared memory region.
    //@@
    string name = 1;

    //@@    .. cpp:var:: string key
    //@@
    //@@       The key of the underlying memory object that contains the
    //@@       shared memory region.
    //@@
    string key = 2;

    //@@    .. cpp:var:: uint64 offset
    //@@
    //@@       Offset, in bytes, within the underlying memory object to
    //@@       the start of the shared memory region.
    //@@
    uint64 offset = 3;

    //@@    .. cpp:var:: uint64 byte_size
    //@@
    //@@       Size of the shared memory region, in bytes.
    //@@
    uint64 byte_size = 4;
  }

  //@@
  //@@  .. cpp:var:: map<string,RegionStatus> regions
  //@@
  //@@     Status for each of the registered regions, indexed by
  //@@     region name.
  //@@
  map<string, RegionStatus> regions = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest
//@@
//@@   Request message for SystemSharedMemoryRegister.
//@@
message SystemSharedMemoryRegisterRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the region to register.
  //@@
  string name = 1;

  //@@  .. cpp:var:: string key
  //@@
  //@@     The key of the underlying memory object that contains the
  //@@     shared memory region.
  //@@
  string key = 2;

  //@@  .. cpp:var:: uint64 offset
  //@@
  //@@     Offset, in bytes, within the underlying memory object to
  //@@     the start of the shared memory region.
  //@@
  uint64 offset = 3;

  //@@  .. cpp:var:: uint64 byte_size
  //@@
  //@@     Size of the shared memory region, in bytes.
  //@@
  uint64 byte_size = 4;
}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse
//@@
//@@   The response returned by SystemSharedMemoryRegister. It declares no
//@@   fields.
//@@
message SystemSharedMemoryRegisterResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest
//@@
//@@   The request sent to SystemSharedMemoryUnregister.
//@@
message SystemSharedMemoryUnregisterRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the system region to unregister. When empty, all
  //@@     system shared-memory regions are unregistered.
  //@@
  string name = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse
//@@
//@@   The response returned by SystemSharedMemoryUnregister. It declares
//@@   no fields.
//@@
message SystemSharedMemoryUnregisterResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusRequest
//@@
//@@   The request sent to CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the region whose status is requested. When empty,
  //@@     the status is returned for all registered regions.
  //@@
  string name = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryStatusResponse
//@@
//@@   Response message for CudaSharedMemoryStatus.
//@@
message CudaSharedMemoryStatusResponse {
  //@@
  //@@  .. cpp:var:: message RegionStatus
  //@@
  //@@     Status for a shared memory region.
  //@@
  message RegionStatus {
    //@@
    //@@    .. cpp:var:: string name
    //@@
    //@@       The name for the shared memory region.
    //@@
    string name = 1;

    //@@    .. cpp:var:: uint64 device_id
    //@@
    //@@       The GPU device ID where the cudaIPC handle was created.
    //@@
    // NOTE(review): CudaSharedMemoryRegisterRequest declares device_id as
    // int64 while this field is uint64. The type is left unchanged here to
    // keep the generated API stable.
    uint64 device_id = 2;

    //@@    .. cpp:var:: uint64 byte_size
    //@@
    //@@       Size of the shared memory region, in bytes.
    //@@
    uint64 byte_size = 3;
  }

  //@@
  //@@  .. cpp:var:: map<string,RegionStatus> regions
  //@@
  //@@     Status for each of the registered regions, indexed by
  //@@     region name.
  //@@
  map<string, RegionStatus> regions = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest
//@@
//@@   The request sent to CudaSharedMemoryRegister.
//@@
message CudaSharedMemoryRegisterRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name under which the region is registered.
  //@@
  string name = 1;

  //@@  .. cpp:var:: bytes raw_handle
  //@@
  //@@     The cudaIPC handle, in raw serialized form.
  //@@
  bytes raw_handle = 2;

  //@@  .. cpp:var:: int64 device_id
  //@@
  //@@     The ID of the GPU device on which the cudaIPC handle was created.
  //@@
  int64 device_id = 3;

  //@@  .. cpp:var:: uint64 byte_size
  //@@
  //@@     The size of the shared memory block, in bytes.
  //@@
  uint64 byte_size = 4;
}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse
//@@
//@@   The response returned by CudaSharedMemoryRegister. It declares no
//@@   fields.
//@@
message CudaSharedMemoryRegisterResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest
//@@
//@@   The request sent to CudaSharedMemoryUnregister.
//@@
message CudaSharedMemoryUnregisterRequest {
  //@@
  //@@  .. cpp:var:: string name
  //@@
  //@@     The name of the cuda region to unregister. When empty, all cuda
  //@@     shared-memory regions are unregistered.
  //@@
  string name = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse
//@@
//@@   The response returned by CudaSharedMemoryUnregister. It declares no
//@@   fields.
//@@
message CudaSharedMemoryUnregisterResponse {}
|
|
|
|
//@@
//@@.. cpp:var:: message TraceSettingRequest
//@@
//@@   The request sent to TraceSetting.
//@@
message TraceSettingRequest {
  //@@
  //@@  .. cpp:var:: message SettingValue
  //@@
  //@@     The values associated with one trace setting. When no value is
  //@@     provided, the setting is cleared and the global setting value is
  //@@     used.
  //@@
  message SettingValue {
    //@@
    //@@    .. cpp:var:: string value (repeated)
    //@@
    //@@       The value.
    //@@
    repeated string value = 1;
  }

  //@@  .. cpp:var:: map<string,SettingValue> settings
  //@@
  //@@     The new setting values to apply. Settings that are not specified
  //@@     remain unchanged.
  //@@
  map<string, SettingValue> settings = 1;

  //@@
  //@@  .. cpp:var:: string model_name
  //@@
  //@@     The name of the model the new trace settings apply to. When not
  //@@     given, the new settings are applied globally.
  //@@
  string model_name = 2;
}
|
|
|
|
//@@
//@@.. cpp:var:: message TraceSettingResponse
//@@
//@@   The response returned by TraceSetting.
//@@
message TraceSettingResponse {
  //@@
  //@@  .. cpp:var:: message SettingValue
  //@@
  //@@     The values associated with one trace setting.
  //@@
  message SettingValue {
    //@@
    //@@    .. cpp:var:: string value (repeated)
    //@@
    //@@       The value.
    //@@
    repeated string value = 1;
  }

  //@@  .. cpp:var:: map<string,SettingValue> settings
  //@@
  //@@     The trace settings currently in effect, including any changes
  //@@     specified by TraceSettingRequest.
  //@@
  map<string, SettingValue> settings = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message LogSettingsRequest
//@@
//@@   The request sent to LogSettings.
//@@
message LogSettingsRequest {
  //@@
  //@@  .. cpp:var:: message SettingValue
  //@@
  //@@     The value of one log setting.
  //@@
  message SettingValue {
    //@@    .. cpp:var:: oneof parameter_choice
    //@@
    //@@       The value can be a boolean, a uint32 or a string.
    //@@
    oneof parameter_choice {
      //@@      .. cpp:var:: bool bool_param
      //@@
      //@@         A boolean parameter value.
      //@@
      bool bool_param = 1;

      //@@      .. cpp:var:: uint32 uint32_param
      //@@
      //@@         A uint32 parameter value.
      //@@
      uint32 uint32_param = 2;

      //@@      .. cpp:var:: string string_param
      //@@
      //@@         A string parameter value.
      //@@
      string string_param = 3;
    }
  }

  //@@  .. cpp:var:: map<string,SettingValue> settings
  //@@
  //@@     The current log settings.
  //@@
  map<string, SettingValue> settings = 1;
}
|
|
|
|
//@@
//@@.. cpp:var:: message LogSettingsResponse
//@@
//@@   Response message for LogSettings.
//@@
message LogSettingsResponse {
  //@@
  //@@  .. cpp:var:: message SettingValue
  //@@
  //@@     The value of one log setting.
  //@@
  message SettingValue {
    //@@    .. cpp:var:: oneof parameter_choice
    //@@
    //@@       The value can be a boolean, a uint32 or a string.
    //@@
    oneof parameter_choice {
      //@@      .. cpp:var:: bool bool_param
      //@@
      //@@         A boolean parameter value.
      //@@
      bool bool_param = 1;

      //@@      .. cpp:var:: uint32 uint32_param
      //@@
      //@@         A uint32 parameter value.
      //@@
      uint32 uint32_param = 2;

      //@@      .. cpp:var:: string string_param
      //@@
      //@@         A string parameter value.
      //@@
      string string_param = 3;
    }
  }

  //@@  .. cpp:var:: map<string,SettingValue> settings
  //@@
  //@@     The current log settings.
  //@@
  map<string, SettingValue> settings = 1;
}
|