inference: Fix ignoring parse errors for unknown models

For now, we ignore parse errors for models that gguf-parser-go cannot parse yet.
This behaviour regressed in the pre-pull memory estimation PR.

Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
Piotr Stankiewicz 2025-08-22 14:46:19 +02:00
parent 6d72f943f6
commit 03f7adc077
3 changed files with 12 additions and 8 deletions


@@ -2,7 +2,6 @@ package inference
 import (
 	"context"
-	"errors"
 	"net/http"
 )
@@ -18,9 +17,13 @@ const (
 	BackendModeEmbedding
 )
 
-var (
-	ErrGGUFParse = errors.New("failed to parse GGUF file")
-)
+type ErrGGUFParse struct {
+	Err error
+}
+
+func (e *ErrGGUFParse) Error() string {
+	return "failed to parse GGUF: " + e.Err.Error()
+}
 
 // String implements Stringer.String for BackendMode.
 func (m BackendMode) String() string {

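For reference, a minimal, self-contained sketch (not part of this commit) of how the new typed error is meant to be produced and detected with errors.As; the parse helper and error text below are illustrative only.

package main

import (
	"errors"
	"fmt"
)

// ErrGGUFParse mirrors the type introduced in the hunk above.
type ErrGGUFParse struct {
	Err error
}

func (e *ErrGGUFParse) Error() string {
	return "failed to parse GGUF: " + e.Err.Error()
}

// parse is a hypothetical stand-in for a backend call that fails on an
// unknown model architecture.
func parse() error {
	return &ErrGGUFParse{Err: errors.New("unknown architecture")}
}

func main() {
	err := parse()
	var parseErr *ErrGGUFParse
	if errors.As(err, &parseErr) {
		// Matched on the concrete type; the underlying cause is available.
		fmt.Println("ignoring parse error:", parseErr.Err)
	}
}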

@@ -235,12 +235,12 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string,
 	if inStore {
 		mdlGguf, mdlConfig, err = l.parseLocalModel(model)
 		if err != nil {
-			return nil, fmt.Errorf("parsing local gguf: %w", err)
+			return nil, &inference.ErrGGUFParse{Err: err}
 		}
 	} else {
 		mdlGguf, mdlConfig, err = l.parseRemoteModel(ctx, model)
 		if err != nil {
-			return nil, fmt.Errorf("parsing remote model: %w", err)
+			return nil, &inference.ErrGGUFParse{Err: err}
 		}
 	}


@@ -421,11 +421,12 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 		runnerConfig = &rc
 	}
 	memory, err := backend.GetRequiredMemoryForModel(ctx, modelID, runnerConfig)
-	if errors.Is(err, inference.ErrGGUFParse) {
+	var parseErr *inference.ErrGGUFParse
+	if errors.As(err, &parseErr) {
 		// TODO(p1-0tr): For now override memory checks in case model can't be parsed
 		// e.g. model is too new for gguf-parser-go to know. We should provide a cleaner
 		// way to bypass these checks.
-		l.log.Warnf("Could not parse model(%s), memory checks will be ignored for it.", modelID)
+		l.log.Warnf("Could not parse model(%s), memory checks will be ignored for it. Error: %s", modelID, parseErr)
 		memory = &inference.RequiredMemory{
 			RAM:  0,
 			VRAM: 0,