inference: Fix ignoring parse errors for unknown models
We ignore parse errors for models that gguf-parser-go can't parse yet, for now. This regressed in the pre-pull memory estimation PR. Signed-off-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
parent
6d72f943f6
commit
03f7adc077
|
|
@ -2,7 +2,6 @@ package inference
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
|
|
@ -18,9 +17,13 @@ const (
|
|||
BackendModeEmbedding
|
||||
)
|
||||
|
||||
var (
|
||||
ErrGGUFParse = errors.New("failed to parse GGUF file")
|
||||
)
|
||||
// ErrGGUFParse reports that a model's GGUF metadata could not be parsed,
// e.g. because the model is newer than gguf-parser-go understands.
// Callers detect it with errors.As and may choose to bypass memory checks
// for such models. It wraps the underlying parser error.
//
// NOTE(review): Go convention would name this type GGUFParseError
// (ErrXxx is for sentinel values); kept as-is to preserve the public API.
type ErrGGUFParse struct {
	// Err is the underlying error from the GGUF parser; may be nil.
	Err error
}

// Error implements the error interface.
func (e *ErrGGUFParse) Error() string {
	// Tolerate a nil inner error so a zero-value ErrGGUFParse is safe to format.
	if e.Err == nil {
		return "failed to parse GGUF"
	}
	return "failed to parse GGUF: " + e.Err.Error()
}

// Unwrap returns the wrapped parser error so errors.Is and errors.As
// can traverse the chain to the root cause.
func (e *ErrGGUFParse) Unwrap() error {
	return e.Err
}
|
||||
|
||||
// String implements Stringer.String for BackendMode.
|
||||
func (m BackendMode) String() string {
|
||||
|
|
|
|||
|
|
@ -235,12 +235,12 @@ func (l *llamaCpp) GetRequiredMemoryForModel(ctx context.Context, model string,
|
|||
if inStore {
|
||||
mdlGguf, mdlConfig, err = l.parseLocalModel(model)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing local gguf: %w", err)
|
||||
return nil, &inference.ErrGGUFParse{Err: err}
|
||||
}
|
||||
} else {
|
||||
mdlGguf, mdlConfig, err = l.parseRemoteModel(ctx, model)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing remote model: %w", err)
|
||||
return nil, &inference.ErrGGUFParse{Err: err}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -421,11 +421,12 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
|
|||
runnerConfig = &rc
|
||||
}
|
||||
memory, err := backend.GetRequiredMemoryForModel(ctx, modelID, runnerConfig)
|
||||
if errors.Is(err, inference.ErrGGUFParse) {
|
||||
var parseErr *inference.ErrGGUFParse
|
||||
if errors.As(err, &parseErr) {
|
||||
// TODO(p1-0tr): For now override memory checks in case model can't be parsed
|
||||
// e.g. model is too new for gguf-parser-go to know. We should provide a cleaner
|
||||
// way to bypass these checks.
|
||||
l.log.Warnf("Could not parse model(%s), memory checks will be ignored for it.", modelID)
|
||||
l.log.Warnf("Could not parse model(%s), memory checks will be ignored for it. Error: %s", modelID, parseErr)
|
||||
memory = &inference.RequiredMemory{
|
||||
RAM: 0,
|
||||
VRAM: 0,
|
||||
|
|
|
|||
Loading…
Reference in New Issue