mirror of https://github.com/cri-o/cri-o.git
server: handle missing network namespace gracefully during networkStop
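After a host reboot, network namespaces are destroyed, but CRI-O still attempts to clean them up during pod sandbox destruction, which causes CNI plugin failures and prevents pods from restarting properly. The fix checks that the network namespace file exists and is valid before attempting CNI teardown; if it is missing or invalid, teardown is skipped and the network is marked as stopped, so pods can restart normally after host reboots. Signed-off-by: Sohan Kunkerkar <sohank2602@gmail.com>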
This commit is contained in:
parent
2edb23fd54
commit
d17990aff1
|
|
@ -181,23 +181,42 @@ func (s *Server) networkStop(ctx context.Context, sb *sandbox.Sandbox) error {
|
|||
|
||||
podNetwork, err := s.newPodNetwork(ctx, sb)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to create pod network for sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
|
||||
}
|
||||
|
||||
// Check if the network namespace file exists and is valid before attempting CNI teardown.
|
||||
// If the file doesn't exist or is invalid, skip CNI teardown and mark network as stopped.
|
||||
if podNetwork.NetNS != "" {
|
||||
if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil {
|
||||
// Network namespace file doesn't exist, mark network as stopped and return success
|
||||
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), skipping CNI teardown",
|
||||
podNetwork.NetNS, sb.Name(), sb.ID())
|
||||
|
||||
return sb.SetNetworkStopped(ctx, true)
|
||||
}
|
||||
|
||||
if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
|
||||
// Network namespace file exists but is invalid (e.g., corrupted or fake file)
|
||||
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and skipping CNI teardown",
|
||||
podNetwork.NetNS, sb.Name(), sb.ID(), validateErr)
|
||||
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
|
||||
|
||||
return sb.SetNetworkStopped(ctx, true)
|
||||
}
|
||||
}
|
||||
|
||||
if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil {
|
||||
retErr := fmt.Errorf("failed to destroy network for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
|
||||
log.Warnf(ctx, "Failed to destroy network for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)
|
||||
|
||||
if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil {
|
||||
return fmt.Errorf("%w: stat netns path %q: %w", retErr, podNetwork.NetNS, statErr)
|
||||
// If the network namespace exists but CNI teardown failed, try to clean it up.
|
||||
if podNetwork.NetNS != "" {
|
||||
if _, statErr := os.Stat(podNetwork.NetNS); statErr == nil {
|
||||
// Clean up the netns file since CNI teardown failed.
|
||||
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
|
||||
}
|
||||
}
|
||||
|
||||
// The netns file may still exist, which means that it's likely
|
||||
// corrupted. Remove it to allow cleanup of the network namespace:
|
||||
if rmErr := os.RemoveAll(podNetwork.NetNS); rmErr != nil {
|
||||
return fmt.Errorf("%w: failed to remove netns path: %w", retErr, rmErr)
|
||||
}
|
||||
|
||||
log.Warnf(ctx, "Removed invalid netns path %s from pod sandbox %s(%s)", podNetwork.NetNS, sb.Name(), sb.ID())
|
||||
return fmt.Errorf("network teardown failed for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
|
||||
}
|
||||
|
||||
return sb.SetNetworkStopped(ctx, true)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,24 @@
|
|||
//go:build freebsd
|
||||
// +build freebsd
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/cri-o/cri-o/internal/lib/sandbox"
|
||||
"github.com/cri-o/cri-o/internal/log"
|
||||
)
|
||||
|
||||
// validateNetworkNamespace checks if the given path is a valid network namespace
|
||||
// On FreeBSD, this is a no-op since network namespaces are Linux-specific.
|
||||
func (s *Server) validateNetworkNamespace(netnsPath string) error {
|
||||
// Network namespaces are Linux-specific, so on FreeBSD we assume it's valid
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupNetns removes a network namespace file and logs the action
|
||||
// On FreeBSD, this is a no-op since network namespaces are Linux-specific.
|
||||
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
|
||||
log.Debugf(ctx, "Network namespace cleanup not supported on this platform")
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/containernetworking/plugins/pkg/ns"
|
||||
|
||||
"github.com/cri-o/cri-o/internal/lib/sandbox"
|
||||
"github.com/cri-o/cri-o/internal/log"
|
||||
)
|
||||
|
||||
// validateNetworkNamespace checks if the given path is a valid network namespace.
|
||||
func (s *Server) validateNetworkNamespace(netnsPath string) error {
|
||||
netns, err := ns.GetNS(netnsPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid network namespace: %w", err)
|
||||
}
|
||||
|
||||
defer netns.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupNetns removes a network namespace file and logs the action.
|
||||
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
|
||||
if rmErr := os.RemoveAll(netnsPath); rmErr != nil {
|
||||
log.Warnf(ctx, "Failed to remove netns path %s: %v", netnsPath, rmErr)
|
||||
} else {
|
||||
log.Infof(ctx, "Removed netns path %s from pod sandbox %s(%s)", netnsPath, sb.Name(), sb.ID())
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
//go:build !linux && !freebsd
|
||||
// +build !linux,!freebsd
|
||||
|
||||
package server
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/cri-o/cri-o/internal/lib/sandbox"
|
||||
"github.com/cri-o/cri-o/internal/log"
|
||||
)
|
||||
|
||||
// validateNetworkNamespace checks if the given path is a valid network namespace
|
||||
// On unsupported platforms, this is a no-op since network namespaces are Linux-specific.
|
||||
func (s *Server) validateNetworkNamespace(netnsPath string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupNetns removes a network namespace file and logs the action
|
||||
// On unsupported platforms, this is a no-op since network namespaces are Linux-specific.
|
||||
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
|
||||
log.Debugf(ctx, "Network namespace cleanup not supported on this platform")
|
||||
}
|
||||
|
|
@ -185,5 +185,5 @@ function check_networking() {
|
|||
|
||||
# be able to remove the sandbox
|
||||
crictl rmp -f "$POD"
|
||||
grep -q "Removed invalid netns path $NETNS_PATH$NS from pod sandbox" "$CRIO_LOG"
|
||||
grep -q "Removed netns path $NETNS_PATH$NS from pod sandbox" "$CRIO_LOG"
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue