container kill: handle stopped/exited container
The container lock is released before stopping/killing which implies
certain race conditions with, for instance, the cleanup process changing
the container state to stopped, exited or other states.
The (remaining) flakes seen in #16142 and #15367 strongly indicate a
race in between the stopping/killing a container and the cleanup
process.  To fix the flake make sure to ignore invalid-state errors.
An alternative fix would be to change `KillContainer` to not return such
errors at all but commit c77691f06f indicates an explicit desire to
have these errors being reported in the sig proxy.
[NO NEW TESTS NEEDED] as it's a race already covered by the system
tests.
Fixes: #16142
Fixes: #15367
Signed-off-by: Valentin Rothberg <vrothberg@redhat.com>
			
			
This commit is contained in:
		
							parent
							
								
									6f919af78b
								
							
						
					
					
						commit
						067442b570
					
				|  | @ -429,6 +429,10 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { | 	if err := r.KillContainer(ctr, uint(unix.SIGKILL), all); err != nil { | ||||||
|  | 		// Ignore the error if KillContainer complains about it already
 | ||||||
|  | 		// being stopped or exited.  There's an inherent race with the
 | ||||||
|  | 		// cleanup process (see #16142).
 | ||||||
|  | 		if !(errors.Is(err, define.ErrCtrStateInvalid) && ctr.ensureState(define.ContainerStateStopped, define.ContainerStateExited)) { | ||||||
| 			// If the PID is 0, then the container is already stopped.
 | 			// If the PID is 0, then the container is already stopped.
 | ||||||
| 			if ctr.state.PID == 0 { | 			if ctr.state.PID == 0 { | ||||||
| 				return nil | 				return nil | ||||||
|  | @ -439,6 +443,7 @@ func (r *ConmonOCIRuntime) StopContainer(ctr *Container, timeout uint, all bool) | ||||||
| 			} | 			} | ||||||
| 			return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) | 			return fmt.Errorf("sending SIGKILL to container %s: %w", ctr.ID(), err) | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
| 
 | 
 | ||||||
| 	// Give runtime a few seconds to make it happen
 | 	// Give runtime a few seconds to make it happen
 | ||||||
| 	if err := waitContainerStop(ctr, killContainerTimeout); err != nil { | 	if err := waitContainerStop(ctr, killContainerTimeout); err != nil { | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue