Merge pull request #94 from mheon/restart

Add handling for system restart in libpod
Daniel J Walsh, 2017-12-11 10:47:50 -06:00 (committed by GitHub)
commit 62e19beeec
5 changed files with 158 additions and 5 deletions
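
The change below boils down to a sentinel-file scheme: libpod writes an "alive" file into its temporary directory, which is expected to sit on tmpfs, so the file vanishes on reboot; when the next runtime initialization finds it missing, cached state is refreshed and the file is recreated. A minimal standalone sketch of that idea (not code from this pull request; the directory path and messages are illustrative only):

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	tmpDir := "/tmp/example-libpod" // stand-in for a tmpfs-backed tmp directory
	aliveFile := filepath.Join(tmpDir, "alive")

	if _, err := os.Stat(aliveFile); os.IsNotExist(err) {
		// Sentinel is gone: first run or the system rebooted, so anything
		// cached under tmpDir has to be rebuilt before recreating it.
		fmt.Println("refreshing state after restart/first run")
		if err := os.MkdirAll(tmpDir, 0700); err != nil {
			panic(err)
		}
		f, err := os.OpenFile(aliveFile, os.O_RDONLY|os.O_CREATE, 0644)
		if err != nil {
			panic(err)
		}
		f.Close()
	} else if err != nil {
		panic(err)
	} else {
		fmt.Println("already initialized since the last boot")
	}
}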


@@ -420,6 +420,30 @@ func (c *Container) teardownStorage() error {
	return nil
}

// Refresh refreshes the container's state after a restart
func (c *Container) refresh() error {
	c.lock.Lock()
	defer c.lock.Unlock()

	if !c.valid {
		return errors.Wrapf(ErrCtrRemoved, "container %s is not valid - may have been removed", c.ID())
	}

	// We need to get the container's temporary directory from c/storage
	// It was lost in the reboot and must be recreated
	dir, err := c.runtime.storageService.GetRunDir(c.ID())
	if err != nil {
		return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID())
	}
	c.state.RunDir = dir

	if err := c.runtime.state.SaveContainer(c); err != nil {
		return errors.Wrapf(err, "error refreshing state for container %s", c.ID())
	}

	return nil
}

// Init creates a container in the OCI runtime
func (c *Container) Init() (err error) {
	c.lock.Lock()
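
The refresh above only has to rebuild the container's run directory because that path lives under the runtime's tmpfs-backed temporary storage and does not survive a reboot. As a rough, hypothetical illustration of the pattern (none of these names come from libpod), a cached tmpfs path can be re-resolved from the persistent layer whenever it has disappeared:

package main

import (
	"fmt"
	"os"
)

// ensureRunDir is a hypothetical helper: a path cached from a tmpfs mount
// cannot be trusted after a reboot and must be re-resolved from persistent
// storage once it is gone.
func ensureRunDir(cached string, resolve func() (string, error)) (string, error) {
	if cached != "" {
		if _, err := os.Stat(cached); err == nil {
			return cached, nil // still present, no reboot wiped it
		}
	}
	return resolve() // e.g. ask the storage service for the authoritative location
}

func main() {
	dir, err := ensureRunDir("/run/example/ctr", func() (string, error) {
		return "/run/example/ctr-recreated", nil // stand-in for GetRunDir
	})
	fmt.Println(dir, err)
}
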
@@ -433,12 +457,27 @@ func (c *Container) Init() (err error) {
		return errors.Wrapf(ErrCtrExists, "container %s has already been created in runtime", c.ID())
	}

	// Mount storage for the container
	if err := c.mountStorage(); err != nil {
		return err
	}

	// Make the OCI runtime spec we will use
	// If the OCI spec already exists, we need to replace it
	// Cannot guarantee some things, e.g. network namespaces, have the same
	// paths
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	if _, err := os.Stat(jsonPath); err != nil {
		if !os.IsNotExist(err) {
			return errors.Wrapf(err, "error doing stat on container %s spec", c.ID())
		}
		// The spec does not exist, we're fine
	} else {
		// The spec exists, need to remove it
		if err := os.Remove(jsonPath); err != nil {
			return errors.Wrapf(err, "error replacing runtime spec for container %s", c.ID())
		}
	}

	// Save OCI spec to disk
	g := generate.NewFromSpec(c.config.Spec)

	// Mount ShmDir from host into container
	g.AddBindMount(c.config.ShmDir, "/dev/shm", []string{"rw"})
@@ -447,8 +486,6 @@
	c.runningSpec.Annotations[crioAnnotations.Created] = c.config.CreatedTime.Format(time.RFC3339Nano)
	c.runningSpec.Annotations["org.opencontainers.image.stopSignal"] = fmt.Sprintf("%d", c.config.StopSignal)

	// Save the OCI spec to disk
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	fileJSON, err := json.Marshal(c.runningSpec)
	if err != nil {
		return errors.Wrapf(err, "error exporting runtime spec for container %s to JSON", c.ID())
@@ -456,10 +493,11 @@
	if err := ioutil.WriteFile(jsonPath, fileJSON, 0644); err != nil {
		return errors.Wrapf(err, "error writing runtime spec JSON to file for container %s", c.ID())
	}
	c.state.ConfigPath = jsonPath
	logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)
	c.state.ConfigPath = jsonPath

	// With the spec complete, do an OCI create
	// TODO set cgroup parent in a sane fashion
	if err := c.runtime.ociRuntime.createContainer(c, "/libpod_parent"); err != nil {


@@ -38,6 +38,12 @@ func (s *InMemoryState) Close() error {
	return nil
}

// Refresh clears container and pod states after a reboot
// In-memory state won't survive a reboot so this is a no-op
func (s *InMemoryState) Refresh() error {
	return nil
}

// Container retrieves a container from its full ID
func (s *InMemoryState) Container(id string) (*Container, error) {
	if id == "" {


@@ -173,6 +173,36 @@ func NewRuntime(options ...RuntimeOption) (runtime *Runtime, err error) {
		runtime.state = state
	}

	// We now need to see if the system has restarted
	// We check for the presence of a file in our tmp directory to verify this
	// This check must be locked to prevent races
	runtimeAliveLock := filepath.Join(runtime.config.TmpDir, "alive.lck")
	runtimeAliveFile := filepath.Join(runtime.config.TmpDir, "alive")
	aliveLock, err := storage.GetLockfile(runtimeAliveLock)
	if err != nil {
		return nil, errors.Wrapf(err, "error acquiring runtime init lock")
	}
	// Acquire the lock and hold it until we return
	// This ensures that no two processes will be in runtime.refresh at once
	// TODO: we can't close the FD in this lock, so we should keep it around
	// and use it to lock important operations
	aliveLock.Lock()
	defer aliveLock.Unlock()

	_, err = os.Stat(runtimeAliveFile)
	if err != nil {
		// If the file doesn't exist, we need to refresh the state
		// This will trigger on first use as well, but refreshing an
		// empty state only creates a single file
		// As such, it's not really a performance concern
		if os.IsNotExist(err) {
			if err2 := runtime.refresh(runtimeAliveFile); err2 != nil {
				return nil, err2
			}
		} else {
			return nil, errors.Wrapf(err, "error reading runtime status file %s", runtimeAliveFile)
		}
	}

	// Mark the runtime as valid - ready to be used, cannot be modified
	// further
	runtime.valid = true
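
The stat of the alive file happens under a file lock so that two runtimes starting at the same time cannot both run refresh. A standalone sketch of just that locking step, reusing only the storage.GetLockfile/Lock/Unlock calls already visible in the diff (the import path github.com/containers/storage and the lock path are assumptions):

package main

import (
	"fmt"

	"github.com/containers/storage"
)

func main() {
	lock, err := storage.GetLockfile("/tmp/example-libpod/alive.lck")
	if err != nil {
		panic(err)
	}

	// Only one process at a time gets past this point, so the
	// check-and-refresh sequence cannot race with another runtime.
	lock.Lock()
	defer lock.Unlock()

	fmt.Println("holding the init lock; safe to check the alive file")
}
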
@@ -238,3 +268,33 @@ func (r *Runtime) Shutdown(force bool) error {
	return lastError
}

// Reconfigures the runtime after a reboot
// Refreshes the state, recreating temporary files
// Does not check validity as the runtime is not valid until after this has run
func (r *Runtime) refresh(alivePath string) error {
	// First clear the state in the database
	if err := r.state.Refresh(); err != nil {
		return err
	}

	// Next refresh the state of all containers to recreate dirs and
	// namespaces
	ctrs, err := r.state.AllContainers()
	if err != nil {
		return errors.Wrapf(err, "error retrieving all containers from state")
	}
	for _, ctr := range ctrs {
		if err := ctr.refresh(); err != nil {
			return err
		}
	}

	file, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644)
	if err != nil {
		return errors.Wrapf(err, "error creating runtime status file %s", alivePath)
	}
	defer file.Close()

	return nil
}


@@ -103,6 +103,52 @@ func (s *SQLState) Close() error {
	return nil
}

// Refresh clears the state after a reboot
// Resets mountpoint, PID, state for all containers
func (s *SQLState) Refresh() (err error) {
	const refresh = `UPDATE containerState SET
                      State=?,
                      Mountpoint=?,
                      Pid=?;`

	s.lock.Lock()
	defer s.lock.Unlock()

	if !s.valid {
		return ErrDBClosed
	}

	tx, err := s.db.Begin()
	if err != nil {
		return errors.Wrapf(err, "error beginning database transaction")
	}
	defer func() {
		if err != nil {
			if err2 := tx.Rollback(); err2 != nil {
				logrus.Errorf("Error rolling back transaction to refresh state: %v", err2)
			}
		}
	}()

	// Refresh container state
	// The constants could be moved into the SQL, but keeping them here
	// will keep us in sync in case ContainerStateConfigured ever changes in
	// the container state
	_, err = tx.Exec(refresh,
		ContainerStateConfigured,
		"",
		0)
	if err != nil {
		return errors.Wrapf(err, "error refreshing database state")
	}

	if err := tx.Commit(); err != nil {
		return errors.Wrapf(err, "error committing transaction to refresh database")
	}

	return nil
}

// Container retrieves a container from its full ID
func (s *SQLState) Container(id string) (*Container, error) {
	const query = `SELECT containers.*,
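
The SQL backend's Refresh uses a named error return together with a deferred rollback, so every early error return undoes the transaction while a successful commit is left alone. A compact sketch of that idiom on its own (the UPDATE statement mirrors the one above; the package name and the zero placeholder values are assumptions):

package staterefresh

import (
	"database/sql"
	"log"

	"github.com/pkg/errors"
)

// refreshAll demonstrates the named-return + deferred-rollback pattern: the
// deferred function observes the error the function is actually returning
// and rolls back only on failure paths.
func refreshAll(db *sql.DB) (err error) {
	tx, err := db.Begin()
	if err != nil {
		return errors.Wrapf(err, "error beginning database transaction")
	}
	defer func() {
		if err != nil {
			if err2 := tx.Rollback(); err2 != nil {
				log.Printf("error rolling back refresh transaction: %v", err2)
			}
		}
	}()

	// Reset state for every container; 0 and "" are placeholder defaults here.
	if _, err = tx.Exec(`UPDATE containerState SET State=?, Mountpoint=?, Pid=?;`, 0, "", 0); err != nil {
		return errors.Wrapf(err, "error refreshing database state")
	}

	if err := tx.Commit(); err != nil {
		return errors.Wrapf(err, "error committing refresh transaction")
	}
	return nil
}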


@@ -6,6 +6,9 @@ type State interface {
	// connections) that may be required
	Close() error

	// Refresh clears container and pod states after a reboot
	Refresh() error

	// Accepts full ID of container
	Container(id string) (*Container, error)
	// Accepts full or partial IDs (as long as they are unique) and names
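
Since Refresh is now part of the State interface, every backend (in-memory, SQL, or any future one) has to implement it. A hypothetical way to surface a missing implementation at build time, assuming such assertions are not already present elsewhere in the libpod package, is a compile-time interface check:

// Hypothetical compile-time assertions (not part of this change): these fail
// to build if a backend does not implement the full State interface,
// including the new Refresh method.
var (
	_ State = (*InMemoryState)(nil)
	_ State = (*SQLState)(nil)
)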