diff --git a/common/libimage/disk_usage.go b/common/libimage/disk_usage.go
index 7b7b56ad7d..581c23a86f 100644
--- a/common/libimage/disk_usage.go
+++ b/common/libimage/disk_usage.go
@@ -5,6 +5,9 @@ package libimage
 import (
 	"context"
 	"time"
+
+	"github.com/containers/storage"
+	"github.com/sirupsen/logrus"
 )
 
 // ImageDiskUsage reports the total size of an image. That is the size
@@ -36,49 +39,49 @@ func (r *Runtime) DiskUsage(ctx context.Context) ([]ImageDiskUsage, int64, error
 		return nil, -1, err
 	}
 
-	layerTree, err := r.newLayerTreeFromData(images, layers)
-	if err != nil {
-		return nil, -1, err
+	var totalSize int64
+	layerMap := make(map[string]*storage.Layer)
+	for _, layer := range layers {
+		layerMap[layer.ID] = &layer
+		if layer.UncompressedSize == -1 {
+			// The size is unknown, we must manually diff the layer which
+			// can be quite slow as it might have to walk all files.
+			size, err := r.store.DiffSize("", layer.ID)
+			if err != nil {
+				return nil, -1, err
+			}
+			// cache the size now
+			layer.UncompressedSize = size
+		}
+		// Add the layer size to the total here so each layer is only counted once.
+		totalSize += layer.UncompressedSize
 	}
 
-	var totalSize int64
-	visitedImages := make(map[string]bool)
-	visistedLayers := make(map[string]bool)
+	// First walk all images to count how often each layer is used.
+	// This is done so we know whether an image's size is shared with other
+	// images that use the same layer or unique to it.
+	layerCount := make(map[string]int)
+	for _, image := range images {
+		walkImageLayers(image, layerMap, func(layer *storage.Layer) {
+			// Increment the count for each layer visit
+			layerCount[layer.ID] += 1
+		})
+	}
 
+	// Now that we have all the info, walk the images again to add up the sizes.
 	var allUsages []ImageDiskUsage
 	for _, image := range images {
-		usages, err := diskUsageForImage(ctx, image, layerTree)
+		usages, err := diskUsageForImage(ctx, image, layerMap, layerCount, &totalSize)
 		if err != nil {
 			return nil, -1, err
 		}
 		allUsages = append(allUsages, usages...)
-
-		if _, ok := visitedImages[image.ID()]; ok {
-			// Do not count an image twice
-			continue
-		}
-		visitedImages[image.ID()] = true
-
-		size, err := image.Size()
-		if err != nil {
-			return nil, -1, err
-		}
-		for _, layer := range layerTree.layersOf(image) {
-			if _, ok := visistedLayers[layer.ID]; ok {
-				// Do not count a layer twice, so remove its
-				// size from the image size.
-				size -= layer.UncompressedSize
-				continue
-			}
-			visistedLayers[layer.ID] = true
-		}
-		totalSize += size
 	}
 	return allUsages, totalSize, err
 }
 
 // diskUsageForImage returns the disk-usage baseistics for the specified image.
-func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]ImageDiskUsage, error) {
+func diskUsageForImage(ctx context.Context, image *Image, layerMap map[string]*storage.Layer, layerCount map[string]int, totalSize *int64) ([]ImageDiskUsage, error) {
 	if err := image.isCorrupted(ctx, ""); err != nil {
 		return nil, err
 	}
@@ -90,36 +93,25 @@ func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]Im
 		Tag:        "<none>",
 	}
 
-	// Shared, unique and total size.
-	parent, err := tree.parent(ctx, image)
-	if err != nil {
-		return nil, err
-	}
-	childIDs, err := tree.children(ctx, image, false)
-	if err != nil {
-		return nil, err
-	}
-
-	// Optimistically set unique size to the full size of the image.
-	size, err := image.Size()
-	if err != nil {
-		return nil, err
-	}
-	base.UniqueSize = size
-
-	if len(childIDs) > 0 {
-		// If we have children, we share everything.
-		base.SharedSize = base.UniqueSize
-		base.UniqueSize = 0
-	} else if parent != nil {
-		// If we have no children but a parent, remove the parent
-		// (shared) size from the unique one.
-		size, err := parent.Size()
-		if err != nil {
-			return nil, err
+	walkImageLayers(image, layerMap, func(layer *storage.Layer) {
+		// If the layer is used by more than one image, its size is shared.
+		if layerCount[layer.ID] > 1 {
+			base.SharedSize += layer.UncompressedSize
+		} else {
+			base.UniqueSize += layer.UncompressedSize
 		}
-		base.UniqueSize -= size
-		base.SharedSize = size
+	})
+
+	// Count the image-specific big data as well.
+	// store.BigDataSize() is intentionally not used; it is slower (it has to
+	// take locks) and can fail.
+	// BigDataSizes has been populated correctly on new stores since c/storage
+	// commit a7d7fe8c9a (2016). It should be safe to assume that no such old
+	// store+image combination exists anymore, so we don't bother. Worst case
+	// we report a few bytes too little.
+	for _, size := range image.storageImage.BigDataSizes {
+		base.UniqueSize += size
+		*totalSize += size
 	}
 
 	base.Size = base.SharedSize + base.UniqueSize
@@ -155,3 +147,33 @@ func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]Im
 
 	return results, nil
 }
+
+// walkImageLayers walks all layers in an image and calls the given function for each layer.
+func walkImageLayers(image *Image, layerMap map[string]*storage.Layer, f func(layer *storage.Layer)) {
+	visited := make(map[string]struct{})
+	// Layers are walked from the top layer down until a layer has no parent, which means we reached the end.
+	// We must account for the fact that an image might have several top layers when ID mappings are used.
+	layers := append([]string{image.storageImage.TopLayer}, image.storageImage.MappedTopLayers...)
+	for _, layerID := range layers {
+		for layerID != "" {
+			layer := layerMap[layerID]
+			if layer == nil {
+				logrus.Errorf("Local Storage is corrupt, layer %q missing from the storage", layerID)
+				break
+			}
+			if _, ok := visited[layerID]; ok {
+				// We have seen this layer before. Break here to
+				// a) not count a layer twice when it is shared between the
+				//    TopLayer and MappedTopLayers layer chains, and
+				// b) prevent infinite loops; this should not happen per the
+				//    c/storage design, but it is good to be safe.
+				break
+			}
+			visited[layerID] = struct{}{}
+
+			f(layer)
+			// Set the layer for the next iteration; Parent is empty once we reach the base layer.
+			layerID = layer.Parent
+		}
+	}
+}
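Reviewer note on the rewrite above: DiskUsage() no longer builds a layerTree per call. It pre-computes every layer's uncompressed size once, counts how many images reference each layer, and then attributes each layer as shared (referenced more than once) or unique. Below is a minimal, self-contained sketch of that accounting scheme, not the real implementation: diskUsage, imageLayers, and layerSize are hypothetical stand-ins for the data the patch reads from containers/storage.

```go
package main

import "fmt"

// diskUsage mirrors the two-pass scheme from the patch: first count how many
// images reference each layer, then attribute each layer's size to an image
// as "shared" (referenced by more than one image) or "unique" (referenced by
// exactly one). imageLayers maps image ID -> layer chain (top layer first),
// layerSize maps layer ID -> uncompressed size.
func diskUsage(imageLayers map[string][]string, layerSize map[string]int64) (shared, unique map[string]int64, total int64) {
	// Every layer contributes to the total exactly once, no matter how many
	// images reference it.
	for _, size := range layerSize {
		total += size
	}

	// Pass 1: count layer references across all images.
	layerCount := make(map[string]int)
	for _, chain := range imageLayers {
		for _, layerID := range chain {
			layerCount[layerID]++
		}
	}

	// Pass 2: split each image's size into shared and unique parts.
	shared = make(map[string]int64)
	unique = make(map[string]int64)
	for imageID, chain := range imageLayers {
		for _, layerID := range chain {
			if layerCount[layerID] > 1 {
				shared[imageID] += layerSize[layerID]
			} else {
				unique[imageID] += layerSize[layerID]
			}
		}
	}
	return shared, unique, total
}

func main() {
	layerSize := map[string]int64{"base": 100, "a": 10, "b": 20}
	imageLayers := map[string][]string{
		"img1": {"a", "base"},
		"img2": {"b", "base"},
	}
	shared, unique, total := diskUsage(imageLayers, layerSize)
	fmt.Println(shared, unique, total)
	// Output: map[img1:100 img2:100] map[img1:10 img2:20] 130
}
```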
diff --git a/common/libimage/disk_usage_test.go b/common/libimage/disk_usage_test.go
new file mode 100644
index 0000000000..d06f49f5b8
--- /dev/null
+++ b/common/libimage/disk_usage_test.go
@@ -0,0 +1,87 @@
+//go:build !remote
+
+package libimage
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/containers/common/pkg/config"
+	"github.com/containers/storage"
+	"github.com/stretchr/testify/require"
+)
+
+func TestDiskUsage(t *testing.T) {
+	runtime := testNewRuntime(t)
+	ctx := context.Background()
+
+	const expectedTotalImageSize int64 = 5847966
+	name := "quay.io/libpod/alpine:3.10.2"
+	pullOptions := &PullOptions{}
+	pulledImages, err := runtime.Pull(ctx, name, config.PullPolicyAlways, pullOptions)
+	require.NoError(t, err)
+	require.Len(t, pulledImages, 1)
+	imgID := pulledImages[0].storageImage.ID
+	layerID := pulledImages[0].storageImage.TopLayer
+	digest := pulledImages[0].storageImage.Digest
+	img, err := pulledImages[0].storageReference.NewImageSource(ctx, &runtime.systemContext)
+	require.NoError(t, err)
+	defer img.Close()
+	manifest, _, err := img.GetManifest(ctx, nil)
+	require.NoError(t, err)
+
+	expectedImageDiskUsage := ImageDiskUsage{
+		ID:         imgID,
+		Repository: "quay.io/libpod/alpine",
+		Tag:        "3.10.2",
+		SharedSize: 0,
+		UniqueSize: expectedTotalImageSize,
+		Size:       expectedTotalImageSize,
+	}
+
+	res, size, err := runtime.DiskUsage(ctx)
+	require.NoError(t, err)
+	require.Equal(t, expectedTotalImageSize, size)
+	require.Len(t, res, 1)
+
+	// Intentionally unset the time here; we cannot simply compare the times
+	// because of the location information that is part of the struct, which
+	// can differ even when the time is the same.
+	res[0].Created = time.Time{}
+	require.Equal(t, expectedImageDiskUsage, res[0])
+
+	opts := &storage.ImageOptions{
+		BigData: []storage.ImageBigDataOption{
+			{
+				Key:    storage.ImageDigestBigDataKey,
+				Data:   manifest,
+				Digest: digest,
+			},
+		},
+	}
+
+	img2, err := runtime.store.CreateImage("", []string{"localhost/test:123"}, layerID, "", opts)
+	require.NoError(t, err)
+
+	const sharedSize int64 = 5843968
+	// Build the expectation for the second image and update the first image's shared/unique split.
+	expectedImageDiskUsage2 := ImageDiskUsage{
+		ID:         img2.ID,
+		Repository: "localhost/test",
+		Tag:        "123",
+		SharedSize: sharedSize,
+		UniqueSize: int64(len(manifest)),
+		Size:       sharedSize + int64(len(manifest)),
+	}
+	expectedImageDiskUsage.SharedSize = sharedSize
+	expectedImageDiskUsage.UniqueSize = expectedImageDiskUsage.Size - sharedSize
+
+	res, size, err = runtime.DiskUsage(ctx)
+	require.NoError(t, err)
+	require.Equal(t, expectedTotalImageSize+int64(len(manifest)), size)
+	require.Len(t, res, 2)
+	res[0].Created = time.Time{}
+	res[1].Created = time.Time{}
+	require.ElementsMatch(t, []ImageDiskUsage{expectedImageDiskUsage, expectedImageDiskUsage2}, res)
+}
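A note on the expected numbers in this test: they follow directly from the new accounting in disk_usage.go. With only the alpine image present, its layer chain (5843968 bytes) plus its big data blobs make up the full 5847966-byte total, all of it unique. Once CreateImage() attaches a second image to the same top layer, the layer becomes shared and each image keeps only its own big data as unique size. A rough restatement of that arithmetic, with the manifest length as a hypothetical placeholder value:

```go
package main

import "fmt"

func main() {
	const layerSize int64 = 5843968   // sharedSize in the test: the alpine layer, referenced by both images
	const alpineTotal int64 = 5847966 // expectedTotalImageSize: layer + alpine's big data blobs
	manifestLen := int64(500)         // hypothetical len(manifest) of the second image

	fmt.Println("image 1 unique:", alpineTotal-layerSize) // big data only, the layer is now shared
	fmt.Println("image 2 size:", layerSize+manifestLen)   // shared layer + its own manifest big data
	fmt.Println("new total:", alpineTotal+manifestLen)    // the shared layer is counted only once
}
```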
diff --git a/common/libimage/layer_tree.go b/common/libimage/layer_tree.go
index aeb1d45bb3..33741bd083 100644
--- a/common/libimage/layer_tree.go
+++ b/common/libimage/layer_tree.go
@@ -141,19 +141,6 @@ func (r *Runtime) newLayerTreeFromData(images []*Image, layers []storage.Layer)
 	return &tree, nil
 }
 
-// layersOf returns all storage layers of the specified image.
-func (t *layerTree) layersOf(image *Image) []*storage.Layer {
-	var layers []*storage.Layer
-	node := t.node(image.TopLayer())
-	for node != nil {
-		if node.layer != nil {
-			layers = append(layers, node.layer)
-		}
-		node = node.parent
-	}
-	return layers
-}
-
 // children returns the child images of parent. Child images are images with
 // either the same top layer as parent or parent being the true parent layer.
 // Furthermore, the history of the parent and child images must match with the
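With layersOf() removed, the layerTree is only needed for the remaining parent/children queries; DiskUsage() now works directly off the flat layer list from the store. For context, here is a sketch of how a caller typically consumes the results (roughly what `podman system df` reports). It is illustrative only: printDiskUsage and the package name are hypothetical, and constructing the *libimage.Runtime is out of scope here.

```go
// Package example is a hypothetical caller of libimage.Runtime.DiskUsage.
package example

import (
	"context"
	"fmt"

	"github.com/containers/common/libimage"
)

// printDiskUsage prints one line per image plus the deduplicated total.
func printDiskUsage(ctx context.Context, rt *libimage.Runtime) error {
	usages, total, err := rt.DiskUsage(ctx)
	if err != nil {
		return err
	}
	for _, u := range usages {
		// Size is always SharedSize + UniqueSize; shared bytes show up in
		// several entries but are only counted once in the returned total.
		fmt.Printf("%s:%s size=%d shared=%d unique=%d\n",
			u.Repository, u.Tag, u.Size, u.SharedSize, u.UniqueSize)
	}
	fmt.Printf("total image disk usage: %d bytes\n", total)
	return nil
}
```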