Merge pull request #2355 from Luap99/disk-usage

libimage: rework DiskUsage() to count layers
This commit is contained in:
openshift-merge-bot[bot] 2025-03-10 17:09:03 +00:00 committed by GitHub
commit c4f20ceed9
3 changed files with 167 additions and 71 deletions

View File

@ -5,6 +5,9 @@ package libimage
import (
"context"
"time"
"github.com/containers/storage"
"github.com/sirupsen/logrus"
)
// ImageDiskUsage reports the total size of an image. That is the size
@ -36,49 +39,49 @@ func (r *Runtime) DiskUsage(ctx context.Context) ([]ImageDiskUsage, int64, error
return nil, -1, err
}
layerTree, err := r.newLayerTreeFromData(images, layers)
if err != nil {
return nil, -1, err
var totalSize int64
layerMap := make(map[string]*storage.Layer)
for _, layer := range layers {
layerMap[layer.ID] = &layer
if layer.UncompressedSize == -1 {
// size is unknown, we must manually diff the layer size which
// can be quite slow as it might have to walk all files
size, err := r.store.DiffSize("", layer.ID)
if err != nil {
return nil, -1, err
}
// cache the size now
layer.UncompressedSize = size
}
// count the total layer size here so we know we only count each layer once
totalSize += layer.UncompressedSize
}
var totalSize int64
visitedImages := make(map[string]bool)
visistedLayers := make(map[string]bool)
// First walk all images to count how often each layer is used.
// This is done so we know if the size for an image is shared between
// images that use the same layer or unique.
layerCount := make(map[string]int)
for _, image := range images {
walkImageLayers(image, layerMap, func(layer *storage.Layer) {
// Increment the count for each layer visit
layerCount[layer.ID] += 1
})
}
// Now that we actually have all the info walk again to add the sizes.
var allUsages []ImageDiskUsage
for _, image := range images {
usages, err := diskUsageForImage(ctx, image, layerTree)
usages, err := diskUsageForImage(ctx, image, layerMap, layerCount, &totalSize)
if err != nil {
return nil, -1, err
}
allUsages = append(allUsages, usages...)
if _, ok := visitedImages[image.ID()]; ok {
// Do not count an image twice
continue
}
visitedImages[image.ID()] = true
size, err := image.Size()
if err != nil {
return nil, -1, err
}
for _, layer := range layerTree.layersOf(image) {
if _, ok := visistedLayers[layer.ID]; ok {
// Do not count a layer twice, so remove its
// size from the image size.
size -= layer.UncompressedSize
continue
}
visistedLayers[layer.ID] = true
}
totalSize += size
}
return allUsages, totalSize, err
}
// diskUsageForImage returns the disk-usage statistics for the specified image.
func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]ImageDiskUsage, error) {
func diskUsageForImage(ctx context.Context, image *Image, layerMap map[string]*storage.Layer, layerCount map[string]int, totalSize *int64) ([]ImageDiskUsage, error) {
if err := image.isCorrupted(ctx, ""); err != nil {
return nil, err
}
@ -90,36 +93,25 @@ func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]Im
Tag: "<none>",
}
// Shared, unique and total size.
parent, err := tree.parent(ctx, image)
if err != nil {
return nil, err
}
childIDs, err := tree.children(ctx, image, false)
if err != nil {
return nil, err
}
// Optimistically set unique size to the full size of the image.
size, err := image.Size()
if err != nil {
return nil, err
}
base.UniqueSize = size
if len(childIDs) > 0 {
// If we have children, we share everything.
base.SharedSize = base.UniqueSize
base.UniqueSize = 0
} else if parent != nil {
// If we have no children but a parent, remove the parent
// (shared) size from the unique one.
size, err := parent.Size()
if err != nil {
return nil, err
walkImageLayers(image, layerMap, func(layer *storage.Layer) {
// If the layer is used by more than one image, it shares its size
if layerCount[layer.ID] > 1 {
base.SharedSize += layer.UncompressedSize
} else {
base.UniqueSize += layer.UncompressedSize
}
base.UniqueSize -= size
base.SharedSize = size
})
// Count the image specific big data as well.
// store.BigDataSize() is not used intentionally, it is slower (has to take
// locks) and can fail.
// BigDataSizes is always correctly populated on new stores since c/storage
// commit a7d7fe8c9a (2016). It should be safe to assume that no such old
// store+image exist now so we don't bother. Worst case we report a few
// bytes too little.
for _, size := range image.storageImage.BigDataSizes {
base.UniqueSize += size
*totalSize += size
}
base.Size = base.SharedSize + base.UniqueSize
@ -155,3 +147,33 @@ func diskUsageForImage(ctx context.Context, image *Image, tree *layerTree) ([]Im
return results, nil
}
// walkImageLayers walks all layers in an image and calls the given function for each layer.
func walkImageLayers(image *Image, layerMap map[string]*storage.Layer, f func(layer *storage.Layer)) {
visited := make(map[string]struct{})
// Layers are walked recursively until it has no parent which means we reached the end.
// We must account for the fact that an image might have several top layers when id mappings are used.
layers := append([]string{image.storageImage.TopLayer}, image.storageImage.MappedTopLayers...)
for _, layerID := range layers {
for layerID != "" {
layer := layerMap[layerID]
if layer == nil {
logrus.Errorf("Local Storage is corrupt, layer %q missing from the storage", layerID)
break
}
if _, ok := visited[layerID]; ok {
// We have seen this layer before. Break here to
// a) Do not count the same layer twice that was shared between
// the TopLayer and MappedTopLayers layer chain.
// b) Prevent infinite loops, should not happen per c/storage
// design but it is good to be safer.
break
}
visited[layerID] = struct{}{}
f(layer)
// Set the layer for the next iteration, parent is empty if we reach the end.
layerID = layer.Parent
}
}
}

View File

@ -0,0 +1,87 @@
//go:build !remote
package libimage
import (
"context"
"testing"
"time"
"github.com/containers/common/pkg/config"
"github.com/containers/storage"
"github.com/stretchr/testify/require"
)
// TestDiskUsage exercises Runtime.DiskUsage with (1) a single pulled image,
// whose size must be entirely unique, and (2) a second image created on top
// of the same layer, after which the layer size must be reported as shared
// by both images while each image's big data (manifest) stays unique.
func TestDiskUsage(t *testing.T) {
runtime := testNewRuntime(t)
ctx := context.Background()
// Known total size of the alpine:3.10.2 image (layer + big data).
// NOTE(review): this pins an exact byte count of a remote image — assumes
// the quay.io tag is immutable.
const expectedTotalImageSize int64 = 5847966
name := "quay.io/libpod/alpine:3.10.2"
pullOptions := &PullOptions{}
pulledImages, err := runtime.Pull(ctx, name, config.PullPolicyAlways, pullOptions)
require.NoError(t, err)
require.Len(t, pulledImages, 1)
// Capture the IDs needed to build a second image sharing the same layer.
imgID := pulledImages[0].storageImage.ID
layerID := pulledImages[0].storageImage.TopLayer
digest := pulledImages[0].storageImage.Digest
img, err := pulledImages[0].storageReference.NewImageSource(ctx, &runtime.systemContext)
require.NoError(t, err)
defer img.Close()
// The raw manifest is reused below as big data for the second image.
manifest, _, err := img.GetManifest(ctx, nil)
require.NoError(t, err)
// With only one image present, nothing is shared.
expectedImageDiskUsage := ImageDiskUsage{
ID: imgID,
Repository: "quay.io/libpod/alpine",
Tag: "3.10.2",
SharedSize: 0,
UniqueSize: expectedTotalImageSize,
Size: expectedTotalImageSize,
}
res, size, err := runtime.DiskUsage(ctx)
require.NoError(t, err)
require.Equal(t, expectedTotalImageSize, size)
require.Len(t, res, 1)
// intentionally unsetting the time here, we cannot really equal the time
// because of the local information that is part of the struct and that
// can differ even when the time is the same
res[0].Created = time.Time{}
require.Equal(t, expectedImageDiskUsage, res[0])
// Create a second image that reuses the pulled image's top layer and
// carries the manifest as image big data.
opts := &storage.ImageOptions{
BigData: []storage.ImageBigDataOption{
{
Key: storage.ImageDigestBigDataKey,
Data: manifest,
Digest: digest,
},
},
}
img2, err := runtime.store.CreateImage("", []string{"localhost/test:123"}, layerID, "", opts)
require.NoError(t, err)
// Size of the shared layer (expectedTotalImageSize minus the first
// image's own big data).
const sharedSize int64 = 5843968
// copy the expected and update the expected values
expectedImageDiskUsage2 := ImageDiskUsage{
ID: img2.ID,
Repository: "localhost/test",
Tag: "123",
SharedSize: sharedSize,
UniqueSize: int64(len(manifest)),
Size: sharedSize + int64(len(manifest)),
}
// The first image's layer is now shared, so its unique size shrinks.
expectedImageDiskUsage.SharedSize = sharedSize
expectedImageDiskUsage.UniqueSize = expectedImageDiskUsage.Size - sharedSize
res, size, err = runtime.DiskUsage(ctx)
require.NoError(t, err)
// Total grows only by the second image's big data; the layer is counted once.
require.Equal(t, expectedTotalImageSize+int64(len(manifest)), size)
require.Len(t, res, 2)
res[0].Created = time.Time{}
res[1].Created = time.Time{}
require.ElementsMatch(t, []ImageDiskUsage{expectedImageDiskUsage, expectedImageDiskUsage2}, res)
}

View File

@ -141,19 +141,6 @@ func (r *Runtime) newLayerTreeFromData(images []*Image, layers []storage.Layer)
return &tree, nil
}
// layersOf returns all storage layers of the specified image.
// layersOf returns all storage layers of the specified image, collected by
// walking from the image's top-layer node up through its parents. Tree nodes
// without an associated storage layer are skipped.
func (t *layerTree) layersOf(image *Image) []*storage.Layer {
	var result []*storage.Layer
	for n := t.node(image.TopLayer()); n != nil; n = n.parent {
		if l := n.layer; l != nil {
			result = append(result, l)
		}
	}
	return result
}
// children returns the child images of parent. Child images are images with
// either the same top layer as parent or parent being the true parent layer.
// Furthermore, the history of the parent and child images must match with the