dragonfly/cdnsystem/daemon/cdn/storage/storage_gc.go

188 lines
5.2 KiB
Go

/*
* Copyright 2020 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package storage
import (
"fmt"
"os"
"strings"
"time"
"d7y.io/dragonfly/v2/cdnsystem/daemon"
cdnerrors "d7y.io/dragonfly/v2/cdnsystem/errors"
"d7y.io/dragonfly/v2/cdnsystem/storedriver"
logger "d7y.io/dragonfly/v2/internal/dflog"
"d7y.io/dragonfly/v2/pkg/util/timeutils"
"github.com/emirpasic/gods/maps/treemap"
godsutils "github.com/emirpasic/gods/utils"
)
// Cleaner selects the stored tasks that should be garbage collected for a
// single storage driver.
type Cleaner struct {
// cfg supplies the GC thresholds and the clean ratio read by the Cleaner's methods.
cfg *GCConfig
// driver is queried for available space and walked to enumerate stored files.
driver storedriver.Driver
// taskMgr is consulted to find out whether a task is still known and must be kept.
taskMgr daemon.SeedTaskMgr
// storageMgr provides per-task file metadata and download file stats.
storageMgr Manager
}
// NewStorageCleaner assembles a Cleaner from the given GC configuration,
// storage driver, storage manager and seed task manager.
//
// The returned error is always nil; it is part of the signature only.
func NewStorageCleaner(cfg *GCConfig, driver storedriver.Driver, storageMgr Manager, taskMgr daemon.SeedTaskMgr) (*Cleaner, error) {
	c := new(Cleaner)
	c.cfg = cfg
	c.driver = driver
	c.storageMgr = storageMgr
	c.taskMgr = taskMgr
	return c, nil
}
// GC walks the storage driver and returns the IDs of the tasks whose files
// should be removed. It only collects IDs; it does not delete anything itself.
//
// storagePattern is used solely as the "type" field of the log entries.
// When force is true a full GC is performed regardless of free space.
// Otherwise:
//   - if the available space is above cfg.YoungGCThreshold, nothing is
//     collected and (nil, nil) is returned;
//   - if the available space is at or below cfg.FullGCThreshold, a full GC is
//     performed;
//   - in between, only a portion of the candidates is returned (see
//     sortInert and getGCTasks).
//
// Tasks that the task manager still knows about are never returned. An error
// is returned when the available space cannot be determined, the base
// directory cannot be created, or the walk fails.
func (cleaner *Cleaner) GC(storagePattern string, force bool) ([]string, error) {
	freeSpace, err := cleaner.driver.GetAvailSpace()
	if err != nil {
		if !cdnerrors.IsFileNotExist(err) {
			return nil, fmt.Errorf("get available space: %v", err)
		}
		// The base directory is missing: create it and query again.
		if err = cleaner.driver.CreateBaseDir(); err != nil {
			return nil, err
		}
		// Do not ignore a failure here: an unusable freeSpace value would
		// silently drive the young/full GC decision below.
		if freeSpace, err = cleaner.driver.GetAvailSpace(); err != nil {
			return nil, fmt.Errorf("get available space: %v", err)
		}
	}

	fullGC := force
	if !fullGC {
		if freeSpace > cleaner.cfg.YoungGCThreshold {
			return nil, nil
		}
		if freeSpace <= cleaner.cfg.FullGCThreshold {
			fullGC = true
		}
	}

	log := logger.GcLogger.With("type", storagePattern)
	log.Debugf("start to exec gc with fullGC: %t", fullGC)

	gapTasks := treemap.NewWith(godsutils.Int64Comparator)
	intervalTasks := treemap.NewWith(godsutils.Int64Comparator)

	// walkTaskIds is used to avoid processing the same taskID, which is
	// extracted from the file name, more than once.
	walkTaskIds := make(map[string]bool)
	var gcTaskIDs []string

	walkFn := func(path string, info os.FileInfo, err error) error {
		log.Debugf("start to walk path(%s)", path)
		if err != nil {
			log.Errorf("failed to access path(%s): %v", path, err)
			return err
		}
		if info.IsDir() {
			return nil
		}

		// The task ID is the part of the file name before the first dot.
		taskID := strings.SplitN(info.Name(), ".", 2)[0]
		if walkTaskIds[taskID] {
			return nil
		}
		walkTaskIds[taskID] = true

		// Keep the task when the task manager still knows it (it is in use),
		// and also when the lookup failed for any reason other than
		// "data not found".
		if _, err := cleaner.taskMgr.Get(taskID); err == nil || !cdnerrors.IsDataNotFound(err) {
			if err != nil {
				log.Errorf("failed to get taskID(%s): %v", taskID, err)
			}
			return nil
		}

		// A full GC collects every task that is not in use.
		if fullGC {
			gcTaskIDs = append(gcTaskIDs, taskID)
			return nil
		}

		// Tasks without readable metadata are collected directly.
		metaData, err := cleaner.storageMgr.ReadFileMetaData(taskID)
		if err != nil || metaData == nil {
			log.Debugf("taskID: %s, failed to get metadata: %v", taskID, err)
			gcTaskIDs = append(gcTaskIDs, taskID)
			return nil
		}

		// Put the task into gapTasks or intervalTasks; a failure is logged
		// and the task is simply not considered in this round.
		if err := cleaner.sortInert(gapTasks, intervalTasks, metaData); err != nil {
			log.Errorf("failed to parse inert metaData(%+v): %v", metaData, err)
		}
		return nil
	}

	if err := cleaner.driver.Walk(&storedriver.Raw{
		WalkFn: walkFn,
	}); err != nil {
		return nil, err
	}

	if !fullGC {
		gcTaskIDs = append(gcTaskIDs, cleaner.getGCTasks(gapTasks, intervalTasks)...)
	}
	return gcTaskIDs, nil
}
// sortInert files the task described by metaData into one of the two ordered
// maps used to pick GC candidates.
//
// gap is the time since the task was last accessed, computed from
// timeutils.CurrentTimeMillis and metaData.AccessTime. When metaData.Interval
// is positive and gap is within Interval plus cfg.IntervalThreshold, the task
// is stored in intervalTasks keyed by the size of its download file.
// Otherwise it is stored in gapTasks keyed by gap. Each map value is the
// []string of task IDs sharing that key.
//
// An error is returned only when the download file cannot be stat'ed; in
// that case neither map is modified.
//
// NOTE(review): metaData.Interval is assumed to be in milliseconds, like gap
// — confirm against the code that writes FileMetaData.
func (cleaner *Cleaner) sortInert(gapTasks, intervalTasks *treemap.Map, metaData *FileMetaData) error {
	// Seconds-to-milliseconds factor (1000). The previous code multiplied by
	// int64(time.Millisecond), which is the number of nanoseconds in a
	// millisecond (1e6) and made the threshold 1000 times too large.
	const millisPerSecond = int64(time.Second / time.Millisecond)

	// add appends the task ID to the slice stored under key in m.
	add := func(m *treemap.Map, key interface{}) {
		var tasks []string
		if v, found := m.Get(key); found {
			tasks, _ = v.([]string)
		}
		m.Put(key, append(tasks, metaData.TaskID))
	}

	gap := timeutils.CurrentTimeMillis() - metaData.AccessTime
	thresholdMillis := int64(cleaner.cfg.IntervalThreshold.Seconds()) * millisPerSecond

	if metaData.Interval > 0 && gap <= metaData.Interval+thresholdMillis {
		info, err := cleaner.storageMgr.StatDownloadFile(metaData.TaskID)
		if err != nil {
			return err
		}
		add(intervalTasks, info.Size)
		return nil
	}

	add(gapTasks, gap)
	return nil
}
// getGCTasks flattens the task IDs held in gapTasks and then intervalTasks,
// each in the map's own value order, and returns a leading portion of that
// list.
//
// The portion is cfg.CleanRatio tenths of the candidates, rounded up. The
// count is clamped to the number of candidates so that a CleanRatio above 10
// or below 0 cannot cause an out-of-range slice panic. Values that are not
// []string are skipped. The result is never nil.
func (cleaner *Cleaner) getGCTasks(gapTasks, intervalTasks *treemap.Map) []string {
	gcTasks := make([]string, 0)
	for _, tasks := range []*treemap.Map{gapTasks, intervalTasks} {
		for _, v := range tasks.Values() {
			if taskIDs, ok := v.([]string); ok {
				gcTasks = append(gcTasks, taskIDs...)
			}
		}
	}

	// ceil(len * CleanRatio / 10)
	gcLen := (len(gcTasks)*cleaner.cfg.CleanRatio + 9) / 10
	if gcLen < 0 {
		gcLen = 0
	}
	if gcLen > len(gcTasks) {
		gcLen = len(gcTasks)
	}
	return gcTasks[:gcLen]
}