406 lines
10 KiB
Go
406 lines
10 KiB
Go
package pathlib
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"slices"
|
|
)
|
|
|
|
// WalkOpts is the struct that defines how a walk should be performed
|
|
type WalkOpts struct {
|
|
// Depth defines how far down a directory we should recurse. A value of -1 means
|
|
// infinite depth. 0 means only the direct children of root will be returned, etc.
|
|
Depth int
|
|
|
|
// Algorithm specifies the algoritm that the Walk() function should use to
|
|
// traverse the directory.
|
|
Algorithm Algorithm
|
|
|
|
// FollowSymlinks defines whether symlinks should be dereferenced or not. If True,
|
|
// the symlink itself will never be returned to WalkFunc, but rather whatever it
|
|
// points to.
|
|
FollowSymlinks bool
|
|
|
|
// MinimumFileSize specifies the minimum size of a file for visitation.
|
|
// If negative, there is no minimum size.
|
|
MinimumFileSize int64
|
|
|
|
// MaximumFileSize specifies the maximum size of a file for visitation.
|
|
// If negative, there is no maximum size.
|
|
MaximumFileSize int64
|
|
|
|
// VisitFiles specifies that we should visit regular files during
|
|
// the walk.
|
|
VisitFiles bool
|
|
|
|
// VisitDirs specifies that we should visit directories during the walk.
|
|
VisitDirs bool
|
|
|
|
// VisitSymlinks specifies that we should visit symlinks during the walk.
|
|
VisitSymlinks bool
|
|
|
|
// SortChildren causes all children of a path to be lexigraphically sorted before
|
|
// being sent to the WalkFunc.
|
|
SortChildren bool
|
|
}
|
|
|
|
// DefaultWalkOpts returns the default WalkOpts struct used when
|
|
// walking a directory.
|
|
func DefaultWalkOpts() *WalkOpts {
|
|
return &WalkOpts{
|
|
Depth: -1,
|
|
Algorithm: AlgorithmBasic,
|
|
FollowSymlinks: false,
|
|
MinimumFileSize: -1,
|
|
MaximumFileSize: -1,
|
|
VisitFiles: true,
|
|
VisitDirs: true,
|
|
VisitSymlinks: true,
|
|
SortChildren: false,
|
|
}
|
|
}
|
|
|
|
// MeetsMinimumSize returns whether size is at least the minimum specified.
|
|
func (w *WalkOpts) MeetsMinimumSize(size int64) bool {
|
|
if w.MinimumFileSize < 0 {
|
|
return true
|
|
}
|
|
return size >= w.MinimumFileSize
|
|
}
|
|
|
|
// MeetsMaximumSize returns whether size is less than or equal to the maximum specified.
|
|
func (w *WalkOpts) MeetsMaximumSize(size int64) bool {
|
|
if w.MaximumFileSize < 0 {
|
|
return true
|
|
}
|
|
return size <= w.MaximumFileSize
|
|
}
|
|
|
|
// Algorithm represents the walk algorithm that will be performed.
|
|
type Algorithm int
|
|
|
|
const (
|
|
// AlgorithmBasic is a walk algorithm. It iterates over filesystem objects in the
|
|
// order in which they are returned by the operating system. It guarantees no
|
|
// ordering of any kind. It will recurse into subdirectories as soon as it encounters them,
|
|
// and will continue iterating the remaining children after the recursion is complete.
|
|
// It behaves as a quasi-DFS algorithm.
|
|
AlgorithmBasic Algorithm = iota
|
|
// AlgorithmDepthFirst is a walk algorithm. More specifically, it is a post-order
|
|
// depth first search whereby subdirectories are recursed into before
|
|
// visiting the children of the current directory.
|
|
// DEPRECATED: Use AlgorithmPostOrderDepthFirst
|
|
AlgorithmDepthFirst
|
|
// AlgorithmPostOrderDepthFirst is a walk algorithm that recurses into all of its children
|
|
// before visiting any of a node's elements.
|
|
AlgorithmPostOrderDepthFirst
|
|
// AlgorithmPreOrderDepthFirst is a walk algorithm. It visits all of a node's elements
|
|
// before recursing into its children.
|
|
AlgorithmPreOrderDepthFirst
|
|
)
|
|
|
|
// Walk is an object that handles walking through a directory tree
|
|
type Walk struct {
|
|
Opts *WalkOpts
|
|
root *Path
|
|
}
|
|
|
|
type WalkOptsFunc func(config *WalkOpts)
|
|
|
|
func WalkDepth(depth int) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.Depth = depth
|
|
}
|
|
}
|
|
|
|
func WalkAlgorithm(algo Algorithm) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.Algorithm = algo
|
|
}
|
|
}
|
|
|
|
func WalkFollowSymlinks(follow bool) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.FollowSymlinks = follow
|
|
}
|
|
}
|
|
|
|
func WalkMinimumFileSize(size int64) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.MinimumFileSize = size
|
|
}
|
|
}
|
|
|
|
func WalkMaximumFileSize(size int64) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.MaximumFileSize = size
|
|
}
|
|
}
|
|
|
|
func WalkVisitFiles(value bool) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.VisitFiles = value
|
|
}
|
|
}
|
|
|
|
func WalkVisitDirs(value bool) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.VisitDirs = value
|
|
}
|
|
}
|
|
|
|
func WalkVisitSymlinks(value bool) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.VisitSymlinks = value
|
|
}
|
|
}
|
|
|
|
func WalkSortChildren(value bool) WalkOptsFunc {
|
|
return func(config *WalkOpts) {
|
|
config.SortChildren = value
|
|
}
|
|
}
|
|
|
|
// NewWalk returns a new Walk struct with default values applied
|
|
func NewWalk(root *Path, opts ...WalkOptsFunc) (*Walk, error) {
|
|
config := DefaultWalkOpts()
|
|
for _, opt := range opts {
|
|
opt(config)
|
|
}
|
|
return NewWalkWithOpts(root, config)
|
|
}
|
|
|
|
// NewWalkWithOpts returns a Walk object with the given WalkOpts applied
|
|
func NewWalkWithOpts(root *Path, opts *WalkOpts) (*Walk, error) {
|
|
if root == nil {
|
|
return nil, fmt.Errorf("root path can't be nil")
|
|
}
|
|
if opts == nil {
|
|
return nil, fmt.Errorf("opts can't be nil")
|
|
}
|
|
return &Walk{
|
|
Opts: opts,
|
|
root: root,
|
|
}, nil
|
|
}
|
|
|
|
func (w *Walk) maxDepthReached(currentDepth int) bool {
|
|
if w.Opts.Depth >= 0 && currentDepth > w.Opts.Depth {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
type dfsObjectInfo struct {
|
|
path *Path
|
|
info os.FileInfo
|
|
err error
|
|
}
|
|
|
|
func (w *Walk) walkDFS(walkFn WalkFunc, root *Path, currentDepth int) error {
|
|
if w.maxDepthReached(currentDepth) {
|
|
return nil
|
|
}
|
|
|
|
var children []*dfsObjectInfo
|
|
|
|
if err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
|
|
// Since we are doing depth-first, we have to first recurse through all the directories,
|
|
// and save all non-directory objects so we can defer handling at a later time.
|
|
if IsDir(info.Mode()) {
|
|
if err := w.walkDFS(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
|
|
return err
|
|
}
|
|
}
|
|
|
|
children = append(children, &dfsObjectInfo{
|
|
path: child,
|
|
info: info,
|
|
err: encounteredErr,
|
|
})
|
|
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Iterate over all children after all subdirs have been recursed
|
|
for _, child := range children {
|
|
passesQuery, err := w.passesQuerySpecification(child.info)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if passesQuery {
|
|
if err := walkFn(child.path, child.info, child.err); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// iterateImmediateChildren is a function that handles discovering root's immediate children,
|
|
// and will run the algorithm function for every child. The algorithm function is essentially
|
|
// what differentiates how each walk behaves, and determines what actions to take given a
|
|
// certain child.
|
|
func (w *Walk) iterateImmediateChildren(root *Path, algorithmFunction WalkFunc) error {
|
|
children, err := root.ReadDir()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if w.Opts.SortChildren {
|
|
slices.SortFunc[[]*Path, *Path](children, func(a *Path, b *Path) int {
|
|
if a.String() < b.String() {
|
|
return -1
|
|
}
|
|
if a.String() == b.String() {
|
|
return 0
|
|
}
|
|
return 1
|
|
})
|
|
}
|
|
var info os.FileInfo
|
|
for _, child := range children {
|
|
if child.String() == root.String() {
|
|
continue
|
|
}
|
|
if w.Opts.FollowSymlinks {
|
|
info, err = child.Stat()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
info, err = child.Lstat()
|
|
}
|
|
|
|
if info == nil {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return ErrInfoIsNil
|
|
}
|
|
|
|
if algoErr := algorithmFunction(child, info, err); algoErr != nil {
|
|
return algoErr
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// passesQuerySpecification returns whether or not the object described by
|
|
// the os.FileInfo passes all of the query specifications listed in
|
|
// the walk options.
|
|
func (w *Walk) passesQuerySpecification(info os.FileInfo) (bool, error) {
|
|
if IsFile(info.Mode()) {
|
|
if !w.Opts.VisitFiles {
|
|
return false, nil
|
|
}
|
|
|
|
if !w.Opts.MeetsMinimumSize(info.Size()) ||
|
|
!w.Opts.MeetsMaximumSize(info.Size()) {
|
|
return false, nil
|
|
}
|
|
} else if IsDir(info.Mode()) && !w.Opts.VisitDirs {
|
|
return false, nil
|
|
} else if IsSymlink(info.Mode()) && !w.Opts.VisitSymlinks {
|
|
return false, nil
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
func (w *Walk) walkBasic(walkFn WalkFunc, root *Path, currentDepth int) error {
|
|
if w.maxDepthReached(currentDepth) {
|
|
return nil
|
|
}
|
|
|
|
err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
|
|
if IsDir(info.Mode()) {
|
|
// In the case the error is ErrWalkSkipSubtree, we ignore it as we've already
|
|
// exited from the recursive call. Any other error should be bubbled up.
|
|
if err := w.walkBasic(walkFn, child, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
|
|
return err
|
|
}
|
|
}
|
|
|
|
passesQuery, err := w.passesQuerySpecification(info)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if passesQuery {
|
|
if err := walkFn(child, info, encounteredErr); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
return err
|
|
}
|
|
|
|
func (w *Walk) walkPreOrderDFS(walkFn WalkFunc, root *Path, currentDepth int) error {
|
|
if w.maxDepthReached(currentDepth) {
|
|
return nil
|
|
}
|
|
dirs := []*Path{}
|
|
err := w.iterateImmediateChildren(root, func(child *Path, info os.FileInfo, encounteredErr error) error {
|
|
if IsDir(info.Mode()) {
|
|
dirs = append(dirs, child)
|
|
}
|
|
|
|
passesQuery, err := w.passesQuerySpecification(info)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if passesQuery {
|
|
if err := walkFn(child, info, encounteredErr); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, dir := range dirs {
|
|
if err := w.walkPreOrderDFS(walkFn, dir, currentDepth+1); err != nil && !errors.Is(err, ErrWalkSkipSubtree) {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WalkFunc is the function provided to the Walk function for each directory.
|
|
type WalkFunc func(path *Path, info os.FileInfo, err error) error
|
|
|
|
// Walk walks the directory using the algorithm specified in the configuration. Your WalkFunc
|
|
// may return any of the ErrWalk* errors to control various behavior of the walker. See the documentation
|
|
// of each error for more details.
|
|
func (w *Walk) Walk(walkFn WalkFunc) error {
|
|
funcs := map[Algorithm]func(walkFn WalkFunc, root *Path, currentDepth int) error{
|
|
AlgorithmBasic: w.walkBasic,
|
|
AlgorithmDepthFirst: w.walkDFS,
|
|
AlgorithmPostOrderDepthFirst: w.walkDFS,
|
|
AlgorithmPreOrderDepthFirst: w.walkPreOrderDFS,
|
|
}
|
|
algoFunc, ok := funcs[w.Opts.Algorithm]
|
|
if !ok {
|
|
return ErrInvalidAlgorithm
|
|
}
|
|
if err := algoFunc(walkFn, w.root, 0); err != nil {
|
|
if errors.Is(err, errWalkControl) {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
|
|
}
|