dragonfly/scheduler/config/config.go

560 lines
17 KiB
Go

/*
* Copyright 2020 The Dragonfly Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package config
import (
"errors"
"fmt"
"net"
"time"
"d7y.io/dragonfly/v2/cmd/dependency/base"
"d7y.io/dragonfly/v2/pkg/net/fqdn"
"d7y.io/dragonfly/v2/pkg/net/ip"
"d7y.io/dragonfly/v2/pkg/rpc"
"d7y.io/dragonfly/v2/pkg/types"
)
// Config is the root configuration of the scheduler. It embeds the shared
// base options and holds one section per configuration area.
type Config struct {
// Base options, inlined into the top level of the configuration document.
base.Options `yaml:",inline" mapstructure:",squash"`
// Scheduler is the scheduler configuration.
Scheduler SchedulerConfig `yaml:"scheduler" mapstructure:"scheduler"`
// Server is the server configuration.
Server ServerConfig `yaml:"server" mapstructure:"server"`
// DynConfig is the dynconfig configuration.
DynConfig DynConfig `yaml:"dynConfig" mapstructure:"dynConfig"`
// Manager is the manager configuration.
Manager ManagerConfig `yaml:"manager" mapstructure:"manager"`
// SeedPeer is the seed peer configuration.
SeedPeer SeedPeerConfig `yaml:"seedPeer" mapstructure:"seedPeer"`
// Host is the host configuration.
Host HostConfig `yaml:"host" mapstructure:"host"`
// Job is the job configuration.
Job JobConfig `yaml:"job" mapstructure:"job"`
// Storage is the storage configuration.
Storage StorageConfig `yaml:"storage" mapstructure:"storage"`
// Metrics is the metrics configuration.
Metrics MetricsConfig `yaml:"metrics" mapstructure:"metrics"`
// Security is the security configuration.
Security SecurityConfig `yaml:"security" mapstructure:"security"`
// Network is the network configuration.
Network NetworkConfig `yaml:"network" mapstructure:"network"`
}
// ServerConfig holds the addresses, port, hostname and directories of the
// scheduler server.
type ServerConfig struct {
// IP is the legacy name of AdvertiseIP. Convert copies it into AdvertiseIP
// when AdvertiseIP is empty.
//
// Deprecated: Please use the `advertiseIP` field instead.
IP string `yaml:"ip" mapstructure:"ip"`
// Listen is the legacy name of ListenIP. Convert copies it into ListenIP
// when ListenIP is empty.
//
// Deprecated: Please use the `listenIP` field instead.
Listen string `yaml:"listen" mapstructure:"listen"`
// AdvertiseIP is the advertise ip. Validate rejects an empty value.
AdvertiseIP string `yaml:"advertiseIP" mapstructure:"advertiseIP"`
// ListenIP is the listen ip, like: 0.0.0.0, 192.168.0.1.
// Validate rejects an empty value.
ListenIP string `yaml:"listenIP" mapstructure:"listenIP"`
// Port is the server port. Validate requires it to be positive.
Port int `yaml:"port" mapstructure:"port"`
// Host is the server hostname. Validate rejects an empty value.
Host string `yaml:"host" mapstructure:"host"`
// WorkHome is the server work directory.
WorkHome string `yaml:"workHome" mapstructure:"workHome"`
// CacheDir is the server dynamic config cache directory.
CacheDir string `yaml:"cacheDir" mapstructure:"cacheDir"`
// LogDir is the server log directory.
LogDir string `yaml:"logDir" mapstructure:"logDir"`
// PluginDir is the server plugin directory.
PluginDir string `yaml:"pluginDir" mapstructure:"pluginDir"`
// DataDir is the server storage data directory.
DataDir string `yaml:"dataDir" mapstructure:"dataDir"`
}
// SchedulerConfig holds the scheduling algorithm, the back-to-source and
// retry limits, and the nested GC and training sections.
type SchedulerConfig struct {
// Algorithm is the scheduling algorithm used by the scheduler.
Algorithm string `yaml:"algorithm" mapstructure:"algorithm"`
// BackSourceCount is the legacy name of BackToSourceCount. Convert copies a
// non-zero value over BackToSourceCount.
//
// Deprecated: Please use the `backToSourceCount` field instead.
BackSourceCount int `yaml:"backSourceCount" mapstructure:"backSourceCount"`
// RetryBackSourceLimit is the legacy name of RetryBackToSourceLimit. Convert
// copies a non-zero value over RetryBackToSourceLimit.
//
// Deprecated: Please use the `retryBackToSourceLimit` field instead.
RetryBackSourceLimit int `yaml:"retryBackSourceLimit" mapstructure:"retryBackSourceLimit"`
// BackToSourceCount is the back-to-source count that a single task allows
// for its peers.
BackToSourceCount int `yaml:"backToSourceCount" mapstructure:"backToSourceCount"`
// RetryBackToSourceLimit is the retry limit; once it is reached, the peer
// goes back-to-source.
RetryBackToSourceLimit int `yaml:"retryBackToSourceLimit" mapstructure:"retryBackToSourceLimit"`
// RetryLimit is the retry limit; once it is reached, the scheduler returns
// scheduling failed.
RetryLimit int `yaml:"retryLimit" mapstructure:"retryLimit"`
// RetryInterval is the scheduling interval.
RetryInterval time.Duration `yaml:"retryInterval" mapstructure:"retryInterval"`
// GC is the GC configuration.
GC GCConfig `yaml:"gc" mapstructure:"gc"`
// Training is the training configuration.
Training TrainingConfig `yaml:"training" mapstructure:"training"`
}
// TrainingConfig holds the model training settings of the scheduler.
// Validate only checks the remaining fields when Enable is true.
type TrainingConfig struct {
// Enable turns training on.
Enable bool `yaml:"enable" mapstructure:"enable"`
// EnableAutoRefresh turns automatic model refresh on.
EnableAutoRefresh bool `yaml:"enableAutoRefresh" mapstructure:"enableAutoRefresh"`
// RefreshModelInterval is the interval for refreshing the model. Validate
// requires it to be positive when both Enable and EnableAutoRefresh are set.
RefreshModelInterval time.Duration `yaml:"refreshModelInterval" mapstructure:"refreshModelInterval"`
// CPU is the cpu limit while training. Validate requires it to be positive
// when Enable is set.
CPU int `yaml:"cpu" mapstructure:"cpu"`
}
// GCConfig holds the garbage collection timings of the scheduler.
// Validate requires every field to be positive.
type GCConfig struct {
// PieceDownloadTimeout is the timeout of downloading a piece.
PieceDownloadTimeout time.Duration `yaml:"pieceDownloadTimeout" mapstructure:"pieceDownloadTimeout"`
// PeerGCInterval is the interval of peer gc.
PeerGCInterval time.Duration `yaml:"peerGCInterval" mapstructure:"peerGCInterval"`
// PeerTTL is the time to live of a peer.
PeerTTL time.Duration `yaml:"peerTTL" mapstructure:"peerTTL"`
// TaskGCInterval is the interval of task gc.
TaskGCInterval time.Duration `yaml:"taskGCInterval" mapstructure:"taskGCInterval"`
// HostGCInterval is the interval of host gc.
HostGCInterval time.Duration `yaml:"hostGCInterval" mapstructure:"hostGCInterval"`
}
// DynConfig holds the dynamic configuration settings.
type DynConfig struct {
// RefreshInterval is the refresh interval for the manager cache.
// Validate requires it to be positive.
RefreshInterval time.Duration `yaml:"refreshInterval" mapstructure:"refreshInterval"`
}
// HostConfig holds the location attributes of the scheduler host.
// New leaves every field empty and Validate does not check them.
type HostConfig struct {
// IDC is the idc of the scheduler.
IDC string `mapstructure:"idc" yaml:"idc"`
// NetTopology is the net topology of the scheduler.
NetTopology string `mapstructure:"netTopology" yaml:"netTopology"`
// Location is the location of the scheduler.
Location string `mapstructure:"location" yaml:"location"`
}
// ManagerConfig holds the settings used to reach the manager.
type ManagerConfig struct {
// Addr is the manager address. Validate rejects an empty value.
Addr string `yaml:"addr" mapstructure:"addr"`
// SchedulerClusterID is the scheduler cluster id. Validate rejects zero.
SchedulerClusterID uint `yaml:"schedulerClusterID" mapstructure:"schedulerClusterID"`
// KeepAlive is the keep alive configuration.
KeepAlive KeepAliveConfig `yaml:"keepAlive" mapstructure:"keepAlive"`
}
// SeedPeerConfig holds the seed peer settings.
type SeedPeerConfig struct {
// Enable is to enable seed peer as P2P peer. New sets it to true.
Enable bool `yaml:"enable" mapstructure:"enable"`
}
// KeepAliveConfig holds the keep alive settings used by ManagerConfig.
type KeepAliveConfig struct {
// Interval is the keep alive interval. Validate requires it to be positive.
Interval time.Duration `yaml:"interval" mapstructure:"interval"`
}
// JobConfig holds the job service settings. Validate only checks the
// remaining fields when Enable is true.
type JobConfig struct {
// Enable turns the job service on. New sets it to true.
Enable bool `yaml:"enable" mapstructure:"enable"`
// GlobalWorkerNum is the number of workers in the global queue.
GlobalWorkerNum uint `yaml:"globalWorkerNum" mapstructure:"globalWorkerNum"`
// SchedulerWorkerNum is the number of workers in the scheduler queue.
SchedulerWorkerNum uint `yaml:"schedulerWorkerNum" mapstructure:"schedulerWorkerNum"`
// LocalWorkerNum is the number of workers in the local queue.
LocalWorkerNum uint `yaml:"localWorkerNum" mapstructure:"localWorkerNum"`
// Redis is the redis configuration.
Redis RedisConfig `yaml:"redis" mapstructure:"redis"`
}
// StorageConfig holds the storage file settings.
// Validate requires every field to be positive.
type StorageConfig struct {
// MaxSize sets the maximum size in megabytes of storage file.
MaxSize int `yaml:"maxSize" mapstructure:"maxSize"`
// MaxBackups sets the maximum number of storage files to retain.
MaxBackups int `yaml:"maxBackups" mapstructure:"maxBackups"`
// BufferSize sets the size of the buffer container;
// if the buffer is full, all the records in the buffer are written to the file.
BufferSize int `yaml:"bufferSize" mapstructure:"bufferSize"`
}
// RedisConfig holds the redis connection settings used by the job service.
type RedisConfig struct {
// Host is the legacy server host. Convert combines it with Port into a
// single Addrs entry when Addrs is empty.
//
// Deprecated: Please use the `addrs` field instead.
Host string `yaml:"host" mapstructure:"host"`
// Port is the legacy server port. Convert combines it with Host into a
// single Addrs entry when Addrs is empty.
//
// Deprecated: Please use the `addrs` field instead.
Port int `yaml:"port" mapstructure:"port"`
// Addrs is the list of server addresses. Validate rejects an empty list when
// the job service is enabled.
Addrs []string `yaml:"addrs" mapstructure:"addrs"`
// MasterName is the sentinel master name.
MasterName string `yaml:"masterName" mapstructure:"masterName"`
// Username is the server username.
Username string `yaml:"username" mapstructure:"username"`
// Password is the server password.
Password string `yaml:"password" mapstructure:"password"`
// BrokerDB is the broker database name. Validate requires it to be positive
// when exactly one address is configured.
BrokerDB int `yaml:"brokerDB" mapstructure:"brokerDB"`
// BackendDB is the backend database name. Validate requires it to be
// positive when exactly one address is configured.
BackendDB int `yaml:"backendDB" mapstructure:"backendDB"`
}
// MetricsConfig holds the metrics service settings.
type MetricsConfig struct {
// Enable turns the metrics service on.
Enable bool `yaml:"enable" mapstructure:"enable"`
// Addr is the metrics service address. Validate rejects an empty value when
// Enable is set.
Addr string `yaml:"addr" mapstructure:"addr"`
// EnablePeerHost turns peer host metrics on.
EnablePeerHost bool `yaml:"enablePeerHost" mapstructure:"enablePeerHost"`
}
// SecurityConfig holds the certificate and TLS settings for grpc.
type SecurityConfig struct {
// AutoIssueCert indicates whether to issue client certificates for all grpc calls.
// If AutoIssueCert is false, any other option in Security will be ignored.
AutoIssueCert bool `mapstructure:"autoIssueCert" yaml:"autoIssueCert"`
// CACert is the root CA certificate for all grpc tls handshakes; it can be a
// path or a PEM format string. Validate rejects an empty value when
// AutoIssueCert is set.
CACert types.PEMContent `mapstructure:"caCert" yaml:"caCert"`
// TLSVerify indicates whether to verify client certificates.
TLSVerify bool `mapstructure:"tlsVerify" yaml:"tlsVerify"`
// TLSPolicy controls the grpc handshake behaviors:
// force: both ClientHandshake and ServerHandshake only support tls.
// prefer: ServerHandshake supports tls and insecure (non-tls), ClientHandshake will only support tls.
// default: ServerHandshake supports tls and insecure (non-tls), ClientHandshake will only support insecure (non-tls).
TLSPolicy string `mapstructure:"tlsPolicy" yaml:"tlsPolicy"`
// CertSpec is the desired state of the certificate.
CertSpec CertSpec `mapstructure:"certSpec" yaml:"certSpec"`
}
// CertSpec describes the desired state of an issued certificate.
type CertSpec struct {
// ValidityPeriod is the validity period of the certificate. Validate requires
// it to be positive when certificates are issued automatically.
ValidityPeriod time.Duration `mapstructure:"validityPeriod" yaml:"validityPeriod"`
}
// NetworkConfig holds the network settings of the server.
type NetworkConfig struct {
// EnableIPv6 enables ipv6 for the server. Convert uses it to choose between
// the IPv4 and IPv6 defaults for the advertise and listen addresses.
EnableIPv6 bool `mapstructure:"enableIPv6" yaml:"enableIPv6"`
}
// New returns a Config pre-populated with the package defaults. Any field
// that is not assigned below (the embedded base options, the host section,
// the server addresses and directories, the manager address and the redis
// connection details) is left at its zero value.
func New() *Config {
	cfg := new(Config)

	// Server: only the port and hostname have defaults.
	cfg.Server.Port = DefaultServerPort
	cfg.Server.Host = fqdn.FQDNHostname

	// Scheduler, including its GC and training sub-sections.
	scheduler := &cfg.Scheduler
	scheduler.Algorithm = DefaultSchedulerAlgorithm
	scheduler.BackToSourceCount = DefaultSchedulerBackToSourceCount
	scheduler.RetryBackToSourceLimit = DefaultSchedulerRetryBackToSourceLimit
	scheduler.RetryLimit = DefaultSchedulerRetryLimit
	scheduler.RetryInterval = DefaultSchedulerRetryInterval

	scheduler.GC.PieceDownloadTimeout = DefaultSchedulerPieceDownloadTimeout
	scheduler.GC.PeerGCInterval = DefaultSchedulerPeerGCInterval
	scheduler.GC.PeerTTL = DefaultSchedulerPeerTTL
	scheduler.GC.TaskGCInterval = DefaultSchedulerTaskGCInterval
	scheduler.GC.HostGCInterval = DefaultSchedulerHostGCInterval

	// Training is off by default, but its tunables still carry defaults.
	scheduler.Training.Enable = false
	scheduler.Training.EnableAutoRefresh = false
	scheduler.Training.RefreshModelInterval = DefaultRefreshModelInterval
	scheduler.Training.CPU = DefaultCPU

	// Dynconfig.
	cfg.DynConfig.RefreshInterval = DefaultDynConfigRefreshInterval

	// Manager: the address has no default.
	cfg.Manager.SchedulerClusterID = DefaultManagerSchedulerClusterID
	cfg.Manager.KeepAlive.Interval = DefaultManagerKeepAliveInterval

	// Seed peer is enabled by default.
	cfg.SeedPeer.Enable = true

	// Job service is enabled by default; the redis addresses have no default.
	job := &cfg.Job
	job.Enable = true
	job.GlobalWorkerNum = DefaultJobGlobalWorkerNum
	job.SchedulerWorkerNum = DefaultJobSchedulerWorkerNum
	job.LocalWorkerNum = DefaultJobLocalWorkerNum
	job.Redis.BrokerDB = DefaultJobRedisBrokerDB
	job.Redis.BackendDB = DefaultJobRedisBackendDB

	// Storage.
	cfg.Storage.MaxSize = DefaultStorageMaxSize
	cfg.Storage.MaxBackups = DefaultStorageMaxBackups
	cfg.Storage.BufferSize = DefaultStorageBufferSize

	// Metrics are off by default.
	cfg.Metrics.Enable = false
	cfg.Metrics.Addr = DefaultMetricsAddr
	cfg.Metrics.EnablePeerHost = false

	// Security: certificates are not issued automatically by default.
	security := &cfg.Security
	security.AutoIssueCert = false
	security.TLSVerify = true
	security.TLSPolicy = rpc.PreferTLSPolicy
	security.CertSpec.ValidityPeriod = DefaultCertValidityPeriod

	// Network.
	cfg.Network.EnableIPv6 = DefaultNetworkEnableIPv6

	return cfg
}
// Validate checks the configuration section by section and returns an error
// naming the first parameter that is missing or out of range. It returns nil
// when every check passes. The training, job, metrics and security checks
// only run when the corresponding feature is enabled.
func (cfg *Config) Validate() error {
	// Server.
	server := &cfg.Server
	switch {
	case server.AdvertiseIP == "":
		return errors.New("server requires parameter advertiseIP")
	case server.ListenIP == "":
		return errors.New("server requires parameter listenIP")
	case server.Port <= 0:
		return errors.New("server requires parameter port")
	case server.Host == "":
		return errors.New("server requires parameter host")
	}

	// Scheduler and its GC timings.
	scheduler := &cfg.Scheduler
	gc := &scheduler.GC
	switch {
	case scheduler.Algorithm == "":
		return errors.New("scheduler requires parameter algorithm")
	case scheduler.BackToSourceCount == 0:
		return errors.New("scheduler requires parameter backToSourceCount")
	case scheduler.RetryBackToSourceLimit == 0:
		return errors.New("scheduler requires parameter retryBackToSourceLimit")
	case scheduler.RetryLimit <= 0:
		return errors.New("scheduler requires parameter retryLimit")
	case scheduler.RetryInterval <= 0:
		return errors.New("scheduler requires parameter retryInterval")
	case gc.PieceDownloadTimeout <= 0:
		return errors.New("scheduler requires parameter pieceDownloadTimeout")
	case gc.PeerTTL <= 0:
		return errors.New("scheduler requires parameter peerTTL")
	case gc.PeerGCInterval <= 0:
		return errors.New("scheduler requires parameter peerGCInterval")
	case gc.TaskGCInterval <= 0:
		return errors.New("scheduler requires parameter taskGCInterval")
	case gc.HostGCInterval <= 0:
		return errors.New("scheduler requires parameter hostGCInterval")
	}

	// Training, only when enabled.
	if training := &scheduler.Training; training.Enable {
		switch {
		case training.CPU <= 0:
			return errors.New("training requires parameter cpu")
		case training.EnableAutoRefresh && training.RefreshModelInterval <= 0:
			return errors.New("training requires parameter refreshModelInterval")
		}
	}

	// Dynconfig and manager.
	manager := &cfg.Manager
	switch {
	case cfg.DynConfig.RefreshInterval <= 0:
		return errors.New("dynconfig requires parameter refreshInterval")
	case manager.Addr == "":
		return errors.New("manager requires parameter addr")
	case manager.SchedulerClusterID == 0:
		return errors.New("manager requires parameter schedulerClusterID")
	case manager.KeepAlive.Interval <= 0:
		return errors.New("manager requires parameter keepAlive interval")
	}

	// Job service, only when enabled. The database checks apply only when
	// exactly one redis address is configured.
	if job := &cfg.Job; job.Enable {
		redis := &job.Redis
		singleAddr := len(redis.Addrs) == 1
		switch {
		case job.GlobalWorkerNum == 0:
			return errors.New("job requires parameter globalWorkerNum")
		case job.SchedulerWorkerNum == 0:
			return errors.New("job requires parameter schedulerWorkerNum")
		case job.LocalWorkerNum == 0:
			return errors.New("job requires parameter localWorkerNum")
		case len(redis.Addrs) == 0:
			return errors.New("job requires parameter addrs")
		case singleAddr && redis.BrokerDB <= 0:
			return errors.New("job requires parameter redis brokerDB")
		case singleAddr && redis.BackendDB <= 0:
			return errors.New("job requires parameter redis backendDB")
		}
	}

	// Storage.
	storage := &cfg.Storage
	switch {
	case storage.MaxSize <= 0:
		return errors.New("storage requires parameter maxSize")
	case storage.MaxBackups <= 0:
		return errors.New("storage requires parameter maxBackups")
	case storage.BufferSize <= 0:
		return errors.New("storage requires parameter bufferSize")
	}

	// Metrics, only when enabled.
	if cfg.Metrics.Enable && cfg.Metrics.Addr == "" {
		return errors.New("metrics requires parameter addr")
	}

	// Security, only when certificates are issued automatically.
	if security := &cfg.Security; security.AutoIssueCert {
		switch {
		case security.CACert == "":
			return errors.New("security requires parameter caCert")
		case security.CertSpec.ValidityPeriod <= 0:
			return errors.New("certSpec requires parameter validityPeriod")
		}
	}

	return nil
}
// Convert normalizes cfg in place. It migrates values from deprecated fields
// to their replacements and then fills in the advertise and listen addresses
// when they are still empty. It always returns nil.
//
// Migration rules:
//   - a non-zero scheduler backSourceCount / retryBackSourceLimit overrides
//     backToSourceCount / retryBackToSourceLimit;
//   - redis host and port become the single entry of addrs when addrs is empty;
//   - server ip / listen are used only when advertiseIP / listenIP are empty.
func (cfg *Config) Convert() error {
	scheduler := &cfg.Scheduler

	// TODO Compatible with deprecated fields backSourceCount.
	if scheduler.BackSourceCount != 0 {
		scheduler.BackToSourceCount = scheduler.BackSourceCount
	}

	// TODO Compatible with deprecated fields retryBackSourceLimit.
	if scheduler.RetryBackSourceLimit != 0 {
		scheduler.RetryBackToSourceLimit = scheduler.RetryBackSourceLimit
	}

	// TODO Compatible with deprecated fields host and port.
	redis := &cfg.Job.Redis
	if len(redis.Addrs) == 0 && redis.Host != "" && redis.Port > 0 {
		host := redis.Host

		// A host that is already wrapped in brackets (e.g. "[::1]") is unwrapped
		// first so that it is not bracketed twice below.
		if n := len(host); n > 2 && host[0] == '[' && host[n-1] == ']' {
			host = host[1 : n-1]
		}

		// JoinHostPort encloses IPv6 literals in brackets; plain "%s:%d"
		// formatting would yield an ambiguous address such as "::1:6379".
		redis.Addrs = []string{net.JoinHostPort(host, fmt.Sprint(redis.Port))}
	}

	server := &cfg.Server

	// TODO Compatible with deprecated fields ip.
	if server.IP != "" && server.AdvertiseIP == "" {
		server.AdvertiseIP = server.IP
	}

	// TODO Compatible with deprecated fields listen.
	if server.Listen != "" && server.ListenIP == "" {
		server.ListenIP = server.Listen
	}

	// Fall back to the address family selected by network.enableIPv6.
	if server.AdvertiseIP == "" {
		if cfg.Network.EnableIPv6 {
			server.AdvertiseIP = ip.IPv6
		} else {
			server.AdvertiseIP = ip.IPv4
		}
	}

	// With no listen address configured, listen on the unspecified address.
	if server.ListenIP == "" {
		if cfg.Network.EnableIPv6 {
			server.ListenIP = net.IPv6zero.String()
		} else {
			server.ListenIP = net.IPv4zero.String()
		}
	}

	return nil
}