/* * Copyright 2023 The Dragonfly Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //go:generate mockgen -destination mocks/probes_mock.go -source probes.go -package mocks package networktopology import ( "context" "encoding/json" "errors" "strconv" "time" "github.com/redis/go-redis/v9" logger "d7y.io/dragonfly/v2/internal/dflog" "d7y.io/dragonfly/v2/pkg/cache" pkgredis "d7y.io/dragonfly/v2/pkg/redis" "d7y.io/dragonfly/v2/scheduler/config" "d7y.io/dragonfly/v2/scheduler/resource" ) const ( // defaultMovingAverageWeight is the default weight of moving average. defaultMovingAverageWeight = 0.1 ) // Probe is the probe metadata. type Probe struct { // Host metadata. Host *resource.Host `json:"host"` // RTT is the round-trip time sent via this pinger. RTT time.Duration `json:"rtt"` // CreatedAt is the time to create probe. CreatedAt time.Time `json:"createdAt"` } // Probes is the interface to store probes. type Probes interface { // Peek returns the oldest probe without removing it. Peek() (*Probe, error) // Enqueue enqueues probe into the queue. Enqueue(*Probe) error // Len gets the length of probes. Len() (int64, error) // CreatedAt is the creation time of probes. CreatedAt() (time.Time, error) // UpdatedAt is the updated time to store probe. UpdatedAt() (time.Time, error) // AverageRTT is the moving average round-trip time of probes. AverageRTT() (time.Duration, error) } // probes is the implementation of Probes. type probes struct { // config is the probe config. config config.NetworkTopologyConfig // rdb is redis universal client interface. rdb redis.UniversalClient // Cache instance. cache cache.Cache // srcHostID is the source host id. srcHostID string // destHostID is the destination host id. destHostID string } // NewProbes creates a probes interface. func NewProbes(cfg config.NetworkTopologyConfig, rdb redis.UniversalClient, cache cache.Cache, srcHostID string, destHostID string) Probes { return &probes{ config: cfg, rdb: rdb, cache: cache, srcHostID: srcHostID, destHostID: destHostID, } } // Peek returns the oldest probe without removing it. func (p *probes) Peek() (*Probe, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() probesKey := pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID) if cache, _, ok := p.cache.GetWithExpiration(probesKey); ok { probes, ok := cache.([]*Probe) if !ok { return nil, errors.New("get probes failed") } if len(probes) == 0 { return nil, errors.New("probes cache is empty") } return probes[0], nil } rawProbes, err := p.rdb.LRange(ctx, pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID), 0, -1).Result() if err != nil { logger.Errorf("get probes failed: %s", err.Error()) return nil, err } var probes []*Probe for _, rawProbe := range rawProbes { probe := &Probe{} if err = json.Unmarshal([]byte(rawProbe), probe); err != nil { return nil, err } probes = append(probes, probe) } // Add cache data. p.cache.Set(probesKey, probes, p.config.Cache.TTL) return probes[0], nil } // Enqueue enqueues probe into the queue. func (p *probes) Enqueue(probe *Probe) error { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() // Get the length of the queue. length, err := p.Len() if err != nil { return err } // If the queue is full, remove the oldest probe. if length >= int64(p.config.Probe.QueueLength) { if _, err := p.dequeue(); err != nil { return err } } // Push the probe into the queue. data, err := json.Marshal(probe) if err != nil { return err } probesKey := pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID) if err := p.rdb.RPush(ctx, probesKey, data).Err(); err != nil { return err } p.cache.Delete(probesKey) // Calculate the moving average round-trip time. var averageRTT time.Duration if length > 0 { // If the queue is not empty, calculate the // moving average round-trip time. rawProbes, err := p.rdb.LRange(context.Background(), probesKey, 0, -1).Result() if err != nil { return err } for index, rawProbe := range rawProbes { probe := &Probe{} if err = json.Unmarshal([]byte(rawProbe), probe); err != nil { return err } if index == 0 { averageRTT = probe.RTT continue } averageRTT = time.Duration(float64(averageRTT)*defaultMovingAverageWeight + float64(probe.RTT)*(1-defaultMovingAverageWeight)) } } else { // If the queue is empty, use the probe round-trip time as // the moving average round-trip time. averageRTT = probe.RTT } // Update the moving average round-trip time and updated time. networkTopologyKey := pkgredis.MakeNetworkTopologyKeyInScheduler(p.srcHostID, p.destHostID) if err := p.rdb.HSet(ctx, networkTopologyKey, "averageRTT", averageRTT.Nanoseconds()).Err(); err != nil { return err } if err := p.rdb.HSet(ctx, networkTopologyKey, "updatedAt", probe.CreatedAt.Format(time.RFC3339Nano)).Err(); err != nil { return err } p.cache.Delete(networkTopologyKey) probedCountKey := pkgredis.MakeProbedCountKeyInScheduler(p.destHostID) if err := p.rdb.Incr(ctx, probedCountKey).Err(); err != nil { return err } p.cache.Delete(probedCountKey) return nil } // Length gets the length of probes. func (p *probes) Len() (int64, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() probesKey := pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID) if cache, _, ok := p.cache.GetWithExpiration(probesKey); ok { probes, ok := cache.([]*Probe) if !ok { return int64(0), errors.New("get probes failed") } return int64(len(probes)), nil } rawProbes, err := p.rdb.LRange(ctx, pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID), 0, -1).Result() if err != nil { logger.Errorf("get probes failed: %s", err.Error()) return int64(0), err } if len(rawProbes) == 0 { return int64(0), err } var probes []*Probe for _, rawProbe := range rawProbes { probe := &Probe{} if err = json.Unmarshal([]byte(rawProbe), probe); err != nil { return int64(0), err } probes = append(probes, probe) } // Add cache data. p.cache.Set(probesKey, probes, p.config.Cache.TTL) return int64(len(probes)), nil } // CreatedAt is the creation time of probes. func (p *probes) CreatedAt() (time.Time, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() var networkTopology map[string]string networkTopologyKey := pkgredis.MakeNetworkTopologyKeyInScheduler(p.srcHostID, p.destHostID) cache, _, ok := p.cache.GetWithExpiration(networkTopologyKey) if ok { if networkTopology, ok = cache.(map[string]string); !ok { return time.Time{}, errors.New("get networkTopology failed") } } else { var err error if networkTopology, err = p.rdb.HGetAll(ctx, networkTopologyKey).Result(); err != nil { return time.Time{}, err } // Add cache data. p.cache.Set(networkTopologyKey, networkTopology, p.config.Cache.TTL) } createdAt, err := time.Parse(time.RFC3339Nano, networkTopology["createdAt"]) if err != nil { return time.Time{}, err } return createdAt, nil } // UpdatedAt is the updated time to store probe. func (p *probes) UpdatedAt() (time.Time, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() var networkTopology map[string]string networkTopologyKey := pkgredis.MakeNetworkTopologyKeyInScheduler(p.srcHostID, p.destHostID) cache, _, ok := p.cache.GetWithExpiration(networkTopologyKey) if ok { if networkTopology, ok = cache.(map[string]string); !ok { return time.Time{}, errors.New("get networkTopology failed") } } else { var err error if networkTopology, err = p.rdb.HGetAll(ctx, networkTopologyKey).Result(); err != nil { return time.Time{}, err } // Add cache data. p.cache.Set(networkTopologyKey, networkTopology, p.config.Cache.TTL) } updatedAt, err := time.Parse(time.RFC3339Nano, networkTopology["updatedAt"]) if err != nil { return time.Time{}, err } return updatedAt, nil } // AverageRTT is the moving average round-trip time of probes. func (p *probes) AverageRTT() (time.Duration, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() var networkTopology map[string]string networkTopologyKey := pkgredis.MakeNetworkTopologyKeyInScheduler(p.srcHostID, p.destHostID) cache, _, ok := p.cache.GetWithExpiration(networkTopologyKey) if ok { if networkTopology, ok = cache.(map[string]string); !ok { return time.Duration(0), errors.New("get networkTopology failed") } } else { var err error if networkTopology, err = p.rdb.HGetAll(ctx, networkTopologyKey).Result(); err != nil { return time.Duration(0), err } // Add cache data. p.cache.Set(networkTopologyKey, networkTopology, p.config.Cache.TTL) } averageRTT, err := strconv.ParseInt(networkTopology["averageRTT"], 10, 64) if err != nil { return time.Duration(0), err } return time.Duration(averageRTT), nil } // dequeue removes and returns the oldest probe. func (p *probes) dequeue() (*Probe, error) { ctx, cancel := context.WithTimeout(context.Background(), contextTimeout) defer cancel() probesKey := pkgredis.MakeProbesKeyInScheduler(p.srcHostID, p.destHostID) rawProbe, err := p.rdb.LPop(ctx, probesKey).Bytes() if err != nil { return nil, err } p.cache.Delete(probesKey) probe := &Probe{} if err = json.Unmarshal(rawProbe, probe); err != nil { return nil, err } return probe, nil }