/* * Copyright 2023 The Dragonfly Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package trainer import ( "context" "errors" "fmt" "net" "net/http" "time" "google.golang.org/grpc" logger "d7y.io/dragonfly/v2/internal/dflog" "d7y.io/dragonfly/v2/pkg/dfpath" "d7y.io/dragonfly/v2/pkg/net/ip" "d7y.io/dragonfly/v2/trainer/config" "d7y.io/dragonfly/v2/trainer/metrics" "d7y.io/dragonfly/v2/trainer/rpcserver" "d7y.io/dragonfly/v2/trainer/storage" ) const ( // gracefulStopTimeout specifies a time limit for // grpc server to complete a graceful shutdown. gracefulStopTimeout = 10 * time.Minute ) // Server is the trainer server. type Server struct { // Server configuration. config *config.Config // GRPC server. grpcServer *grpc.Server // Metrics server. metricsServer *http.Server // Storage interface. storage storage.Storage } // New creates a new Server. func New(ctx context.Context, cfg *config.Config, d dfpath.Dfpath) (*Server, error) { s := &Server{config: cfg} // Initialize Storage. s.storage = storage.New(d.DataDir()) // Initialize trainer grpc server. s.grpcServer = rpcserver.New(cfg, s.storage) // Initialize metrics. if cfg.Metrics.Enable { s.metricsServer = metrics.New(&cfg.Metrics, s.grpcServer) } return s, nil } // Serve starts the trainer server. func (s *Server) Serve() error { // Started metrics server. if s.metricsServer != nil { go func() { logger.Infof("started metrics server at %s", s.metricsServer.Addr) if err := s.metricsServer.ListenAndServe(); err != nil { if err == http.ErrServerClosed { return } logger.Fatalf("metrics server closed unexpect: %s", err.Error()) } }() } // Generate GRPC limit listener. ip, ok := ip.FormatIP(s.config.Server.ListenIP.String()) if !ok { return errors.New("format ip failed") } listener, err := net.Listen("tcp", fmt.Sprintf("%s:%d", ip, s.config.Server.Port)) if err != nil { logger.Fatalf("net listener failed to start: %s", err.Error()) } defer listener.Close() // Started GRPC server. logger.Infof("started grpc server at %s://%s", listener.Addr().Network(), listener.Addr().String()) if err := s.grpcServer.Serve(listener); err != nil { logger.Errorf("stoped grpc server: %s", err.Error()) return err } return nil } // Stop stops the trainer server. func (s *Server) Stop() { // Clean storage file. if err := s.storage.Clear(); err != nil { logger.Errorf("clean storage file failed %s", err.Error()) } else { logger.Info("clean storage file completed") } // Stop metrics server. if s.metricsServer != nil { if err := s.metricsServer.Shutdown(context.Background()); err != nil { logger.Errorf("metrics server failed to stop: %s", err.Error()) } else { logger.Info("metrics server closed under request") } } // Stop GRPC server. stopped := make(chan struct{}) go func() { s.grpcServer.GracefulStop() logger.Info("grpc server closed under request") close(stopped) }() t := time.NewTimer(gracefulStopTimeout) select { case <-t.C: s.grpcServer.Stop() case <-stopped: t.Stop() } }