podman/libpod/kube.go

704 lines
22 KiB
Go

package libpod
import (
"math/rand"
"os"
"strconv"
"strings"
"time"
"github.com/containers/podman/v2/libpod/define"
"github.com/containers/podman/v2/pkg/lookup"
"github.com/containers/podman/v2/pkg/util"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// GenerateForKube takes a slice of libpod containers and generates
// one v1.Pod description that includes just a single container.
func GenerateForKube(ctrs []*Container) (*v1.Pod, error) {
// Generate the v1.Pod yaml description
return simplePodWithV1Containers(ctrs)
}
// GenerateForKube takes a slice of libpod containers and generates
// one v1.Pod description
func (p *Pod) GenerateForKube() (*v1.Pod, []v1.ServicePort, error) {
// Generate the v1.Pod yaml description
var (
ports []v1.ContainerPort //nolint
servicePorts []v1.ServicePort //nolint
)
allContainers, err := p.allContainers()
if err != nil {
return nil, servicePorts, err
}
// If the pod has no containers, no sense to generate YAML
if len(allContainers) == 0 {
return nil, servicePorts, errors.Errorf("pod %s has no containers", p.ID())
}
// If only an infra container is present, makes no sense to generate YAML
if len(allContainers) == 1 && p.HasInfraContainer() {
return nil, servicePorts, errors.Errorf("pod %s only has an infra container", p.ID())
}
extraHost := make([]v1.HostAlias, 0)
hostNetwork := false
if p.HasInfraContainer() {
infraContainer, err := p.getInfraContainer()
if err != nil {
return nil, servicePorts, err
}
for _, host := range infraContainer.config.ContainerNetworkConfig.HostAdd {
hostSli := strings.SplitN(host, ":", 2)
if len(hostSli) != 2 {
return nil, servicePorts, errors.New("invalid hostAdd")
}
extraHost = append(extraHost, v1.HostAlias{
IP: hostSli[1],
Hostnames: []string{hostSli[0]},
})
}
ports, err = ocicniPortMappingToContainerPort(infraContainer.config.PortMappings)
if err != nil {
return nil, servicePorts, err
}
servicePorts = containerPortsToServicePorts(ports)
hostNetwork = p.config.InfraContainer.HostNetwork
}
pod, err := p.podWithContainers(allContainers, ports, hostNetwork)
if err != nil {
return nil, servicePorts, err
}
pod.Spec.HostAliases = extraHost
// vendor/k8s.io/api/core/v1/types.go: v1.Container cannot save restartPolicy
// so set it at here
for _, ctr := range allContainers {
if !ctr.IsInfra() {
switch ctr.Config().RestartPolicy {
case RestartPolicyAlways:
pod.Spec.RestartPolicy = v1.RestartPolicyAlways
case RestartPolicyOnFailure:
pod.Spec.RestartPolicy = v1.RestartPolicyOnFailure
case RestartPolicyNo:
pod.Spec.RestartPolicy = v1.RestartPolicyNever
default: // some pod create from cmdline, such as "", so set it to Never
pod.Spec.RestartPolicy = v1.RestartPolicyNever
}
break
}
}
if p.SharesPID() {
// unfortunately, go doesn't have a nice way to specify a pointer to a bool
b := true
pod.Spec.ShareProcessNamespace = &b
}
return pod, servicePorts, nil
}
func (p *Pod) getInfraContainer() (*Container, error) {
infraID, err := p.InfraContainerID()
if err != nil {
return nil, err
}
return p.runtime.GetContainer(infraID)
}
// GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object
func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) v1.Service {
service := v1.Service{}
selector := make(map[string]string)
selector["app"] = pod.Labels["app"]
ports := servicePorts
if len(ports) == 0 {
ports = containersToServicePorts(pod.Spec.Containers)
}
serviceSpec := v1.ServiceSpec{
Ports: ports,
Selector: selector,
Type: v1.ServiceTypeNodePort,
}
service.Spec = serviceSpec
service.ObjectMeta = pod.ObjectMeta
tm := v12.TypeMeta{
Kind: "Service",
APIVersion: pod.TypeMeta.APIVersion,
}
service.TypeMeta = tm
return service
}
// containerPortsToServicePorts takes a slice of containerports and generates a
// slice of service ports
func containerPortsToServicePorts(containerPorts []v1.ContainerPort) []v1.ServicePort {
sps := make([]v1.ServicePort, 0, len(containerPorts))
for _, cp := range containerPorts {
nodePort := 30000 + rand.Intn(32767-30000+1)
servicePort := v1.ServicePort{
Protocol: cp.Protocol,
Port: cp.ContainerPort,
NodePort: int32(nodePort),
Name: strconv.Itoa(int(cp.ContainerPort)),
}
sps = append(sps, servicePort)
}
return sps
}
// containersToServicePorts takes a slice of v1.Containers and generates an
// inclusive list of serviceports to expose
func containersToServicePorts(containers []v1.Container) []v1.ServicePort {
// Without the call to rand.Seed, a program will produce the same sequence of pseudo-random numbers
// for each execution. Legal nodeport range is 30000-32767
rand.Seed(time.Now().UnixNano())
sps := make([]v1.ServicePort, 0, len(containers))
for _, ctr := range containers {
sps = append(sps, containerPortsToServicePorts(ctr.Ports)...)
}
return sps
}
func (p *Pod) podWithContainers(containers []*Container, ports []v1.ContainerPort, hostNetwork bool) (*v1.Pod, error) {
deDupPodVolumes := make(map[string]*v1.Volume)
first := true
podContainers := make([]v1.Container, 0, len(containers))
dnsInfo := v1.PodDNSConfig{}
for _, ctr := range containers {
if !ctr.IsInfra() {
ctr, volumes, _, err := containerToV1Container(ctr)
if err != nil {
return nil, err
}
// Since port bindings for the pod are handled by the
// infra container, wipe them here.
ctr.Ports = nil
// We add the original port declarations from the libpod infra container
// to the first kubernetes container description because otherwise we loose
// the original container/port bindings.
if first && len(ports) > 0 {
ctr.Ports = ports
first = false
}
podContainers = append(podContainers, ctr)
// Deduplicate volumes, so if containers in the pod share a volume, it's only
// listed in the volumes section once
for _, vol := range volumes {
vol := vol
deDupPodVolumes[vol.Name] = &vol
}
} else {
_, _, infraDNS, err := containerToV1Container(ctr)
if err != nil {
return nil, err
}
if infraDNS != nil {
if servers := infraDNS.Nameservers; len(servers) > 0 {
dnsInfo.Nameservers = servers
}
if searches := infraDNS.Searches; len(searches) > 0 {
dnsInfo.Searches = searches
}
if options := infraDNS.Options; len(options) > 0 {
dnsInfo.Options = options
}
}
}
}
podVolumes := make([]v1.Volume, 0, len(deDupPodVolumes))
for _, vol := range deDupPodVolumes {
podVolumes = append(podVolumes, *vol)
}
return addContainersAndVolumesToPodObject(podContainers, podVolumes, p.Name(), &dnsInfo, hostNetwork), nil
}
func addContainersAndVolumesToPodObject(containers []v1.Container, volumes []v1.Volume, podName string, dnsOptions *v1.PodDNSConfig, hostNetwork bool) *v1.Pod {
tm := v12.TypeMeta{
Kind: "Pod",
APIVersion: "v1",
}
// Add a label called "app" with the containers name as a value
labels := make(map[string]string)
labels["app"] = removeUnderscores(podName)
om := v12.ObjectMeta{
// The name of the pod is container_name-libpod
Name: podName,
Labels: labels,
// CreationTimestamp seems to be required, so adding it; in doing so, the timestamp
// will reflect time this is run (not container create time) because the conversion
// of the container create time to v1 Time is probably not warranted nor worthwhile.
CreationTimestamp: v12.Now(),
}
ps := v1.PodSpec{
Containers: containers,
Volumes: volumes,
HostNetwork: hostNetwork,
}
if dnsOptions != nil {
ps.DNSConfig = dnsOptions
}
p := v1.Pod{
TypeMeta: tm,
ObjectMeta: om,
Spec: ps,
}
return &p
}
// simplePodWithV1Containers is a function used by inspect when kube yaml needs to be generated
// for a single container. we "insert" that container description in a pod.
func simplePodWithV1Containers(ctrs []*Container) (*v1.Pod, error) {
kubeCtrs := make([]v1.Container, 0, len(ctrs))
kubeVolumes := make([]v1.Volume, 0)
hostNetwork := true
podDNS := v1.PodDNSConfig{}
for _, ctr := range ctrs {
if !ctr.HostNetwork() {
hostNetwork = false
}
kubeCtr, kubeVols, ctrDNS, err := containerToV1Container(ctr)
if err != nil {
return nil, err
}
kubeCtrs = append(kubeCtrs, kubeCtr)
kubeVolumes = append(kubeVolumes, kubeVols...)
// Combine DNS information in sum'd structure
if ctrDNS != nil {
// nameservers
if servers := ctrDNS.Nameservers; servers != nil {
if podDNS.Nameservers == nil {
podDNS.Nameservers = make([]string, 0)
}
for _, s := range servers {
if !util.StringInSlice(s, podDNS.Nameservers) { // only append if it does not exist
podDNS.Nameservers = append(podDNS.Nameservers, s)
}
}
}
// search domains
if domains := ctrDNS.Searches; domains != nil {
if podDNS.Searches == nil {
podDNS.Searches = make([]string, 0)
}
for _, d := range domains {
if !util.StringInSlice(d, podDNS.Searches) { // only append if it does not exist
podDNS.Searches = append(podDNS.Searches, d)
}
}
}
// dns options
if options := ctrDNS.Options; options != nil {
if podDNS.Options == nil {
podDNS.Options = make([]v1.PodDNSConfigOption, 0)
}
podDNS.Options = append(podDNS.Options, options...)
}
} // end if ctrDNS
}
return addContainersAndVolumesToPodObject(kubeCtrs, kubeVolumes, strings.ReplaceAll(ctrs[0].Name(), "_", ""), &podDNS, hostNetwork), nil
}
// containerToV1Container converts information we know about a libpod container
// to a V1.Container specification.
func containerToV1Container(c *Container) (v1.Container, []v1.Volume, *v1.PodDNSConfig, error) {
kubeContainer := v1.Container{}
kubeVolumes := []v1.Volume{}
kubeSec, err := generateKubeSecurityContext(c)
if err != nil {
return kubeContainer, kubeVolumes, nil, err
}
if len(c.config.Spec.Linux.Devices) > 0 {
// TODO Enable when we can support devices and their names
kubeContainer.VolumeDevices = generateKubeVolumeDeviceFromLinuxDevice(c.Spec().Linux.Devices)
return kubeContainer, kubeVolumes, nil, errors.Wrapf(define.ErrNotImplemented, "linux devices")
}
if len(c.config.UserVolumes) > 0 {
// TODO When we until we can resolve what the volume name should be, this is disabled
// Volume names need to be coordinated "globally" in the kube files.
volumeMounts, volumes, err := libpodMountsToKubeVolumeMounts(c)
if err != nil {
return kubeContainer, kubeVolumes, nil, err
}
kubeContainer.VolumeMounts = volumeMounts
kubeVolumes = append(kubeVolumes, volumes...)
}
envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env)
if err != nil {
return kubeContainer, kubeVolumes, nil, err
}
portmappings, err := c.PortMappings()
if err != nil {
return kubeContainer, kubeVolumes, nil, err
}
ports, err := ocicniPortMappingToContainerPort(portmappings)
if err != nil {
return kubeContainer, kubeVolumes, nil, err
}
containerCommands := c.Command()
kubeContainer.Name = removeUnderscores(c.Name())
_, image := c.Image()
kubeContainer.Image = image
kubeContainer.Stdin = c.Stdin()
// prepend the entrypoint of the container to command
if ep := c.Entrypoint(); len(c.Entrypoint()) > 0 {
ep = append(ep, containerCommands...)
containerCommands = ep
}
kubeContainer.Command = containerCommands
// TODO need to figure out how we handle command vs entry point. Kube appears to prefer entrypoint.
// right now we just take the container's command
//container.Args = args
kubeContainer.WorkingDir = c.WorkingDir()
kubeContainer.Ports = ports
// This should not be applicable
//container.EnvFromSource =
kubeContainer.Env = envVariables
kubeContainer.SecurityContext = kubeSec
kubeContainer.StdinOnce = false
kubeContainer.TTY = c.config.Spec.Process.Terminal
if c.config.Spec.Linux != nil &&
c.config.Spec.Linux.Resources != nil {
if c.config.Spec.Linux.Resources.Memory != nil &&
c.config.Spec.Linux.Resources.Memory.Limit != nil {
if kubeContainer.Resources.Limits == nil {
kubeContainer.Resources.Limits = v1.ResourceList{}
}
qty := kubeContainer.Resources.Limits.Memory()
qty.Set(*c.config.Spec.Linux.Resources.Memory.Limit)
kubeContainer.Resources.Limits[v1.ResourceMemory] = *qty
}
if c.config.Spec.Linux.Resources.CPU != nil &&
c.config.Spec.Linux.Resources.CPU.Quota != nil &&
c.config.Spec.Linux.Resources.CPU.Period != nil {
quota := *c.config.Spec.Linux.Resources.CPU.Quota
period := *c.config.Spec.Linux.Resources.CPU.Period
if quota > 0 && period > 0 {
cpuLimitMilli := int64(1000 * util.PeriodAndQuotaToCores(period, quota))
// Kubernetes: precision finer than 1m is not allowed
if cpuLimitMilli >= 1 {
if kubeContainer.Resources.Limits == nil {
kubeContainer.Resources.Limits = v1.ResourceList{}
}
qty := kubeContainer.Resources.Limits.Cpu()
qty.SetMilli(cpuLimitMilli)
kubeContainer.Resources.Limits[v1.ResourceCPU] = *qty
}
}
}
}
// Obtain the DNS entries from the container
dns := v1.PodDNSConfig{}
// DNS servers
if servers := c.config.DNSServer; len(servers) > 0 {
dnsServers := make([]string, 0)
for _, server := range servers {
dnsServers = append(dnsServers, server.String())
}
dns.Nameservers = dnsServers
}
// DNS search domains
if searches := c.config.DNSSearch; len(searches) > 0 {
dns.Searches = searches
}
// DNS options
if options := c.config.DNSOption; len(options) > 0 {
dnsOptions := make([]v1.PodDNSConfigOption, 0)
for _, option := range options {
// the option can be "k:v" or just "k", no delimiter is required
opts := strings.SplitN(option, ":", 2)
dnsOpt := v1.PodDNSConfigOption{
Name: opts[0],
Value: &opts[1],
}
dnsOptions = append(dnsOptions, dnsOpt)
}
dns.Options = dnsOptions
}
return kubeContainer, kubeVolumes, &dns, nil
}
// ocicniPortMappingToContainerPort takes an ocicni portmapping and converts
// it to a v1.ContainerPort format for kube output
func ocicniPortMappingToContainerPort(portMappings []ocicni.PortMapping) ([]v1.ContainerPort, error) {
containerPorts := make([]v1.ContainerPort, 0, len(portMappings))
for _, p := range portMappings {
var protocol v1.Protocol
switch strings.ToUpper(p.Protocol) {
case "TCP":
protocol = v1.ProtocolTCP
case "UDP":
protocol = v1.ProtocolUDP
default:
return containerPorts, errors.Errorf("unknown network protocol %s", p.Protocol)
}
cp := v1.ContainerPort{
// Name will not be supported
HostPort: p.HostPort,
HostIP: p.HostIP,
ContainerPort: p.ContainerPort,
Protocol: protocol,
}
containerPorts = append(containerPorts, cp)
}
return containerPorts, nil
}
// libpodEnvVarsToKubeEnvVars converts a key=value string slice to []v1.EnvVar
func libpodEnvVarsToKubeEnvVars(envs []string) ([]v1.EnvVar, error) {
envVars := make([]v1.EnvVar, 0, len(envs))
for _, e := range envs {
split := strings.SplitN(e, "=", 2)
if len(split) != 2 {
return envVars, errors.Errorf("environment variable %s is malformed; should be key=value", e)
}
ev := v1.EnvVar{
Name: split[0],
Value: split[1],
}
envVars = append(envVars, ev)
}
return envVars, nil
}
// libpodMountsToKubeVolumeMounts converts the containers mounts to a struct kube understands
func libpodMountsToKubeVolumeMounts(c *Container) ([]v1.VolumeMount, []v1.Volume, error) {
// TODO when named volumes are supported in play kube, also parse named volumes here
_, mounts := c.sortUserVolumes(c.config.Spec)
vms := make([]v1.VolumeMount, 0, len(mounts))
vos := make([]v1.Volume, 0, len(mounts))
for _, m := range mounts {
vm, vo, err := generateKubeVolumeMount(m)
if err != nil {
return vms, vos, err
}
vms = append(vms, vm)
vos = append(vos, vo)
}
return vms, vos, nil
}
// generateKubeVolumeMount takes a user specified mount and returns
// a kubernetes VolumeMount (to be added to the container) and a kubernetes Volume
// (to be added to the pod)
func generateKubeVolumeMount(m specs.Mount) (v1.VolumeMount, v1.Volume, error) {
vm := v1.VolumeMount{}
vo := v1.Volume{}
name, err := convertVolumePathToName(m.Source)
if err != nil {
return vm, vo, err
}
vm.Name = name
vm.MountPath = m.Destination
if util.StringInSlice("ro", m.Options) {
vm.ReadOnly = true
}
vo.Name = name
vo.HostPath = &v1.HostPathVolumeSource{}
vo.HostPath.Path = m.Source
isDir, err := isHostPathDirectory(m.Source)
// neither a directory or a file lives here, default to creating a directory
// TODO should this be an error instead?
var hostPathType v1.HostPathType
switch {
case err != nil:
hostPathType = v1.HostPathDirectoryOrCreate
case isDir:
hostPathType = v1.HostPathDirectory
default:
hostPathType = v1.HostPathFile
}
vo.HostPath.Type = &hostPathType
return vm, vo, nil
}
func isHostPathDirectory(hostPathSource string) (bool, error) {
info, err := os.Stat(hostPathSource)
if err != nil {
return false, err
}
return info.Mode().IsDir(), nil
}
func convertVolumePathToName(hostSourcePath string) (string, error) {
if len(hostSourcePath) == 0 {
return "", errors.Errorf("hostSourcePath must be specified to generate volume name")
}
if len(hostSourcePath) == 1 {
if hostSourcePath != "/" {
return "", errors.Errorf("hostSourcePath malformatted: %s", hostSourcePath)
}
// add special case name
return "root", nil
}
// First, trim trailing slashes, then replace slashes with dashes.
// Thus, /mnt/data/ will become mnt-data
return strings.Replace(strings.Trim(hostSourcePath, "/"), "/", "-", -1), nil
}
func determineCapAddDropFromCapabilities(defaultCaps, containerCaps []string) *v1.Capabilities {
var (
drop = []v1.Capability{}
add = []v1.Capability{}
)
dedupDrop := make(map[string]bool)
dedupAdd := make(map[string]bool)
// Find caps in the defaultCaps but not in the container's
// those indicate a dropped cap
for _, capability := range defaultCaps {
if !util.StringInSlice(capability, containerCaps) {
if _, ok := dedupDrop[capability]; !ok {
drop = append(drop, v1.Capability(capability))
dedupDrop[capability] = true
}
}
}
// Find caps in the container but not in the defaults; those indicate
// an added cap
for _, capability := range containerCaps {
if !util.StringInSlice(capability, defaultCaps) {
if _, ok := dedupAdd[capability]; !ok {
add = append(add, v1.Capability(capability))
dedupAdd[capability] = true
}
}
}
return &v1.Capabilities{
Add: add,
Drop: drop,
}
}
func capAddDrop(caps *specs.LinuxCapabilities) (*v1.Capabilities, error) {
g, err := generate.New("linux")
if err != nil {
return nil, err
}
// Combine all the default capabilities into a slice
defaultCaps := append(g.Config.Process.Capabilities.Ambient, g.Config.Process.Capabilities.Bounding...)
defaultCaps = append(defaultCaps, g.Config.Process.Capabilities.Effective...)
defaultCaps = append(defaultCaps, g.Config.Process.Capabilities.Inheritable...)
defaultCaps = append(defaultCaps, g.Config.Process.Capabilities.Permitted...)
// Combine all the container's capabilities into a slice
containerCaps := append(caps.Ambient, caps.Bounding...)
containerCaps = append(containerCaps, caps.Effective...)
containerCaps = append(containerCaps, caps.Inheritable...)
containerCaps = append(containerCaps, caps.Permitted...)
calculatedCaps := determineCapAddDropFromCapabilities(defaultCaps, containerCaps)
return calculatedCaps, nil
}
// generateKubeSecurityContext generates a securityContext based on the existing container
func generateKubeSecurityContext(c *Container) (*v1.SecurityContext, error) {
priv := c.Privileged()
ro := c.IsReadOnly()
allowPrivEscalation := !c.config.Spec.Process.NoNewPrivileges
newCaps, err := capAddDrop(c.config.Spec.Process.Capabilities)
if err != nil {
return nil, err
}
var selinuxOpts v1.SELinuxOptions
opts := strings.SplitN(c.config.Spec.Annotations[define.InspectAnnotationLabel], ":", 2)
if len(opts) == 2 {
switch opts[0] {
case "type":
selinuxOpts.Type = opts[1]
case "level":
selinuxOpts.Level = opts[1]
}
}
if len(opts) == 1 {
if opts[0] == "disable" {
selinuxOpts.Type = "spc_t"
}
}
sc := v1.SecurityContext{
Capabilities: newCaps,
Privileged: &priv,
SELinuxOptions: &selinuxOpts,
// RunAsNonRoot is an optional parameter; our first implementations should be root only; however
// I'm leaving this as a bread-crumb for later
//RunAsNonRoot: &nonRoot,
ReadOnlyRootFilesystem: &ro,
AllowPrivilegeEscalation: &allowPrivEscalation,
}
if c.User() != "" {
if !c.batched {
c.lock.Lock()
defer c.lock.Unlock()
}
if err := c.syncContainer(); err != nil {
return nil, errors.Wrapf(err, "unable to sync container during YAML generation")
}
logrus.Debugf("Looking in container for user: %s", c.User())
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.User(), nil)
if err != nil {
return nil, err
}
uid := int64(execUser.Uid)
gid := int64(execUser.Gid)
sc.RunAsUser = &uid
sc.RunAsGroup = &gid
}
return &sc, nil
}
// generateKubeVolumeDeviceFromLinuxDevice takes a list of devices and makes a VolumeDevice struct for kube
func generateKubeVolumeDeviceFromLinuxDevice(devices []specs.LinuxDevice) []v1.VolumeDevice {
volumeDevices := make([]v1.VolumeDevice, 0, len(devices))
for _, d := range devices {
vd := v1.VolumeDevice{
// TBD How are we going to sync up these names
//Name:
DevicePath: d.Path,
}
volumeDevices = append(volumeDevices, vd)
}
return volumeDevices
}
func removeUnderscores(s string) string {
return strings.Replace(s, "_", "", -1)
}