kubelet执行CNI插件
kubelet启动时,默认读取如下目录CNI插件配置文件,如读取10-flannel.conflist配置文件以使用flannel插件:
# ll /etc/cni/net.d/
total 4
-rw-r--r-- 1 root root 292 Jun 4 22:44 10-flannel.conflist
# cat /etc/cni/net.d/10-flannel.conflist
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
kubelet仅支持使用一套CNI网络方案,若/etc/cni/net.d/有多套CNI配置文件,则按文件名字典序排序,使用第一个有效的配置文件。
kubelet拉起Pod,在infra容器启动后会根据上述配置文件基于CNI协议调用网络插件,在该infra容器的Network Namespace中配置网络栈。
/opt/cni/bin/包含CNI插件所需的基础可执行文件,如下所示,其中/opt/cni/bin/flannel为flannel容器网络提供的CNI插件:
# ll /opt/cni/bin/
total 52116
-rwxr-xr-x 1 root root 2782728 Jan 19 05:09 bandwidth
-rwxr-xr-x 1 root root 3104192 Jan 19 05:09 bridge
-rwxr-xr-x 1 root root 7607056 Jan 19 05:09 dhcp
-rwxr-xr-x 1 root root 2863024 Jan 19 05:09 dummy
-rwxr-xr-x 1 root root 3165352 Jan 19 05:09 firewall
-rwxr-xr-x 1 root root 2342446 Jun 4 22:44 flannel
-rwxr-xr-x 1 root root 2775224 Jan 19 05:09 host-device
-rwxr-xr-x 1 root root 2332792 Jan 19 05:09 host-local
-rwxr-xr-x 1 root root 2871792 Jan 19 05:09 ipvlan
-rwxr-xr-x 1 root root 2396976 Jan 19 05:09 loopback
-rwxr-xr-x 1 root root 2893624 Jan 19 05:09 macvlan
-rwxr-xr-x 1 root root 2689440 Jan 19 05:09 portmap
-rwxr-xr-x 1 root root 3000032 Jan 19 05:09 ptp
-rwxr-xr-x 1 root root 2542400 Jan 19 05:09 sbr
-rwxr-xr-x 1 root root 2074072 Jan 19 05:09 static
-rwxr-xr-x 1 root root 2456920 Jan 19 05:09 tuning
-rwxr-xr-x 1 root root 2867512 Jan 19 05:09 vlan
-rwxr-xr-x 1 root root 2566424 Jan 19 05:09 vrf
这类可执行文件按照功能分为三类:
- main插件: 创建具体网络设备
- IPAM(IP Address Management)插件: 分配IP
- 内置CNI插件, 如flannel
容器网络方案,可划分为两部分工作:
- 实现网络方案本身
如flanneld主要逻辑,创建和配置flannel.1设备、配置宿主机路由、配置ARP和FDB表等
- 实现网络方案对应的CNI插件
如/opt/cni/bin/flannel、/opt/cni/bin/bridge等可执行文件,用于配置Infra容器里的网络栈,并把它连接到CNI网桥上
kubelet关键代码分析
基于1.23版本分析
kubelet初始化容器运行时服务模块时,会初始化网络配置:
// syncNetworkConfig loads the default CNI network from plugin.confDir (using
// plugin.binDirs as the plugin search path) and installs it on the plugin.
// When loading fails the error is logged and setDefaultNetwork is not called.
func (plugin *cniNetworkPlugin) syncNetworkConfig() {
	defaultNet, loadErr := getDefaultCNINetwork(plugin.confDir, plugin.binDirs)
	if loadErr == nil {
		plugin.setDefaultNetwork(defaultNet)
		return
	}
	klog.InfoS("Unable to update cni config", "err", loadErr)
}
getDefaultCNINetwork获取CNI配置文件,其中confDir默认为/etc/cni/net.d:
// getDefaultCNINetwork looks in confDir for files ending in .conf, .conflist
// or .json, sorts the file names, and walks them in that order.
// It returns an error when listing fails, when no file is found, or when the
// loop finishes without returning a network.
// NOTE: this is an abridged excerpt; the `......` line marks omitted code.
func getDefaultCNINetwork(confDir string, binDirs []string) (*cniNetwork, error) {
files, err := libcni.ConfFiles(confDir, []string{".conf", ".conflist", ".json"})
switch {
case err != nil:
return nil, err
case len(files) == 0:
return nil, fmt.Errorf("no networks found in %s", confDir)
}
cniConfig := &libcni.CNIConfig{Path: binDirs}
sort.Strings(files)
// Walk all config files in sorted order and return the first valid one.
for _, confFile := range files {
var confList *libcni.NetworkConfigList
if strings.HasSuffix(confFile, ".conflist") {
confList, err = libcni.ConfListFromFile(confFile)
if err != nil {
klog.InfoS("Error loading CNI config list file", "path", confFile, "err", err)
continue
}
} else {
conf, err := libcni.ConfFromFile(confFile)
......
return &cniNetwork{
name: confList.Name,
NetworkConfig: confList,
CNIConfig: cniConfig,
Capabilities: caps,
}, nil
}
return nil, fmt.Errorf("no valid networks found in %s", confDir)
}
kubelet CNI关键接口如下,重点关注SetUpPod和TearDownPod方法:
// NetworkPlugin is an interface to network plugins for the kubelet.
type NetworkPlugin interface {
// Init initializes the plugin. This will be called exactly once
// before any other methods are called.
Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error
// Event is called on various events like:
// NET_PLUGIN_EVENT_POD_CIDR_CHANGE
Event(name string, details map[string]interface{})
// Name returns the plugin's name. This will be used when searching
// for a plugin by name.
Name() string
// Capabilities returns a set of NET_PLUGIN_CAPABILITY_* values.
Capabilities() utilsets.Int
// SetUpPod is the method called after the infra container of
// the pod has been created but before the other containers of the
// pod are launched.
SetUpPod(namespace string, name string, podSandboxID kubecontainer.ContainerID, annotations, options map[string]string) error
// TearDownPod is the method called before a pod's infra container will be deleted.
TearDownPod(namespace string, name string, podSandboxID kubecontainer.ContainerID) error
// GetPodNetworkStatus is the method called to obtain the ipv4 or ipv6 addresses of the container.
GetPodNetworkStatus(namespace string, name string, podSandboxID kubecontainer.ContainerID) (*PodNetworkStatus, error)
// Status returns error if the network plugin is in error state.
Status() error
}
PodSandboxManager.RunPodSandbox创建Pod沙箱时,会调用SetUpPod配置容器网络,cniNetworkPlugin.SetUpPod主要做的事情如下:
1. check插件是否初始化
/etc/cni/net.d/需有CNI配置文件
2. 构建libcni.RuntimeConf对象
基于容器namespace路径、名称、注解等信息构建,RuntimeConf对象与后续的ENV参数、stdin参数相关
// buildCNIRuntimeConf assembles the libcni.RuntimeConf for one pod sandbox:
// container ID, network namespace path, interface name, cache directory, the
// K8S_* plugin args, and the capability args (port mappings, bandwidth,
// IP ranges, DNS). It returns an error when the "dns" option cannot be
// unmarshalled.
// NOTE: this is an abridged excerpt; the `......` lines mark omitted code, so
// braces do not balance as shown.
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (*libcni.RuntimeConf, error) {
rt := &libcni.RuntimeConf{
ContainerID: podSandboxID.ID,
NetNS: podNetnsPath,
IfName: network.DefaultInterfaceName,
CacheDir: plugin.cacheDir,
// Becomes the content of the CNI_ARGS environment variable.
Args: [][2]string{
{"IgnoreUnknown", "1"},
{"K8S_POD_NAMESPACE", podNs},
{"K8S_POD_NAME", podName},
{"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
},
}
......
// Capability args for the CNI plugins; only the capabilities a plugin
// supports are injected into that plugin's stdin config.
rt.CapabilityArgs = map[string]interface{}{
portMappingsCapability: portMappingsParam,
}
......
if egress != nil {
bandwidthParam.EgressRate = int(egress.Value())
// Limit EgressBurst to math.MaxInt32, in practice limiting to 2Gbit is the equivalent of setting no limit
bandwidthParam.EgressBurst = math.MaxInt32
}
rt.CapabilityArgs[bandwidthCapability] = bandwidthParam
}
// Set the PodCIDR
rt.CapabilityArgs[ipRangesCapability] = [][]cniIPRange{{{Subnet: plugin.podCidr}}}
// Set dns capability args.
if dnsOptions, ok := options["dns"]; ok {
dnsConfig := runtimeapi.DNSConfig{}
err := json.Unmarshal([]byte(dnsOptions), &dnsConfig)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal dns config %q: %v", dnsOptions, err)
}
if dnsParam := buildDNSCapabilities(&dnsConfig); dnsParam != nil {
rt.CapabilityArgs[dnsCapability] = *dnsParam
}
}
return rt, nil
}
3. 获取CNI可执行文件
如 /opt/cni/bin/flannel
4. 进一步校验libcni.RuntimeConf对象并基于其构建NetworkConfig对象
5. 执行CNI可执行文件
入参有两部分:
1). 环境变量
RuntimeConf对象会先转换为Args,Args再转换为ENV。
构建RuntimeConf代码:
// buildCNIRuntimeConf (condensed excerpt) shows the RuntimeConf fields that
// later feed the plugin's environment variables.
// NOTE: the `......` lines and `rt.CapabilityArgs=...` are placeholders for
// omitted code, not valid Go.
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations, options map[string]string) (*libcni.RuntimeConf, error) {
rt := &libcni.RuntimeConf{
ContainerID: podSandboxID.ID,
NetNS: podNetnsPath,
IfName: network.DefaultInterfaceName,
CacheDir: plugin.cacheDir,
Args: [][2]string{
{"IgnoreUnknown", "1"},
{"K8S_POD_NAMESPACE", podNs},
{"K8S_POD_NAME", podName},
{"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
},
}
......
// Prepare CapabilityArgs; the data comes from pod annotations and from CRI
// (e.g. the DNS options).
rt.CapabilityArgs=...
......
}
基于RuntimeConf构建Args:
// args converts the runtime configuration rt into the invocation arguments
// for a plugin call with the given action. The plugin search directories in
// c.Path are joined with the OS path-list separator.
func (c *CNIConfig) args(action string, rt *RuntimeConf) *invoke.Args {
	searchPath := strings.Join(c.Path, string(os.PathListSeparator))
	invokeArgs := invoke.Args{
		Command:     action,
		ContainerID: rt.ContainerID,
		NetNS:       rt.NetNS,
		PluginArgs:  rt.Args,
		IfName:      rt.IfName,
		Path:        searchPath,
	}
	return &invokeArgs
}
Args转换为ENV:
// AsEnv renders the arguments as an environment list for the plugin process.
// It starts from the current process environment, appends the CNI_* variables,
// and passes the combined list through dedupEnv before returning it.
func (args *Args) AsEnv() []string {
	inherited := os.Environ()

	// Prefer the pre-rendered string; fall back to rendering PluginArgs.
	cniArgs := args.PluginArgsStr
	if cniArgs == "" {
		cniArgs = stringify(args.PluginArgs)
	}

	// Duplicated values which come first will be overridden, so the custom
	// values go last to avoid being overridden by the process environment.
	cniVars := []string{
		"CNI_COMMAND=" + args.Command,
		"CNI_CONTAINERID=" + args.ContainerID,
		"CNI_NETNS=" + args.NetNS,
		"CNI_ARGS=" + cniArgs,
		"CNI_IFNAME=" + args.IfName,
		"CNI_PATH=" + args.Path,
	}
	merged := append(inherited, cniVars...)
	return dedupEnv(merged)
}
capabilities会注入到stdin参数的runtimeConfig属性,并不会放在ENV里。当前版本支持如下capabilities:
portMappingsCapability = "portMappings"
ipRangesCapability = "ipRanges"
bandwidthCapability = "bandwidth"
dnsCapability = "dns"
capabilities参数详见:https://github.com/containernetworking/cni/blob/main/CONVENTIONS.md
2).CNI配置文件里的配置信息
主要是plugins字段及capabilities。
plugins字段示例:
# cat /etc/cni/net.d/10-flannel.conflist
{
"name": "cbr0",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "flannel",
"delegate": {
"hairpinMode": true,
"isDefaultGateway": true
}
},
{
"type": "portmap",
"capabilities": {
"portMappings": true
}
}
]
}
完整配置文件字段参考:https://github.com/containernetworking/cni/blob/main/SPEC.md#network-configuration
准备完调用参数后,kubelet执行CNI可执行文件部分代码:
// AddNetworkList executes a sequence of plugins with the ADD command.
// Each plugin receives the result of the previous one; the final result is
// cached and then returned. The first plugin error aborts the sequence.
func (c *CNIConfig) AddNetworkList(ctx context.Context, list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) {
	var prevResult types.Result

	// Run, one by one, the plugins listed in the "plugins" field of the
	// /etc/cni/net.d/xxx file, e.g. /opt/cni/bin/flannel and /opt/cni/bin/portmap.
	for _, pluginConf := range list.Plugins {
		next, err := c.addNetwork(ctx, list.Name, list.CNIVersion, pluginConf, prevResult, rt)
		if err != nil {
			return nil, err
		}
		prevResult = next
	}

	if err := c.cacheAdd(prevResult, list.Bytes, list.Name, rt); err != nil {
		return nil, fmt.Errorf("failed to set network %q cached result: %v", list.Name, err)
	}
	return prevResult, nil
}
// ExecPluginWithResult runs the plugin at pluginPath and decodes its stdout
// into a result of the CNI version declared in netconf. When exec is nil,
// defaultExec is used.
func ExecPluginWithResult(ctx context.Context, pluginPath string, netconf []byte, args CNIArgs, exec Exec) (types.Result, error) {
	runner := exec
	if runner == nil {
		runner = defaultExec
	}

	// The plugin gets two kinds of input: environment variables and stdin (netconf).
	rawResult, err := runner.ExecPlugin(ctx, pluginPath, netconf, args.AsEnv())
	if err != nil {
		return nil, err
	}

	// Plugin must return result in same version as specified in netconf.
	decoder := &version.ConfigDecoder{}
	declaredVersion, err := decoder.Decode(netconf)
	if err != nil {
		return nil, err
	}
	return version.NewResult(declaredVersion, rawResult)
}
CNI插件被kubelet执行,其入口代码示例如下:
import (
"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/cni/pkg/version"
"github.com/containernetworking/plugins/pkg/ns"
bv "github.com/containernetworking/plugins/pkg/utils/buildversion"
,,,,,,
)
// init locks the calling goroutine to its current OS thread before main runs.
// NOTE(review): presumably required because the plugin switches network
// namespaces, which are a per-thread property — confirm.
func init() {
runtime.LockOSThread()
}
var (
// Hold the Pod name and namespace used during a single plugin invocation.
PodName string
PodNS string
InfraContainerID string
)
func main() {
skel.PluginMain(cmdAdd, cmdCheck, cmdDel, version.PluginSupports("0.3.0", "0.3.1", "0.4.0"), bv.BuildString("cubecni"))
}
// cmdAdd is the handler registered for the ADD command.
// NOTE: this is an abridged excerpt; the `......` line marks the omitted
// plugin logic.
func cmdAdd(args *skel.CmdArgs) error {
// Parse args to obtain the network plugin config, stdin parameters, network
// namespace, interface name, container ID and similar parameters.
netNS, stdInCfg, k8sCfg, err := ParseCMDArgs(args)
if err != nil {
log.Errorf("failed to parse cmd args: err [%s]", err)
return err
}
defer netNS.Close()
// Network plugin logic goes here.
......
}
// cmdDel
// cmdCheck
以上便是kubelet调用CNI插件相关代码介绍。