Karpenter Technical Introduction (3): Controllers

2025-09-22 10:33:41

Karpenter splits its functionality into fine-grained pieces, implemented as a large number of controllers. NewControllers wires all of them up:

func NewControllers(
    mgr manager.Manager,
    clock clock.Clock,
    kubeClient client.Client,
    recorder events.Recorder,
    cloudProvider cloudprovider.CloudProvider,
) []controller.Controller {

    // In-memory cache of the state of the entire cluster
    cluster := state.NewCluster(clock, kubeClient)
    // The core controller: launches new NodeClaims to accommodate pending pods
    p := provisioning.NewProvisioner(kubeClient, recorder, cloudProvider, cluster)
    // Pod eviction queue; its producer is the node terminator
    evictionQueue := terminator.NewQueue(kubeClient, recorder)
    // Disruption queue; its producer is the disruption controller
    disruptionQueue := orchestration.NewQueue(kubeClient, recorder, cluster, clock, p)

    return []controller.Controller{
        p, evictionQueue, disruptionQueue,
        // Disruption controller: decides whether a NodeClaim needs to be disrupted
        // and, if so, sends a command to the disruption queue
        disruption.NewController(clock, kubeClient, p, cloudProvider, recorder, cluster, disruptionQueue),
        // Watches pending pods and feeds them into the Provisioner's batching window
        provisioning.NewPodController(kubeClient, p),
        // Watches schedulable nodes and feeds them into the Provisioner's batching window
        provisioning.NewNodeController(kubeClient, p),
        // Writes the NodePool's hash into the annotations of all of its NodeClaims
        nodepoolhash.NewController(kubeClient),
        // Deletes expired NodeClaims
        expiration.NewController(clock, kubeClient),
        // The following five controllers keep the cluster cache up to date
        informer.NewDaemonSetController(kubeClient, cluster),
        informer.NewNodeController(kubeClient, cluster),
        informer.NewPodController(kubeClient, cluster),
        informer.NewNodePoolController(kubeClient, cluster),
        informer.NewNodeClaimController(kubeClient, cluster),
        // Watches deleting nodes: deletes the NodeClaim, ensures the underlying
        // instance is terminated, and finally removes the node's finalizer
        termination.NewController(clock, kubeClient, cloudProvider, terminator.NewTerminator(clock, kubeClient, evictionQueue, recorder), recorder),
        // The following three controllers export metrics
        metricspod.NewController(kubeClient),
        metricsnodepool.NewController(kubeClient),
        metricsnode.NewController(cluster),
        // When a NodePool's nodeclass reference changes, checks that the nodeclass
        // exists and updates the NodePool's conditions
        nodepoolreadiness.NewController(kubeClient, cloudProvider),
        // Tallies the cluster capacity already allocated to each NodePool and
        // updates its resources field (cpu, memory)
        nodepoolcounter.NewController(kubeClient, cluster),
        // Validates the NodePool's fields and records the result in its conditions
        nodepoolvalidation.NewController(kubeClient),
        // Watches pods and updates the LastPodEventTime of the corresponding NodeClaim
        podevents.NewController(clock, kubeClient),
        // Checks NodeClaims for consistency and updates their conditions
        nodeclaimconsistency.NewController(clock, kubeClient, recorder),
        // NodeClaim lifecycle controller: calls the provider to create/delete
        // instances and maintains the NodeClaim's conditions
        nodeclaimlifecycle.NewController(clock, kubeClient, cloudProvider, recorder),
        // NodeClaim GC controller: deletes a NodeClaim that has registered but
        // whose node is not ready
        nodeclaimgarbagecollection.NewController(clock, kubeClient, cloudProvider),
        // Maintains the Drifted and Consolidatable conditions from the NodeClaim's
        // state; these conditions determine whether it can be disrupted
        nodeclaimdisruption.NewController(clock, kubeClient, cloudProvider),
        // Generic status controllers that surface NodeClaim and NodePool conditions
        status.NewController[*v1.NodeClaim](kubeClient, mgr.GetEventRecorderFor("karpenter")),
        status.NewController[*v1.NodePool](kubeClient, mgr.GetEventRecorderFor("karpenter")),
    }
}

The most important of these are the Provisioner, the disruption controller, and the nodeclaimlifecycle controller. All of them share the same reconciler shape, sketched below.
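
Every constructor above returns an object that plugs into controller-runtime's manager and runs a reconcile loop. As a point of reference, here is a minimal reconciler in that shape; the type name and its no-op body are illustrative, not Karpenter's actual code:

package main

import (
    "context"

    corev1 "k8s.io/api/core/v1"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"
)

// nodeReconciler is an illustrative reconciler: Karpenter's controllers follow
// the same Reconcile(ctx, req) -> (Result, error) contract from controller-runtime.
type nodeReconciler struct {
    client.Client
}

func (r *nodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    node := &corev1.Node{}
    if err := r.Get(ctx, req.NamespacedName, node); err != nil {
        // The object may already be gone; nothing to do in that case.
        return ctrl.Result{}, client.IgnoreNotFound(err)
    }
    // A real controller would reconcile desired vs. observed state here.
    return ctrl.Result{}, nil
}

// Register wires the reconciler into the manager so it receives Node events.
func (r *nodeReconciler) Register(mgr ctrl.Manager) error {
    return ctrl.NewControllerManagedBy(mgr).
        For(&corev1.Node{}).
        Complete(r)
}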

Scale-up flow

The scale-up flow is triggered when pods are stuck in the Pending state: provisioning.NewPodController watches these pods and feeds them into the Provisioner's batching window, and the Provisioner then launches new NodeClaims to fit them. A sketch of the trigger condition follows.
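
The exact predicate lives inside the pod controller; roughly, a pod qualifies for provisioning when it is Pending, bound to no node, and explicitly marked unschedulable by the scheduler. The helper below is a minimal sketch under those assumptions (isProvisionable is a hypothetical name, not Karpenter's API):

package main

import (
    corev1 "k8s.io/api/core/v1"
)

// isProvisionable is a hypothetical helper: it reports whether a pod should
// trigger scale-up, i.e. it is Pending, has no node assigned, and the
// scheduler has marked it Unschedulable.
func isProvisionable(pod *corev1.Pod) bool {
    if pod.Spec.NodeName != "" || pod.Status.Phase != corev1.PodPending {
        return false
    }
    for _, cond := range pod.Status.Conditions {
        if cond.Type == corev1.PodScheduled &&
            cond.Status == corev1.ConditionFalse &&
            cond.Reason == corev1.PodReasonUnschedulable {
            return true
        }
    }
    return false
}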

Disruption flow

The disruption flow is triggered when a NodeClaim's Drifted condition (or another designated condition) becomes true, or when the cluster has excess capacity. The disruption controller then sends a command to the disruption queue, which orchestrates replacing and terminating the affected nodes. A sketch of the condition check follows.
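
Whether a NodeClaim is a disruption candidate is read off its status conditions. A minimal sketch, assuming the conditions follow the standard metav1.Condition convention (hasDisruptionCondition is a hypothetical helper, not Karpenter's API):

package main

import (
    "k8s.io/apimachinery/pkg/api/meta"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// hasDisruptionCondition is a hypothetical helper: it reports whether any of
// the conditions that make a NodeClaim eligible for disruption is currently true.
func hasDisruptionCondition(conditions []metav1.Condition) bool {
    for _, t := range []string{"Drifted", "Consolidatable"} {
        if c := meta.FindStatusCondition(conditions, t); c != nil && c.Status == metav1.ConditionTrue {
            return true
        }
    }
    return false
}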
