Kubernetes Autoscaling: HPA and Custom Metrics
Introduction
In a Kubernetes environment, autoscaling is a key capability for keeping applications highly available while using resources efficiently. The Horizontal Pod Autoscaler (HPA) is Kubernetes' built-in autoscaling mechanism. This article takes a close look at how the HPA works, how to configure it, and best practices.
1. How the HPA Works
1.1 HPA Architecture
```
HPA Controller
      │
      ▼
Fetch metrics (Metrics Server / Prometheus)
      │
      ▼
Compute desired replicas = ceil(currentReplicas × currentMetricValue / targetMetricValue)
      │
      ▼
Update the replica count on the Deployment / ReplicaSet
```
1.2 HPA Workflow
- Collect metrics: fetch metrics from the Metrics Server or a custom metrics API
- Compute replicas: calculate the desired number of Pods from the target value (see the Go sketch below)
- Update replicas: call the API to update the Deployment's replica count
- Wait for stabilization: wait for Pod creation/termination to complete
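The heart of the second step is the documented HPA formula: desiredReplicas = ceil(currentReplicas × currentMetricValue / targetMetricValue), clamped to the configured minimum and maximum. The Go function below is a minimal sketch of that calculation; the function name and signature are illustrative and not part of any Kubernetes library, and the real controller additionally applies a tolerance band, readiness handling, and the stabilization behavior covered later in this article.

```go
package autoscaling

import "math"

// desiredReplicas is a simplified sketch of the HPA scaling formula:
//   desired = ceil(currentReplicas * currentMetricValue / targetMetricValue)
// clamped to [minReplicas, maxReplicas]. The real controller also applies a
// tolerance (10% by default), skips not-ready Pods, and honors stabilization windows.
func desiredReplicas(current, minReplicas, maxReplicas int32, currentMetric, targetMetric float64) int32 {
	if targetMetric <= 0 {
		return current // avoid dividing by zero; leave the replica count unchanged
	}
	desired := int32(math.Ceil(float64(current) * currentMetric / targetMetric))
	if desired < minReplicas {
		desired = minReplicas
	}
	if desired > maxReplicas {
		desired = maxReplicas
	}
	return desired
}
```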
2. Basic HPA Configuration
2.1 CPU-based HPA
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
```
2.2 Memory-based HPA
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: memory
        target:
          type: AverageValue
          averageValue: 512Mi
```
2.3 HPA with Multiple Metrics
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 80
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 70
```
2.4 Creating an HPA with Go
```go
package autoscaling

import (
	"context"
	"fmt"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type HPAManager struct {
	client *K8sClient
}

func NewHPAManager(client *K8sClient) *HPAManager {
	return &HPAManager{client: client}
}

// CreateHPA creates an HPA targeting 70% CPU utilization for the given Deployment.
func (h *HPAManager) CreateHPA(ctx context.Context, name, namespace, deploymentName string, minReplicas, maxReplicas int32) error {
	targetCPU := int32(70)
	hpa := &autoscalingv2.HorizontalPodAutoscaler{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: namespace,
		},
		Spec: autoscalingv2.HorizontalPodAutoscalerSpec{
			ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{
				APIVersion: "apps/v1",
				Kind:       "Deployment",
				Name:       deploymentName,
			},
			MinReplicas: &minReplicas,
			MaxReplicas: maxReplicas,
			Metrics: []autoscalingv2.MetricSpec{
				{
					Type: autoscalingv2.ResourceMetricSourceType,
					Resource: &autoscalingv2.ResourceMetricSource{
						Name: corev1.ResourceCPU,
						Target: autoscalingv2.MetricTarget{
							Type:               autoscalingv2.UtilizationMetricType,
							AverageUtilization: &targetCPU,
						},
					},
				},
			},
		},
	}

	_, err := h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Create(ctx, hpa, metav1.CreateOptions{})
	if err != nil {
		return fmt.Errorf("failed to create HPA: %w", err)
	}
	return nil
}

// GetHPAStatus returns the current status of an HPA.
func (h *HPAManager) GetHPAStatus(ctx context.Context, name, namespace string) (*autoscalingv2.HorizontalPodAutoscalerStatus, error) {
	hpa, err := h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get HPA: %w", err)
	}
	return &hpa.Status, nil
}

// UpdateHPAMetrics replaces the metric specs of an existing HPA.
func (h *HPAManager) UpdateHPAMetrics(ctx context.Context, name, namespace string, metrics []autoscalingv2.MetricSpec) error {
	hpa, err := h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get HPA: %w", err)
	}

	hpa.Spec.Metrics = metrics
	_, err = h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Update(ctx, hpa, metav1.UpdateOptions{})
	if err != nil {
		return fmt.Errorf("failed to update HPA: %w", err)
	}
	return nil
}
```
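A short usage sketch of the manager above, in the same autoscaling package. It assumes a *K8sClient (with its clientset field) has already been constructed elsewhere, for example from a kubeconfig; the example function itself is illustrative.

```go
// ExampleCreateHPA shows how HPAManager might be used. The caller supplies a
// ready *K8sClient; constructing it is not shown here.
func ExampleCreateHPA(ctx context.Context, client *K8sClient) error {
	mgr := NewHPAManager(client)

	// Scale the "myapp" Deployment between 2 and 10 replicas at 70% CPU.
	if err := mgr.CreateHPA(ctx, "myapp-hpa", "default", "myapp", 2, 10); err != nil {
		return err
	}

	status, err := mgr.GetHPAStatus(ctx, "myapp-hpa", "default")
	if err != nil {
		return err
	}
	fmt.Printf("current=%d desired=%d\n", status.CurrentReplicas, status.DesiredReplicas)
	return nil
}
```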
3. HPA with Custom Metrics
3.1 HPA Based on Prometheus Metrics
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Pods
      pods:
        metric:
          name: requests_per_second
        target:
          type: AverageValue
          averageValue: 100
    - type: Object
      object:
        describedObject:
          apiVersion: v1
          kind: Service
          name: myapp
        metric:
          name: queue_length
        target:
          type: Value
          value: 1000
```
The Pods and Object metric types are resolved through the custom metrics API (custom.metrics.k8s.io), so in a Prometheus-based setup an adapter such as prometheus-adapter must be installed to translate Prometheus series into that API.
3.2 Custom Metrics API Configuration
The ServiceMonitor below is a Prometheus Operator resource that tells Prometheus to scrape the application's /metrics endpoint every 30 seconds:

```yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: myapp-monitor
spec:
  selector:
    matchLabels:
      app: myapp
  endpoints:
    - port: http
      path: /metrics
      interval: 30s
```
3.3 Exposing Custom Metrics
```go
package metrics

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

type CustomMetrics struct {
	requestsPerSecond *prometheus.CounterVec
	queueLength       *prometheus.GaugeVec
}

func NewCustomMetrics() *CustomMetrics {
	cm := &CustomMetrics{
		// Counter of handled requests; the per-second rate is typically derived
		// with rate() in PromQL or in the metrics adapter.
		requestsPerSecond: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Name: "myapp_requests_per_second",
				Help: "Number of requests per second",
			},
			[]string{"endpoint"},
		),
		queueLength: prometheus.NewGaugeVec(
			prometheus.GaugeOpts{
				Name: "myapp_queue_length",
				Help: "Length of the processing queue",
			},
			[]string{"queue"},
		),
	}

	// Register the collectors with the default Prometheus registry.
	prometheus.MustRegister(cm.requestsPerSecond)
	prometheus.MustRegister(cm.queueLength)
	return cm
}

// RecordRequest increments the request counter for an endpoint.
func (cm *CustomMetrics) RecordRequest(endpoint string) {
	cm.requestsPerSecond.WithLabelValues(endpoint).Inc()
}

// SetQueueLength records the current depth of a processing queue.
func (cm *CustomMetrics) SetQueueLength(queue string, length int) {
	cm.queueLength.WithLabelValues(queue).Set(float64(length))
}

// StartMetricsExporter is only a heartbeat placeholder; the actual exposure to
// Prometheus happens over HTTP via promhttp (see the sketch below).
func (cm *CustomMetrics) StartMetricsExporter() {
	go func() {
		for {
			time.Sleep(time.Second)
			fmt.Println("Metrics exporter running")
		}
	}()
}
```
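To actually serve these metrics to Prometheus, the usual approach with client_golang is to expose the default registry over HTTP with promhttp. Below is a minimal sketch: the listen address is illustrative, and the import path of the metrics package above is an assumption, shown commented out.

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
	// metrics "example.com/myapp/metrics" // hypothetical import path for the package above
)

func main() {
	// cm := metrics.NewCustomMetrics() // registers the collectors in the default registry
	// cm.RecordRequest("/api/v1/items")

	// promhttp.Handler() serves everything registered via prometheus.MustRegister,
	// matching the ServiceMonitor's port ("http") and path ("/metrics").
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```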
4. Cluster Autoscaler
4.1 Cluster Autoscaler Configuration
Note that the manifest below is illustrative: the upstream Cluster Autoscaler is normally deployed as a regular Deployment and configured through command-line flags, while some platforms expose a ClusterAutoscaler CRD with fields like these. Check your provider's documentation for the exact API group; the same applies to the node group definition in 4.2.

```yaml
apiVersion: autoscaling/v1
kind: ClusterAutoscaler
metadata:
  name: cluster-autoscaler
spec:
  scaleDown:
    enabled: true
    delayAfterAdd: 10m
    delayAfterDelete: 5m
    delayAfterFailure: 3m
    unneededTime: 10m
  scaleUp:
    enabled: true
    minReplicaCount: 1
    maxReplicaCount: 100
```
4.2 Node Group Configuration
```yaml
apiVersion: autoscaling.k8s.io/v1
kind: NodePool
metadata:
  name: worker-pool
spec:
  replicas: 3
  template:
    spec:
      nodeSelector:
        node-role.kubernetes.io/worker: ""
      tolerations:
        - key: "node.kubernetes.io/role"
          operator: "Equal"
          value: "worker"
          effect: "NoSchedule"
```
5. Scaling Policies
5.1 Configuring Scaling Delays
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 10
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 300
      policies:
        - type: Pods
          value: 2
          periodSeconds: 60
      selectPolicy: Max
    scaleDown:
      stabilizationWindowSeconds: 600
      policies:
        - type: Percent
          value: 10
          periodSeconds: 60
      selectPolicy: Min
```
5.2 Scaling Policy Parameters
| Parameter | Description | Default |
|---|---|---|
| stabilizationWindowSeconds | Stabilization window that damps flapping by considering past recommendations | scaleUp: 0s, scaleDown: 300s |
| policies[].type | Policy type: Pods or Percent | - |
| policies[].value | Number of Pods / percentage allowed per period | - |
| policies[].periodSeconds | Length of the period the policy applies to (max 1800s) | - |
| selectPolicy | How multiple policies are combined: Min / Max (or Disabled) | Max |
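As a worked example of how these parameters interact: with 20 running replicas, the scaleDown block from section 5.1 (type Percent, value 10, periodSeconds 60, selectPolicy Min) allows at most 10% × 20 = 2 Pods to be removed per minute, and only after the metric has stayed below target for the full 600-second stabilization window.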
6. Monitoring and Debugging the HPA
6.1 Checking HPA Status
```bash
# Show HPA status
kubectl get hpa

# Show detailed status
kubectl describe hpa myapp-hpa

# Show related events
kubectl get events | grep hpa

# Query the resource metrics API directly
kubectl get --raw /apis/metrics.k8s.io/v1beta1/namespaces/default/pods
```
6.2 Analyzing HPA Status
```go
package autoscaling

import (
	"context"
	"fmt"

	autoscalingv2 "k8s.io/api/autoscaling/v2"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// AnalyzeHPA prints the replica counts and conditions of an HPA.
func (h *HPAManager) AnalyzeHPA(ctx context.Context, name, namespace string) error {
	hpa, err := h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("failed to get HPA: %w", err)
	}

	fmt.Printf("=== HPA Analysis: %s ===\n", name)
	fmt.Printf("Current Replicas: %d\n", hpa.Status.CurrentReplicas)
	fmt.Printf("Desired Replicas: %d\n", hpa.Status.DesiredReplicas)
	fmt.Printf("Min Replicas: %d\n", *hpa.Spec.MinReplicas)
	fmt.Printf("Max Replicas: %d\n", hpa.Spec.MaxReplicas)

	fmt.Println("\n--- Conditions ---")
	for _, cond := range hpa.Status.Conditions {
		fmt.Printf("Type: %s, Status: %s, Reason: %s\n", cond.Type, cond.Status, cond.Reason)
	}
	return nil
}

// GetHPAMetrics returns the most recently observed metric values.
func (h *HPAManager) GetHPAMetrics(ctx context.Context, name, namespace string) ([]autoscalingv2.MetricStatus, error) {
	hpa, err := h.client.clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get HPA: %w", err)
	}
	return hpa.Status.Metrics, nil
}
```
7. Best Practices
7.1 Recommended HPA Configuration
```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 3
  maxReplicas: 20
  behavior:
    scaleUp:
      stabilizationWindowSeconds: 180
      policies:
        - type: Percent
          value: 100
          periodSeconds: 60
        - type: Pods
          value: 4
          periodSeconds: 60
      selectPolicy: Max
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 30
          periodSeconds: 60
      selectPolicy: Min
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 75
```
7.2 Notes and Caveats
- Resource requests are mandatory: the HPA needs each Pod's resource requests to compute utilization (see the sketch after this list)
- Avoid flapping: configure sensible stabilization windows
- Monitor scaling events: check HPA status and events regularly
- Reserve capacity: make sure Nodes have enough headroom for newly created Pods
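As a reminder of the first point, here is a minimal sketch of a container spec with requests set, using the same client-go types as the earlier examples; the image name and resource values are illustrative.

```go
package autoscaling

import (
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// appContainer returns a container spec with CPU/memory requests set. Without
// requests, Utilization-type HPA targets cannot be computed for the Pod and the
// HPA reports missing metrics.
func appContainer() corev1.Container {
	return corev1.Container{
		Name:  "myapp",
		Image: "myapp:latest",
		Resources: corev1.ResourceRequirements{
			Requests: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("250m"),
				corev1.ResourceMemory: resource.MustParse("256Mi"),
			},
			Limits: corev1.ResourceList{
				corev1.ResourceCPU:    resource.MustParse("500m"),
				corev1.ResourceMemory: resource.MustParse("512Mi"),
			},
		},
	}
}
```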
8. Summary
Kubernetes autoscaling is the core capability behind elastic scaling:
- HPA: automatically adjusts the Pod replica count based on CPU, memory, or custom metrics
- Cluster Autoscaler: automatically adjusts the number of Nodes based on Pod scheduling demand
- Scaling policies: control scaling behavior through configuration and avoid flapping
- Custom metrics: support scaling on business-level metrics
With well-tuned HPA and Cluster Autoscaler configurations, resource management becomes largely automatic, improving both application availability and resource utilization.