商业版有 pod_cpu_usage_seconds_total 和 pod_memory_working_set_bytes 这两个指标,社区版没有。不过可以另辟蹊径,通过K8S内置的cadvisor来获取。

在这里插入图片描述

让prometheus来收集cadvisor计算的数据,就可以通过 container_cpu_usage_seconds_total 和 container_memory_working_set_bytes 获取函数pod的cpu和内存指标数据。

在这里插入图片描述

如果不嫌麻烦,只要给prometheus配置好抓取cadvisor的数据,就能获取函数pod的cpu和内存指标数据。不过也可以把它封装到openfaas里:直接在gateway里查询一遍,再搬到自己的指标上。

修改prometheus配置
1. 修改prometheus-rbac.yml

openfaas的集群权限不能收集各节点数据,所以需要改一下权限:将Role改成ClusterRole,RoleBinding改成ClusterRoleBinding,同时在resources一栏添加 nodes、nodes/proxy。此举把prometheus变成集群级别的了,如果只是自己测试的话无所谓了。

---
# ServiceAccount used as the subject of both ClusterRoleBindings below.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: openfaas-prometheus
  namespace: "openfaas"
  labels:
    app: openfaas
    component: prometheus
---
# Cluster-wide read access to services, endpoints, pods, nodes and nodes/proxy.
# ClusterRole is a cluster-scoped resource, so metadata.namespace is omitted.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: openfaas-prometheus
  labels:
    app: openfaas
    component: prometheus
rules:
- apiGroups: [""]
  resources:
    - services
    - endpoints
    - pods
    - nodes
    - nodes/proxy
  verbs: ["get", "list", "watch"]
---
# Same rules as openfaas-prometheus above. It is kept under its own name so that
# anything referencing openfaas-prometheus-fn keeps working.
# ClusterRole is a cluster-scoped resource, so metadata.namespace is omitted.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: openfaas-prometheus-fn
  labels:
    app: openfaas
    component: prometheus
rules:
- apiGroups: [""]
  resources:
    - services
    - endpoints
    - pods
    - nodes
    - nodes/proxy
  verbs: ["get", "list", "watch"]
---
# Grants the openfaas-prometheus ClusterRole to the ServiceAccount.
# ClusterRoleBinding is cluster-scoped (no metadata.namespace); only the
# ServiceAccount subject is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openfaas-prometheus
  labels:
    app: openfaas
    component: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: openfaas-prometheus
subjects:
- kind: ServiceAccount
  name: openfaas-prometheus
  namespace: "openfaas"
---
# Grants the openfaas-prometheus-fn ClusterRole to the same ServiceAccount.
# ClusterRoleBinding is cluster-scoped (no metadata.namespace); only the
# ServiceAccount subject is namespaced.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openfaas-prometheus-fn
  labels:
    app: openfaas
    component: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: openfaas-prometheus-fn
subjects:
- kind: ServiceAccount
  name: openfaas-prometheus
  namespace: "openfaas"
2. 修改prometheus-cfg.yml

在prometheus-cfg.yml里面,将prometheus.yml的scrape_configs里添加一个job,用于抓取cadvisor的数据。

  # Scrape job that discovers every cluster node and pulls its cAdvisor metrics
  # through the API server's node proxy endpoint.
  - job_name: 'kubernetes-cadvisor'
    scheme: https
    # Authenticate with the CA certificate and token found under the
    # service-account secrets path.
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    kubernetes_sd_configs:
    - role: node
    # Relabel rules are applied in order.
    relabel_configs:
    # Send every request to the API server instead of the node address.
    - target_label: __address__
      replacement: kubernetes.default.svc:443
    # Build the per-node proxy path to the cAdvisor metrics endpoint.
    - source_labels: [__meta_kubernetes_node_name]
      regex: (.+)
      replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
      target_label: __metrics_path__
    # Copy each Kubernetes node label onto the target (prefix stripped).
    - action: labelmap
      regex: __meta_kubernetes_node_label_(.+)
    # Expose the node name as the "node" label.
    - source_labels: [__meta_kubernetes_node_name]
      action: replace
      target_label: node
    # NOTE(review): this reads a node label literally named "node"; node_name is
    # presumably left empty on clusters whose nodes do not carry it — confirm.
    - source_labels: [__meta_kubernetes_node_label_node]
      action: replace
      target_label: node_name
3. 重新部署
# Apply the updated RBAC objects first, then the Prometheus configuration.
kubectl apply -f prometheus-rbac.yml
kubectl apply -f prometheus-cfg.yml
# --force deletes and re-creates the object; presumably used here so that the
# Prometheus pod restarts and loads the new configuration — confirm.
kubectl replace --force -f prometheus-dep.yml
修改gateway源码
1. 修改metrics.go

修改gateway/metrics/metrics.go,加俩指标。这里都使用GaugeVec类型,因为只是抓取cadvisor的数据来设置到我们自定义的指标上。

// MetricOptions groups the Prometheus collectors exposed by the gateway.
type MetricOptions struct {
	GatewayFunctionInvocation        *prometheus.CounterVec
	GatewayFunctionsHistogram        *prometheus.HistogramVec
	GatewayFunctionInvocationStarted *prometheus.CounterVec

	ServiceReplicasGauge *prometheus.GaugeVec

	ServiceMetrics *ServiceMetricOptions

	// Added: CPU and memory gauges. They are gauges because their values are
	// copied from Prometheus query results rather than incremented locally.
	PodCpuUsageSecondsTotal         *prometheus.GaugeVec
	PodMemoryWorkingSetBytes        *prometheus.GaugeVec
}



// BuildMetricsOptions creates the gateway's Prometheus collectors and returns
// them bundled in a MetricOptions value.
func BuildMetricsOptions() MetricOptions {
	// ... existing collector definitions omitted ...

	// Added: options for the two per-function gauges, each labelled by
	// function_name.
	cpuOpts := prometheus.GaugeOpts{
		Subsystem: "pod",
		Name:      "cpu_usage_seconds_total",
		Help:      "CPU seconds consumed by all the replicas of a given function.",
	}
	memoryOpts := prometheus.GaugeOpts{
		Subsystem: "pod",
		Name:      "memory_working_set_bytes",
		Help:      "Bytes of RAM consumed by all the replicas of a given function",
	}

	return MetricOptions{
		GatewayFunctionInvocation:        gatewayFunctionInvocation,
		GatewayFunctionsHistogram:        gatewayFunctionsHistogram,
		GatewayFunctionInvocationStarted: gatewayFunctionInvocationStarted,
		ServiceReplicasGauge:             serviceReplicas,
		ServiceMetrics:                   serviceMetricOptions,

		// Added: the new CPU and memory gauges.
		PodCpuUsageSecondsTotal:  prometheus.NewGaugeVec(cpuOpts, []string{"function_name"}),
		PodMemoryWorkingSetBytes: prometheus.NewGaugeVec(memoryOpts, []string{"function_name"}),
	}
}
2. 修改prometheus_query.go

修改gateway/metrics/prometheus_query.go,修改查询结果的结构体,因为查出来的和原本的数据不一样,得加俩参数。

// VectorQueryResponse is the decoded shape of a Prometheus vector query
// result: a list of samples, each with its metric labels and a value pair.
type VectorQueryResponse struct {
	Data struct {
		Result []struct {
			Metric struct {
				Code         string `json:"code"`
				FunctionName string `json:"function_name"`

				// Added: labels returned by the cAdvisor-based queries, which
				// group by container and namespace.
				Container    string `json:"container"`
				Namespace    string `json:"namespace"`
			}
			// Value holds the raw sample; calc reads index 1 as a string
			// containing the numeric value.
			Value []interface{} `json:"value"`
		}
	}
}
3. 修改exporter.go

修改gateway/metrics/exporter.go。注意,修改完,exporter_test.go会报错,因为它也调了NewExporter,可以注释掉里面的函数。

// Exporter collects the gateway metrics and, via prometheusQuery, mirrors
// per-function CPU and memory values into its own gauges.
type Exporter struct {
	metricOptions     MetricOptions
	services          []types.FunctionStatus
	credentials       *auth.BasicAuthCredentials
	FunctionNamespace string

	// Added: used to query Prometheus for the cAdvisor series.
	prometheusQuery   PrometheusQueryFetcher
}

// NewExporter builds an Exporter from the given metric options, credentials
// and function namespace. The trailing prometheusQuery parameter is the new
// addition: it is stored on the Exporter and used to query Prometheus.
// The services list starts out empty (non-nil).
func NewExporter(options MetricOptions, credentials *auth.BasicAuthCredentials, namespace string, prometheusQuery PrometheusQueryFetcher) *Exporter {
	exporter := new(Exporter)

	exporter.metricOptions = options
	exporter.services = []types.FunctionStatus{}
	exporter.credentials = credentials
	exporter.FunctionNamespace = namespace

	// Added: keep the query fetcher for later use.
	exporter.prometheusQuery = prometheusQuery

	return exporter
}

// Describe sends the descriptors of the exporter's collectors to ch.
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
	// ... existing Describe calls omitted ...

	// Added: describe the CPU gauge first, then the memory gauge.
	podGauges := []*prometheus.GaugeVec{
		e.metricOptions.PodCpuUsageSecondsTotal,
		e.metricOptions.PodMemoryWorkingSetBytes,
	}
	for _, gauge := range podGauges {
		gauge.Describe(ch)
	}
}

// Collect refreshes the per-function gauges and sends every collector's
// current metrics to ch.
//
// The replica, CPU and memory gauges are all reset before being repopulated,
// so a function that is no longer listed in e.services and no longer returned
// by the Prometheus queries stops being exported instead of keeping its last
// value indefinitely.
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
	e.metricOptions.GatewayFunctionsHistogram.Collect(ch)
	e.metricOptions.GatewayFunctionInvocationStarted.Collect(ch)

	e.metricOptions.ServiceReplicasGauge.Reset()
	e.metricOptions.PodCpuUsageSecondsTotal.Reset()
	e.metricOptions.PodMemoryWorkingSetBytes.Reset()

	for _, service := range e.services {
		serviceName := service.Name
		if len(service.Namespace) > 0 {
			serviceName = fmt.Sprintf("%s.%s", service.Name, service.Namespace)
		}

		e.metricOptions.ServiceReplicasGauge.WithLabelValues(serviceName).Set(float64(service.Replicas))

		// Seed every known function with zero so that functions without any
		// query result still export a value.
		e.metricOptions.PodCpuUsageSecondsTotal.WithLabelValues(serviceName).Set(0)
		e.metricOptions.PodMemoryWorkingSetBytes.WithLabelValues(serviceName).Set(0)
	}

	// Overwrite the zero seeds with the values queried from Prometheus.
	e.calc()

	e.metricOptions.PodCpuUsageSecondsTotal.Collect(ch)
	e.metricOptions.PodMemoryWorkingSetBytes.Collect(ch)

	e.metricOptions.GatewayFunctionInvocation.Collect(ch)

	e.metricOptions.ServiceReplicasGauge.Collect(ch)

	e.metricOptions.ServiceMetrics.Counter.Collect(ch)
	e.metricOptions.ServiceMetrics.Histogram.Collect(ch)
}

// calc is a new function (place it at the bottom of the file). It queries
// Prometheus for the cAdvisor CPU and memory series of the openfaas-fn
// namespace and copies each result into the exporter's own gauges, labelled
// "<container>.<namespace>".
//
// The two queries are independent: a failure of one is logged and does not
// prevent the other from running.
func (e *Exporter) calc() {
	const (
		cpuQuery    = `sum by(container, namespace) (container_cpu_usage_seconds_total{image!="",namespace="openfaas-fn", container!="POD"})`
		memoryQuery = `sum by(container, namespace) (container_memory_working_set_bytes{image!="",namespace="openfaas-fn", container!="POD"})`
	)

	e.mirrorPrometheusQuery("q1", cpuQuery, e.metricOptions.PodCpuUsageSecondsTotal)
	e.mirrorPrometheusQuery("q2", memoryQuery, e.metricOptions.PodMemoryWorkingSetBytes)
}

// mirrorPrometheusQuery runs query and sets one gauge value per returned
// sample. Query errors and malformed samples are logged and skipped, so a bad
// sample can neither panic nor be recorded as a misleading zero.
func (e *Exporter) mirrorPrometheusQuery(queryName, query string, gauge *prometheus.GaugeVec) {
	results, err := e.prometheusQuery.Fetch(url.QueryEscape(query))
	if err != nil {
		log.Printf("Error querying %s: %s\n", queryName, err.Error())
		return
	}

	for _, v := range results.Data.Result {
		functionName := fmt.Sprintf("%s.%s", v.Metric.Container, v.Metric.Namespace)

		f, err := parseVectorSampleValue(v.Value)
		if err != nil {
			log.Printf("Error parsing %s sample for %s: %s\n", queryName, functionName, err.Error())
			continue
		}

		gauge.WithLabelValues(functionName).Set(f)
	}
}

// parseVectorSampleValue extracts the numeric value from a sample pair, where
// index 1 is expected to be a string holding the number.
func parseVectorSampleValue(sample []interface{}) (float64, error) {
	if len(sample) < 2 {
		return 0, fmt.Errorf("sample has %d elements, want at least 2", len(sample))
	}

	text, ok := sample[1].(string)
	if !ok {
		return 0, fmt.Errorf("sample value has type %T, want string", sample[1])
	}

	f, err := strconv.ParseFloat(text, 64)
	if err != nil {
		return 0, fmt.Errorf("parsing sample value %q: %w", text, err)
	}
	return f, nil
}
4. 修改main.go
// This statement originally sat further down in main; it has to move up so
// that prometheusQuery exists before the exporter is created.
// NOTE(review): the http.Client has no Timeout, and the exporter now issues
// Prometheus queries during Collect — consider setting one; confirm how
// NewPrometheusQuery uses the client.
prometheusQuery := metrics.NewPrometheusQuery(config.PrometheusHost, config.PrometheusPort, &http.Client{})

// Same call as before, with prometheusQuery added as the last argument.
exporter := metrics.NewExporter(metricsOptions, credentials, config.Namespace, prometheusQuery)
部署和结果

之后重新打包部署gateway就行。查询可以看到cpu指标、memory指标和原本的一样。
在这里插入图片描述

Logo

瓜分20万奖金 获得内推名额 丰厚实物奖励 易参与易上手

更多推荐