Deploying Prometheus + Alertmanager + Grafana on Kubernetes

You have probably read plenty about the theory already, so let's go straight to the deployment!
First, create a monitoring namespace to hold the Prometheus-related containers.

monitoring.yaml

apiVersion: v1
kind: Namespace
metadata:
  name: monitoring

kubectl apply -f monitoring.yaml
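
To confirm the namespace exists:

kubectl get namespace monitoring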

Deploy node-exporter (node metrics)

node-export.yaml

---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    k8s-app: node-exporter
spec:
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      containers:
      - image: prom/node-exporter
        name: node-exporter
        ports:
        - containerPort: 9100
          protocol: TCP
          name: http
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: node-exporter
  name: node-exporter
  namespace: monitoring
  annotations:
    prometheus.io/scrape: 'true'
spec:
  ports:
  - name: http
    port: 9100
    nodePort: 9100
    protocol: TCP
  type: NodePort
  selector:
    k8s-app: node-exporter

kubectl apply -f node-export.yaml    # create the node-exporter DaemonSet
Note the prometheus.io/scrape: 'true' annotation here (it will also be used later by the kube-state-metrics endpoints).
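
Also note that nodePort: 9100 lies outside the default NodePort range (30000-32767), so the kube-apiserver must be started with a --service-node-port-range that covers it; the same applies to ports 9090, 9093 and 3000 used below. Alternatively, pick ports from the default range. Once the DaemonSet is up, you can verify the exporter from any node (IP stands for a node address):

curl http://IP:9100/metrics | head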

Deploy kube-state-metrics

There are too many files to paste here; please clone them yourself: https://github.com/kubernetes/kube-state-metrics
The YAML files are under the kubernetes/ directory of the repo.
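
A sketch of the clone-and-apply steps (the manifest directory follows the layout noted above and may differ between releases):

git clone https://github.com/kubernetes/kube-state-metrics
cd kube-state-metrics
kubectl apply -f kubernetes/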

Deploy Prometheus

prometheus-configmap.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval:     15s
      evaluation_interval: 15s
    rule_files:
    - /etc/prometheus/rules/*.yml   # matches the prometheus-rules ConfigMap mounted at /etc/prometheus/rules
    alerting:
      alertmanagers:
        - static_configs:
          - targets: ["alertmanager:9093"]
    scrape_configs:

    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https

    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics

    - job_name: 'kubernetes-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

prometheus-configmap-rules.yaml (define the alerting rules according to your own needs)
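
As a minimal sketch (the InstanceDown rule is only an illustration; adapt it to your needs), prometheus-configmap-rules.yaml could look like this. The ConfigMap name prometheus-rules and the rules.yml key line up with the rules volume mounted at /etc/prometheus/rules in the Deployment below:

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: monitoring
data:
  rules.yml: |
    groups:
    - name: example
      rules:
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Instance {{ $labels.instance }} is down"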

prometheus.deployment.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: prometheus-deployment
  name: prometheus
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
      - image: prom/prometheus:v2.0.0
        name: prometheus
        command:
        - "/bin/prometheus"
        args:
        - "--config.file=/etc/prometheus/prometheus.yml"
        - "--storage.tsdb.path=/prometheus"
        - "--storage.tsdb.retention=24h"
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        - mountPath: "/prometheus"
          name: data
        - mountPath: "/etc/prometheus"
          name: config-volume
        - mountPath: "/etc/prometheus/rules"
          name: rules
        resources:
          requests:
            cpu: 100m
            memory: 100Mi
          limits:
            cpu: 500m
            memory: 2500Mi
      serviceAccountName: prometheus    
      volumes:
      - name: data
        emptyDir: {}
      - name: config-volume
        configMap:
          name: prometheus-config  
      - name: rules
        configMap:
          name: prometheus-rules
---
kind: Service
apiVersion: v1
metadata:
  labels:
    app: prometheus
  annotations:
    prometheus.io/scrape: "true"
  name: prometheus
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - port: 9090
    targetPort: 9090
    protocol: TCP
    nodePort: 9090
  selector:
    app: prometheus
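
Note that the Deployment above sets serviceAccountName: prometheus, but no such ServiceAccount is defined in this post. A minimal RBAC sketch (the file name prometheus-rbac.yaml is my own) that grants Prometheus read access to the resources scraped by the jobs above:

apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources: ["nodes", "nodes/proxy", "services", "endpoints", "pods"]
  verbs: ["get", "list", "watch"]
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: monitoring

Apply it before the Deployment: kubectl apply -f prometheus-rbac.yaml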

kubectl apply -f prometheus-configmap.yaml
kubectl apply -f prometheus-configmap-rules.yaml
kubectl apply -f prometheus.deployment.yaml
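
Check that the Pod starts and the configuration loads cleanly:

kubectl get pods -n monitoring -l app=prometheus
kubectl logs -n monitoring deploy/prometheus | head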

Deploy Alertmanager

alertmanager-mail.yaml

---
kind: ConfigMap
apiVersion: v1
metadata:
  name: alertmanager
  namespace: monitoring
data:
  alertmanager.yml: |-
    global:
      smtp_smarthost:                       ### SMTP server (host:port)
      smtp_from:                            ### sender address
      smtp_auth_username:                   ### SMTP auth username for the sender
      smtp_auth_password:                   ### SMTP auth password
      smtp_require_tls: false
    route:
      group_by: []                          ### customize, e.g. ['alertname']
      group_wait: 30s
      group_interval: 5m
      repeat_interval: 10m
      receiver:                             ### custom receiver name
    receivers:
    - name:                                 ### must match the receiver value above
      email_configs:
      - to: ''                              ### recipient address
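
For reference, here is a filled-in sketch of the alertmanager.yml payload using purely hypothetical values (smtp.example.com, alert@example.com and ops@example.com are placeholders to replace with your own):

global:
  smtp_smarthost: 'smtp.example.com:25'
  smtp_from: 'alert@example.com'
  smtp_auth_username: 'alert@example.com'
  smtp_auth_password: 'your-password'
  smtp_require_tls: false
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 10m
  receiver: 'mail'
receivers:
- name: 'mail'                              # matches route.receiver
  email_configs:
  - to: 'ops@example.com'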

alertmanager.deployment.yaml

---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: alertmanager-deployment
  name: alertmanager
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      labels:
        app: alertmanager
    spec:
      containers:
      - image: prom/alertmanager:v0.16.1
        name: alertmanager
        ports:
        - containerPort: 9093
          protocol: TCP
        volumeMounts:
        - mountPath: "/alertmanager"
          name: data
        - mountPath: "/etc/alertmanager"
          name: config-volume
        resources:
          requests:
            cpu: 50m
            memory: 50Mi
          limits:
            cpu: 200m
            memory: 200Mi
      volumes:
      - name: data
        emptyDir: {}
      - name: config-volume
        configMap:
          name: alertmanager
---
apiVersion: v1
kind: Service
metadata:
  labels:
    app: alertmanager
  annotations:
    prometheus.io/scrape: 'true'
  name: alertmanager
  namespace: monitoring
spec:
  type: NodePort
  ports:
  - port: 9093
    targetPort: 9093
    nodePort: 9093
  selector:
    app: alertmanager

kubectl apply -f alertmanager-mail.yaml
kubectl apply -f alertmanager.deployment.yaml
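
Verify that Alertmanager came up:

kubectl get pods -n monitoring -l app=alertmanager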

Prometheus and Alertmanager are now deployed; both can be accessed through their web UIs.

Prometheus is reachable at http://IP:9090

Alertmanager is reachable at http://IP:9093

In Prometheus, go to Status --> Targets to review the scrape targets we configured.

We can clearly see that the node-exporter and kube-state-metrics targets we added have been discovered. Next, let's use our rules to test alerting and email delivery.
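
You can also fire a synthetic alert straight at Alertmanager to check mail delivery independently of Prometheus (TestAlert is an arbitrary name; the v1 API shown matches Alertmanager 0.16):

curl -XPOST -H "Content-Type: application/json" http://IP:9093/api/v1/alerts -d '[{"labels":{"alertname":"TestAlert","severity":"critical"}}]'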

An alerting rule moves through the states Inactive --> Pending --> Firing; only when a rule reaches the Firing state is the alert actually sent to Alertmanager.
When a rule turns red (Firing), the alert is forwarded to Alertmanager, which then sends the email notification.

Deploy Grafana

grafana-deploy.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana-core
  namespace: monitoring
#  namespace: kube-system
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
      component: core
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      containers:
      - image: grafana/grafana:4.2.0
        name: grafana-core
        imagePullPolicy: IfNotPresent
        # env:
        resources:
          # keep request = limit to keep this container in guaranteed class
          limits:
            cpu: 100m
            memory: 100Mi
          requests:
            cpu: 100m
            memory: 100Mi
        env:
          # The following env variables set up basic auth with the default admin user and admin password.
          - name: GF_AUTH_BASIC_ENABLED
            value: "true"
          - name: GF_AUTH_ANONYMOUS_ENABLED
            value: "false"
          # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
          #   value: Admin
          # does not really work, because of template variables in exported dashboards:
          # - name: GF_DASHBOARDS_JSON_ENABLED
          #   value: "true"
        readinessProbe:
          httpGet:
            path: /login
            port: 3000
          # initialDelaySeconds: 30
          # timeoutSeconds: 1
        volumeMounts:
        - name: grafana-persistent-storage
          mountPath: /var/lib/grafana
      volumes:
      - name: grafana-persistent-storage
        emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  type: NodePort
  ports:
    - port: 3000
      nodePort: 3000
  selector:
    app: grafana
    component: core

kubectl apply -f grafana-deploy.yaml

Access Grafana at http://IP:3000

When the login page appears, log in.

Click Add data source and add a data source (the URL is the Prometheus address).
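
If Grafana runs inside the same cluster, the data source URL can point at the Prometheus Service by its DNS name instead of a NodePort; with the namespaces used above that would be, for example:

http://prometheus.monitoring.svc.cluster.local:9090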
Then go to Dashboards --> Import and enter dashboard ID 1621.
Click Load and select the previously created Prometheus data source.
Click Import to see the live dashboard.

At this point, the deployment is complete!


Copyright notice: This is an original article by qq_31409207, licensed under CC 4.0 BY-SA. Please include a link to the original source and this notice when reposting.