Prometheus Deployment

axing · 2023-09-03

一、Binary installation of the Prometheus server
Deploy directly on a physical machine.

Download the latest binary release: https://prometheus.io/download
Download historical releases: https://github.com/prometheus/prometheus/tags
LTS: the 2.53.x series
Note: with the newest Prometheus releases, Grafana dashboard templates may show no data because of incompatible new rules.


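The steps below assume the release tarball has already been unpacked under /monitor; a minimal sketch (version chosen to match the symlink that follows):

mkdir -p /monitor && cd /monitor
wget https://github.com/prometheus/prometheus/releases/download/v2.53.0/prometheus-2.53.0.linux-amd64.tar.gz
tar xf prometheus-2.53.0.linux-amd64.tar.gz
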
# 1. Create a symlink first, to make later upgrades easier
ln -s /monitor/prometheus-2.53.0.linux-amd64 /monitor/prometheus
mkdir /monitor/prometheus/data # create the TSDB data directory
# 2. Register a systemd service
cat > /usr/lib/systemd/system/prometheus.service << 'EOF'
[Unit]
Description=prometheus server daemon

[Service]
Restart=on-failure
ExecStart=/monitor/prometheus/prometheus --config.file=/monitor/prometheus/prometheus.yml --storage.tsdb.path=/monitor/prometheus/data  --storage.tsdb.retention.time=30d  --web.enable-lifecycle 

[Install]
WantedBy=multi-user.target

EOF

# 3. Start the service
systemctl daemon-reload 
systemctl enable prometheus.service
systemctl start prometheus.service
systemctl status prometheus
netstat -tunalp |grep 9090
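
To verify the server is up, you can also probe its health endpoint:

curl -s http://localhost:9090/-/healthy # should report the server is healthy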

Testing

Download and build a test program to act as a scrape target; it exposes a /metrics endpoint.

yum install golang -y
git clone https://github.com/prometheus/client_golang.git
cd client_golang/examples/random

export GO111MODULE=on
export GOPROXY=https://goproxy.cn
go build # produces a binary named random

Then run three instances of the service in three separate terminals:

./random -listen-address=:8080  # exposes http://localhost:8080/metrics
./random -listen-address=:8081  # exposes http://localhost:8081/metrics
./random -listen-address=:8082  # exposes http://localhost:8082/metrics

All three expose a /metrics endpoint in the format Prometheus expects, so we can add scrape targets for them in prometheus.yml.
Suppose 8080 and 8081 are production instances and 8082 is a canary; we put them in separate target groups and tell them apart with labels. The configuration looks like this:

    scrape_configs:
      - job_name: 'example-random'
        # Override the global default and scrape targets from this job every 5 seconds.
        scrape_interval: 5s
        static_configs:
          - targets: ['192.168.110.101:8080', '192.168.110.101:8081']
            labels:
              group: 'production'
          - targets: ['192.168.110.101:8082']
            labels:
              group: 'canary'
systemctl restart prometheus # or, since --web.enable-lifecycle is set: curl -X POST http://localhost:9090/-/reload

Then open http://192.168.110.101:9090/ and go to Status → Targets; the newly added scrape job should appear.
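
With the targets up, the two groups can be compared in the expression browser. The random example should export an rpc_durations_seconds summary (name per the client_golang example; double-check against your /metrics output), so a query like this shows per-group request rates:

sum by (group) (rate(rpc_durations_seconds_count[1m]))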

二、Installing the Prometheus server on Kubernetes

# prometheus-cm.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s # scrape every configured target endpoint every 15s
      scrape_timeout: 15s  # a scrape that does not finish within 15s times out and its data is dropped
      evaluation_interval: 15s # evaluate alerting rules every 15s
    scrape_configs:
      - job_name: "prometheus"
        static_configs:
          - targets: ["localhost:9090"]
          
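Before applying, the embedded config can be validated locally with promtool, which ships in the Prometheus tarball; for example, with the prometheus.yml block saved to a temporary file:

/monitor/prometheus/promtool check config /tmp/prometheus.yml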

prometheus-pv-pvc.yaml

apiVersion: v1
kind: PersistentVolume
metadata:
  name: prometheus-local
  labels:
    app: prometheus
spec:
  accessModes:
    - ReadWriteOnce
  capacity:
    storage: 20Gi
  storageClassName: local-storage
  local:
    path: /data/k8s/prometheus
  nodeAffinity:
    required:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes.io/hostname
              operator: In
              values:
                - master01
  persistentVolumeReclaimPolicy: Retain
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus-data
  namespace: monitor
spec:
  selector:
    matchLabels:
      app: prometheus
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  storageClassName: local-storage
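
Both objects reference a local-storage StorageClass that the manifests above do not create. If the cluster does not already have one, a minimal sketch (local PVs use the no-provisioner placeholder and bind once a consuming pod is scheduled):

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: local-storage
provisioner: kubernetes.io/no-provisioner
volumeBindingMode: WaitForFirstConsumer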

prometheus-rbac.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups:
      - ''
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - 'networking.k8s.io' # Ingress moved out of the deprecated 'extensions' group
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ''
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs: # permission declaration for non-resource metrics endpoints
      - /metrics
    verbs:
      - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole # the resources we scrape may live in any namespace, so a cluster-scoped ClusterRole is required
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitor
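
After applying the RBAC objects, the grants can be spot-checked by impersonating the service account:

kubectl auth can-i list pods --as=system:serviceaccount:monitor:prometheus   # expect: yes
kubectl auth can-i delete pods --as=system:serviceaccount:monitor:prometheus # expect: no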

prometheus-deploy.yaml

apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      serviceAccountName: prometheus
      securityContext:  # make sure the indentation here uses spaces, not tabs
        runAsUser: 0  # run as root so Prometheus can write to the local PV path
      containers:
        - image: registry.cn-guangzhou.aliyuncs.com/xingcangku/oooo:1.0
          name: prometheus
          args:
            - '--config.file=/etc/prometheus/prometheus.yml'
            - '--storage.tsdb.path=/prometheus'
            - '--storage.tsdb.retention.time=24h'
            - '--web.enable-admin-api'
            - '--web.enable-lifecycle'
          ports:
            - containerPort: 9090
              name: http
          volumeMounts:
            - mountPath: '/etc/prometheus'
              name: config-volume
            - mountPath: '/prometheus'
              name: data
          resources:
            requests:
              cpu: 100m
              memory: 512Mi
            limits:
              cpu: 100m
              memory: 512Mi
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: prometheus-data
        - name: config-volume
          configMap:
            name: prometheus-config

prometheus-svc.yaml

apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitor
  labels:
    app: prometheus
spec:
  selector:
    app: prometheus
  type: NodePort
  ports:
    - name: web
      port: 9090
      targetPort: 9090
      #targetPort: http
kubectl create namespace monitor
kubectl apply -f prometheus-cm.yaml
mkdir -p /data/k8s/prometheus # create on the node the PV is pinned to (master01)
kubectl apply -f prometheus-pv-pvc.yaml
kubectl apply -f prometheus-rbac.yaml
kubectl apply -f prometheus-deploy.yaml
kubectl apply -f prometheus-svc.yaml
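
Then confirm the pod is running and find the NodePort assigned to the Service:

kubectl -n monitor get pods -l app=prometheus
kubectl -n monitor get svc prometheus # note the port mapped to 9090, then browse http://<nodeIP>:<nodePort>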

# Stop the binary deployment from section 一
systemctl stop prometheus
systemctl disable prometheus

# Add the scrape job, then apply -f
[root@master01 monitor]# cat prometheus-cm.yaml 
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitor
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s  # scrape every configured target endpoint every 15s
      scrape_timeout: 15s   # a scrape that does not finish within 15s times out and its data is dropped
      evaluation_interval: 15s  # evaluate alerting rules every 15s
    
    scrape_configs:
      - job_name: "prometheus"
        static_configs:
          - targets: ["localhost:9090"]
      
      - job_name: 'example-random'
        scrape_interval: 5s
        static_configs:
          - targets: ['192.168.110.101:8080', '192.168.110.101:8081']
            labels:
              group: 'production'
          - targets: ['192.168.110.101:8082']
            labels:
              group: 'canary'

# Reload the server (10.108.206.132 is the clusterIP of the prometheus Service; /-/reload works because --web.enable-lifecycle is set)
[root@master01 monitor]# kubectl  -n monitor get pods -o wide
NAME                          READY   STATUS    RESTARTS   AGE     IP                NODE       NOMINATED NODE   READINESS GATES
prometheus-7b644bfcfc-l5twf   1/1     Running   0          5h25m   10.244.0.18       master01   <none>           <none>
[root@master01 monitor]# curl -X POST "http://10.108.206.132:9090/-/reload"

三、Monitoring application software
(1) The service exposes a /metrics endpoint itself: monitor it directly by adding the following target to the config, then apply -f

      - job_name: "coredns"
        static_configs:
          - targets: ["kube-dns.kube-system.svc.cluster.local:9153"]
Wait a moment, then:
curl -X POST "http://10.108.206.132:9090/-/reload"
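
A quick way to confirm the new job is scraping is to query the built-in up metric in the web UI:

up{job="coredns"} # 1 = scraped successfully, 0 = scrape failing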

(2) The application does not expose a /metrics endpoint itself: install the corresponding exporter.

Official exporter list: https://prometheus.io/docs/instrumenting/exporters/
Install Redis:
yum install redis -y
            
sed -ri 's/bind 127.0.0.1/bind 0.0.0.0/g' /etc/redis.conf
sed -ri 's/port 6379/port 16379/g' /etc/redis.conf


cat >> /etc/redis.conf << "EOF"
requirepass 123456
EOF

systemctl restart redis
systemctl status redis
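
Optionally confirm Redis answers on the new port with the password before wiring up the exporter:

redis-cli -p 16379 -a 123456 ping # should reply PONG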



Add redis_exporter to collect Redis monitoring metrics.

# 1. Download
wget https://github.com/oliver006/redis_exporter/releases/download/v1.61.0/redis_exporter-v1.61.0.linux-amd64.tar.gz

# 2. Install
tar xf redis_exporter-v1.61.0.linux-amd64.tar.gz 
mv redis_exporter-v1.61.0.linux-amd64/redis_exporter /usr/bin/

# 3. Create a systemd service
cat > /usr/lib/systemd/system/redis_exporter.service << 'EOF'
[Unit]
Description=Redis Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Type=simple
ExecStart=/usr/bin/redis_exporter --redis.addr=redis://127.0.0.1:16379 --redis.password=123456 --web.listen-address=0.0.0.0:9122 --exclude-latency-histogram-metrics 

[Install]
WantedBy=multi-user.target

EOF

# 4. Start
systemctl daemon-reload
systemctl restart redis_exporter
systemctl status redis_exporter
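
The exporter should now serve metrics on port 9122; redis_up is 1 when it can reach Redis:

curl -s http://localhost:9122/metrics | grep '^redis_up'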

# 5. Add the scrape job to the ConfigMap
      - job_name: "redis-server"  # add this job
        static_configs:
          - targets: ["192.168.71.101:9122"]

kubectl apply -f prometheus-cm.yaml


# 6. After a moment, trigger a reload of the Prometheus server
curl -X POST "http://10.108.206.132:9090/-/reload"


# 7. Additional note
If your redis-server itself runs in Kubernetes, you usually would not deploy a bare redis_exporter as above. Instead, run redis_exporter as a sidecar in the same Pod as the main redis container, as shown below; containers in a Pod share a network namespace, so the exporter reaches Redis at its default address localhost:6379.

# prome-redis.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  namespace: monitor
spec:
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:4
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
          ports:
            - containerPort: 6379
        - name: redis-exporter
          image: oliver006/redis_exporter:latest
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
          ports:
            - containerPort: 9121
---
apiVersion: v1
kind: Service
metadata:
  name: redis
  namespace: monitor
spec:
  selector:
    app: redis
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
    - name: prom
      port: 9121
      targetPort: 9121

# You can now reach the /metrics endpoint via the Service clusterIP on port 9121
# curl <clusterIP of the svc above>:9121/metrics

# Add the scrape job; inside the cluster the Service name resolves directly. Update prometheus-cm.yaml as follows:
      - job_name: 'redis'
        static_configs:
          - targets: ['redis:9121']
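
After the reload, the sidecar target can be verified like the earlier ones, e.g. by querying redis_up for this job in the expression browser:

redis_up{job="redis"} # expect 1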