1. Component Overview
# If metrics-server is already installed, uninstall it first, otherwise it will conflict with this stack
# (prometheus-adapter registers the same resource-metrics APIService; a quick check follows the list below).
1. Metrics Server: the aggregator for cluster resource-usage data, consumed inside the cluster by kubectl top, the HPA, the scheduler, and so on. In this stack that role is taken over by prometheus-adapter.
2. Prometheus Operator: deploys and manages Prometheus, Alertmanager, and related monitoring components on Kubernetes through CRDs (Prometheus, ServiceMonitor, PrometheusRule, and so on); it does not store monitoring data itself.
3. Node Exporter: exposes key hardware and OS metrics for each node.
4. kube-state-metrics: generates metrics about the state of Kubernetes objects (Deployments, Pods, Nodes, ...), which the bundled alerting rules build on.
5. Prometheus: pulls metrics over HTTP from the apiserver, scheduler, controller-manager, kubelet, and other components, and stores the time series.
6. Grafana: the visualization platform for the collected metrics and dashboards.
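Before installing, it may help to confirm which Service currently backs the resource-metrics API; after this stack is deployed it should point at prometheus-adapter. A quick check:
kubectl get apiservice v1beta1.metrics.k8s.io \
  -o jsonpath='{.spec.service.namespace}/{.spec.service.name}{"\n"}'
# expected after installation: monitoring/prometheus-adapter
# if it still shows kube-system/metrics-server, remove metrics-server first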
2. Installation and Deployment
Project: https://github.com/prometheus-operator/kube-prometheus
3. Choosing a Version
See the official compatibility matrix: https://github.com/prometheus-operator/kube-prometheus?tab=readme-ov-file#compatibility. For example, for Kubernetes 1.30 the recommended kube-prometheus version is release-0.14.
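Check the cluster's server version first so you can pick the matching branch:
kubectl version
# note the Server Version line (e.g. v1.30.x) and choose the release-* branch accordingly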
4. Clone the Project Locally
# substitute the branch that matches your cluster version (see the compatibility matrix above)
git clone -b release-0.13 https://github.com/prometheus-operator/kube-prometheus.git
5. Create the Resource Objects
# In mainland China, change the image registry addresses first (a sed sketch follows this command block)
# optional: manifests/setup also creates the monitoring namespace
[root@master1 k8s-install]# kubectl create namespace monitoring
[root@master1 k8s-install]# cd kube-prometheus/
[root@master1 kube-prometheus]# kubectl apply --server-side -f manifests/setup
[root@master1 kube-prometheus]# kubectl wait \
--for condition=Established \
--all CustomResourceDefinition \
--namespace=monitoring
[root@master1 kube-prometheus]# kubectl apply -f manifests/
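If registry.k8s.io and quay.io are unreachable from your nodes, one option is to rewrite the image references in the manifests before running kubectl apply -f manifests/. A minimal sketch; the DaoCloud mirror prefixes below are only examples, substitute whichever mirror you actually use:
# run from the kube-prometheus directory, before applying manifests/
sed -i 's#registry.k8s.io/#k8s.m.daocloud.io/#g' manifests/*.yaml
sed -i 's#quay.io/#quay.m.daocloud.io/#g' manifests/*.yaml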
root@k8s01:~/helm/prometheus/kube-prometheus# kubectl apply --server-side -f manifests/setup
customresourcedefinition.apiextensions.k8s.io/alertmanagerconfigs.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/prometheusagents.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/scrapeconfigs.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com serverside-applied
customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com serverside-applied
namespace/monitoring serverside-applied
root@k8s01:~/helm/prometheus/kube-prometheus# kubectl wait \
> --for condition=Established \
> --all CustomResourceDefinitions \
> --namespace=monitoring
customresourcedefinition.apiextensions.k8s.io/alertmanagerconfigs.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/alertmanagers.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/certificaterequests.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/certificates.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/challenges.acme.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/clusterissuers.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/ingressroutes.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/ingressroutetcps.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/ingressrouteudps.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/instrumentations.opentelemetry.io condition met
customresourcedefinition.apiextensions.k8s.io/issuers.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/middlewares.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/middlewaretcps.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/opampbridges.opentelemetry.io condition met
customresourcedefinition.apiextensions.k8s.io/opentelemetrycollectors.opentelemetry.io condition met
customresourcedefinition.apiextensions.k8s.io/orders.acme.cert-manager.io condition met
customresourcedefinition.apiextensions.k8s.io/podmonitors.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/policybindings.sts.min.io condition met
customresourcedefinition.apiextensions.k8s.io/probes.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/prometheusagents.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/prometheuses.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/prometheusrules.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/scrapeconfigs.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/serverstransports.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/serverstransporttcps.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/servicemonitors.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/targetallocators.opentelemetry.io condition met
customresourcedefinition.apiextensions.k8s.io/thanosrulers.monitoring.coreos.com condition met
customresourcedefinition.apiextensions.k8s.io/tlsoptions.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/tlsstores.traefik.io condition met
customresourcedefinition.apiextensions.k8s.io/traefikservices.traefik.io condition met
root@k8s01:~/helm/prometheus/kube-prometheus# kubectl apply -f manifests/
alertmanager.monitoring.coreos.com/main created
networkpolicy.networking.k8s.io/alertmanager-main created
poddisruptionbudget.policy/alertmanager-main created
prometheusrule.monitoring.coreos.com/alertmanager-main-rules created
secret/alertmanager-main created
service/alertmanager-main created
serviceaccount/alertmanager-main created
servicemonitor.monitoring.coreos.com/alertmanager-main created
clusterrole.rbac.authorization.k8s.io/blackbox-exporter created
clusterrolebinding.rbac.authorization.k8s.io/blackbox-exporter created
configmap/blackbox-exporter-configuration created
deployment.apps/blackbox-exporter created
networkpolicy.networking.k8s.io/blackbox-exporter created
service/blackbox-exporter created
serviceaccount/blackbox-exporter created
servicemonitor.monitoring.coreos.com/blackbox-exporter created
secret/grafana-config created
secret/grafana-datasources created
configmap/grafana-dashboard-alertmanager-overview created
configmap/grafana-dashboard-apiserver created
configmap/grafana-dashboard-cluster-total created
configmap/grafana-dashboard-controller-manager created
configmap/grafana-dashboard-grafana-overview created
configmap/grafana-dashboard-k8s-resources-cluster created
configmap/grafana-dashboard-k8s-resources-multicluster created
configmap/grafana-dashboard-k8s-resources-namespace created
configmap/grafana-dashboard-k8s-resources-node created
configmap/grafana-dashboard-k8s-resources-pod created
configmap/grafana-dashboard-k8s-resources-workload created
configmap/grafana-dashboard-k8s-resources-workloads-namespace created
configmap/grafana-dashboard-kubelet created
configmap/grafana-dashboard-namespace-by-pod created
configmap/grafana-dashboard-namespace-by-workload created
configmap/grafana-dashboard-node-cluster-rsrc-use created
configmap/grafana-dashboard-node-rsrc-use created
configmap/grafana-dashboard-nodes-aix created
configmap/grafana-dashboard-nodes-darwin created
configmap/grafana-dashboard-nodes created
configmap/grafana-dashboard-persistentvolumesusage created
configmap/grafana-dashboard-pod-total created
configmap/grafana-dashboard-prometheus-remote-write created
configmap/grafana-dashboard-prometheus created
configmap/grafana-dashboard-proxy created
configmap/grafana-dashboard-scheduler created
configmap/grafana-dashboard-workload-total created
configmap/grafana-dashboards created
deployment.apps/grafana created
networkpolicy.networking.k8s.io/grafana created
prometheusrule.monitoring.coreos.com/grafana-rules created
service/grafana created
serviceaccount/grafana created
servicemonitor.monitoring.coreos.com/grafana created
prometheusrule.monitoring.coreos.com/kube-prometheus-rules created
clusterrole.rbac.authorization.k8s.io/kube-state-metrics created
clusterrolebinding.rbac.authorization.k8s.io/kube-state-metrics created
deployment.apps/kube-state-metrics created
networkpolicy.networking.k8s.io/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kube-state-metrics-rules created
service/kube-state-metrics created
serviceaccount/kube-state-metrics created
servicemonitor.monitoring.coreos.com/kube-state-metrics created
prometheusrule.monitoring.coreos.com/kubernetes-monitoring-rules created
servicemonitor.monitoring.coreos.com/kube-apiserver created
servicemonitor.monitoring.coreos.com/coredns created
servicemonitor.monitoring.coreos.com/kube-controller-manager created
servicemonitor.monitoring.coreos.com/kube-scheduler created
servicemonitor.monitoring.coreos.com/kubelet created
clusterrole.rbac.authorization.k8s.io/node-exporter created
clusterrolebinding.rbac.authorization.k8s.io/node-exporter created
daemonset.apps/node-exporter created
networkpolicy.networking.k8s.io/node-exporter created
prometheusrule.monitoring.coreos.com/node-exporter-rules created
service/node-exporter created
serviceaccount/node-exporter created
servicemonitor.monitoring.coreos.com/node-exporter created
clusterrole.rbac.authorization.k8s.io/prometheus-k8s created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-k8s created
networkpolicy.networking.k8s.io/prometheus-k8s created
poddisruptionbudget.policy/prometheus-k8s created
prometheus.monitoring.coreos.com/k8s created
prometheusrule.monitoring.coreos.com/prometheus-k8s-prometheus-rules created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s-config created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
rolebinding.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s-config created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
role.rbac.authorization.k8s.io/prometheus-k8s created
service/prometheus-k8s created
serviceaccount/prometheus-k8s created
servicemonitor.monitoring.coreos.com/prometheus-k8s created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
clusterrole.rbac.authorization.k8s.io/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/system:aggregated-metrics-reader created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-adapter created
clusterrolebinding.rbac.authorization.k8s.io/resource-metrics:system:auth-delegator created
clusterrole.rbac.authorization.k8s.io/resource-metrics-server-resources created
configmap/adapter-config created
deployment.apps/prometheus-adapter created
networkpolicy.networking.k8s.io/prometheus-adapter created
poddisruptionbudget.policy/prometheus-adapter created
rolebinding.rbac.authorization.k8s.io/resource-metrics-auth-reader created
service/prometheus-adapter created
serviceaccount/prometheus-adapter created
servicemonitor.monitoring.coreos.com/prometheus-adapter created
clusterrole.rbac.authorization.k8s.io/prometheus-operator created
clusterrolebinding.rbac.authorization.k8s.io/prometheus-operator created
deployment.apps/prometheus-operator created
networkpolicy.networking.k8s.io/prometheus-operator created
prometheusrule.monitoring.coreos.com/prometheus-operator-rules created
service/prometheus-operator created
serviceaccount/prometheus-operator created
servicemonitor.monitoring.coreos.com/prometheus-operator created
root@k8s01:~/helm/prometheus/kube-prometheus#
6. Verification
# Check pod status
root@k8s03:~# kubectl get pod -n monitoring
NAME                                   READY   STATUS    RESTARTS   AGE
alertmanager-main-0                    2/2     Running   0          50m
alertmanager-main-1                    2/2     Running   0          50m
alertmanager-main-2                    2/2     Running   0          50m
blackbox-exporter-57bb665766-d9kwj     3/3     Running   0          50m
grafana-fdf8c48f-f6cck                 1/1     Running   0          50m
kube-state-metrics-5ffdd9685c-hg5hc    3/3     Running   0          50m
node-exporter-8l29v                    2/2     Running   0          31m
node-exporter-gdclz                    2/2     Running   0          28m
node-exporter-j5r76                    2/2     Running   0          50m
prometheus-adapter-7945bdf5d7-dh75k    1/1     Running   0          50m
prometheus-adapter-7945bdf5d7-nbp94    1/1     Running   0          50m
prometheus-k8s-0                       2/2     Running   0          50m
prometheus-k8s-1                       2/2     Running   0          50m
prometheus-operator-85c5ffc677-jk8c9   2/2     Running   0          50m
# Check node usage through the metrics API (served here by prometheus-adapter)
root@k8s03:~# kubectl top node
NAME    CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%
k8s01   3277m        40%    6500Mi          66%
k8s02   6872m        85%    4037Mi          36%
k8s03   362m         4%     6407Mi          65%
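Before any Ingress exists, the UIs can also be reached temporarily with a port-forward, as the upstream README suggests:
kubectl -n monitoring port-forward svc/grafana 3000:3000
# then open http://localhost:3000 ; the same works for svc/prometheus-k8s 9090
# and svc/alertmanager-main 9093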
7. Add Ingress Resources
# Example using ingress-nginx
[root@master1 manifests]# cat ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: alertmanager
  namespace: monitoring
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  ingressClassName: nginx
  rules:
    - host: alertmanager.local.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: alertmanager-main
                port:
                  number: 9093
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana
  namespace: monitoring
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  ingressClassName: nginx
  rules:
    - host: grafana.local.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: grafana
                port:
                  number: 3000
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus
  namespace: monitoring
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
spec:
  ingressClassName: nginx
  rules:
    - host: prometheus.local.com
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: prometheus-k8s
                port:
                  number: 9090
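After saving the file, apply it and list the Ingress objects to confirm they were created (ADDRESS is populated once the controller picks them up):
[root@master1 manifests]# kubectl apply -f ingress.yaml
[root@master1 manifests]# kubectl get ingress -n monitoring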
# Example using Traefik:
[root@master1 manifests]# cat ingress.yaml
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: alertmanager
  namespace: monitoring
spec:
  entryPoints:
    - web
  routes:
    - match: Host(`alertmanager.local.com`)
      kind: Rule
      services:
        - name: alertmanager-main
          port: 9093
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: grafana
  namespace: monitoring
spec:
  entryPoints:
    - web
  routes:
    - match: Host(`grafana.local.com`)
      kind: Rule
      services:
        - name: grafana
          port: 3000
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: prometheus
  namespace: monitoring
spec:
  entryPoints:
    - web
  routes:
    - match: Host(`prometheus.local.com`)
      kind: Rule
      services:
        - name: prometheus-k8s
          port: 9090
[root@master1 manifests]# kubectl apply -f ingress.yaml
ingressroute.traefik.io/alertmanager created
ingressroute.traefik.io/grafana created
ingressroute.traefik.io/prometheus created
8. Web Access Verification
# Add hosts entries (on Windows: notepad $env:windir\System32\drivers\etc\hosts)
192.168.3.200 alertmanager.local.com
192.168.3.200 prometheus.local.com
192.168.3.200 grafana.local.com
Visit http://alertmanager.local.com:30080 to view the currently firing alerts.
Visit http://prometheus.local.com:30080/targets and confirm that all targets are up.
Visit http://grafana.local.com:30080/login; the default username and password are admin/admin.
Check the data sources: a Prometheus data source has already been configured for us automatically.
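If a page does not load, routing can be tested from the command line first. This assumes 30080 is the NodePort of your ingress controller, as in the URLs above:
curl -H 'Host: prometheus.local.com' http://192.168.3.200:30080/-/healthy
# Prometheus answers a short health message on /-/healthy when routing works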
9. Fixing Abnormal Targets
On the targets page you will find two scrape jobs with no instances: kube-controller-manager and kube-scheduler. This comes from their ServiceMonitor objects, which select Services in kube-system that a default installation does not create.
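You can see exactly which labels the ServiceMonitor selects on; the Service we create below must carry them:
kubectl -n monitoring get servicemonitor kube-controller-manager -o yaml | grep -A 2 'matchLabels'
# shows: app.kubernetes.io/name: kube-controller-manager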
root@k8s01:~/helm/prometheus/kube-prometheus# cat prometheus-kubeControllerManagerService.yaml
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-controller-manager
  labels:
    app.kubernetes.io/name: kube-controller-manager
spec:
  selector:
    component: kube-controller-manager
  type: ClusterIP
  ports:
    - name: https-metrics
      port: 10257
      targetPort: 10257
      protocol: TCP
# Create prometheus-kubeControllerManagerService.yaml with the content above, then apply it:
root@k8s01:~/helm/prometheus/kube-prometheus# kubectl apply -f prometheus-kubeControllerManagerService.yaml
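The second empty job needs an analogous Service for kube-scheduler. A minimal sketch, assuming a kubeadm cluster where the scheduler pods carry the label component: kube-scheduler and serve metrics on the secure port 10259 (save it as, say, prometheus-kubeSchedulerService.yaml and apply it the same way):
apiVersion: v1
kind: Service
metadata:
  namespace: kube-system
  name: kube-scheduler
  labels:
    app.kubernetes.io/name: kube-scheduler
spec:
  selector:
    component: kube-scheduler
  type: ClusterIP
  ports:
    - name: https-metrics
      port: 10259
      targetPort: 10259
      protocol: TCP
After both Services exist, the two jobs on the targets page should show up instances within a scrape interval or two.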