Prometheus Deployment Guide
Download the binary
wget https://github.com/prometheus/prometheus/releases/download/v2.30.3/prometheus-2.30.3.linux-amd64.tar.gz
Extract and move
tar -zxvf prometheus-2.30.3.linux-amd64.tar.gz
mv prometheus-2.30.3.linux-amd64 /usr/local/prometheus
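Optionally verify that the binary runs before wiring it into systemd:
/usr/local/prometheus/prometheus --version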
Add as a systemd service
Unit file
vi /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Documentation=https://prometheus.io
[Service]
Type=simple
ExecStart=/usr/local/prometheus/prometheus \
--config.file=/usr/local/prometheus/prometheus.yml \
--storage.tsdb.path=/usr/local/prometheus/data
Restart=on-failure
WatchdogSec=10s
[Install]
WantedBy=multi-user.target
Start the service
sudo systemctl daemon-reload
sudo systemctl start prometheus.service
sudo systemctl status prometheus.service
Enable start on boot
sudo systemctl enable prometheus.service
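To confirm the server is up without opening a browser, query the health endpoint (assuming the default listen address):
curl http://localhost:9090/-/healthy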
Basic usage
Prometheus listens on port 9090 by default. Once the service is running, open the web UI in a browser.
Enter the following expression to graph the per-second rate of HTTP requests returning status code 200 in the self-scraping Prometheus instance:
rate(promhttp_metric_handler_requests_total{code="200"}[1m])
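The same query can also be run over the HTTP API, which is convenient for scripting; for example, against the default listen address:
curl -sG 'http://localhost:9090/api/v1/query' --data-urlencode 'query=rate(promhttp_metric_handler_requests_total{code="200"}[1m])'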
Configuration file
Reload
curl -X POST http://127.0.0.1:9090/-/reload
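Note that Prometheus disables the /-/reload endpoint by default; it is only served when the server is started with the --web.enable-lifecycle flag. With the systemd unit above, append that flag to the ExecStart line and restart:
sudo systemctl edit --full prometheus.service   # add --web.enable-lifecycle to ExecStart
sudo systemctl daemon-reload
sudo systemctl restart prometheus.service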
Kubernetes deployment manifests
Deployment&Service
apiVersion: apps/v1
kind: Deployment
metadata:
name: prometheus
labels:
app: prometheus
spec:
replicas: 1
strategy:
rollingUpdate:
maxSurge: 1
maxUnavailable: 1
type: RollingUpdate
selector:
matchLabels:
app: prometheus
template:
metadata:
labels:
app: prometheus
spec:
initContainers:
- name: prometheus-data-permission-setup
image: busybox
command: ["/bin/chmod","-R","777", "/data"]
volumeMounts:
- name: prometheus-data
mountPath: /data
containers:
- name: prometheus
image: prom/prometheus
args:
- '--storage.tsdb.path=/prometheus'
- '--config.file=/etc/prometheus/prometheus.yml'
command:
- /bin/prometheus
ports:
- name: web
containerPort: 9090
resources:
limits:
cpu: 20m
memory: 150Mi
requests:
cpu: 5m
memory: 80Mi
volumeMounts:
- name: config-volume
mountPath: /etc/prometheus
- name: prometheus-data
mountPath: /prometheus
restartPolicy: Always
securityContext: {}
terminationGracePeriodSeconds: 30
serviceAccountName: prometheus
volumes:
- name: config-volume
configMap:
name: prometheus-config
- name: prometheus-data
persistentVolumeClaim:
claimName: prometheus-pvc
---
apiVersion: v1
kind: Service
metadata:
name: prometheus
spec:
type: ClusterIP
selector:
app: prometheus
ports:
- port: 9090
targetPort: 9090
protocol: TCP
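Assuming the manifests in this section are saved to separate files (the file names below are placeholders) and everything is applied into the kube-basic namespace referenced by the ClusterRoleBinding further down, the Deployment can be rolled out and checked once the RBAC, ConfigMap, and PVC objects exist:
kubectl apply -f prometheus-deployment.yaml -n kube-basic
kubectl get pods -n kube-basic -l app=prometheus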
RBAC
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus
rules:
- apiGroups:
- ""
resources:
- nodes
- nodes/proxy
- services
- endpoints
- pods
verbs:
- get
- list
- watch
- apiGroups:
- extensions
resources:
- ingresses
verbs:
- get
- list
- watch
- nonResourceURLs:
- /metrics
verbs:
- get
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: prometheus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus
subjects:
- kind: ServiceAccount
name: prometheus
namespace: kube-basic
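The ClusterRoleBinding binds the ServiceAccount in the kube-basic namespace, while the ServiceAccount manifest itself does not set a namespace, so apply this file into that namespace explicitly (the file name is a placeholder; the cluster-scoped objects simply ignore the namespace flag):
kubectl apply -f prometheus-rbac.yaml -n kube-basic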
PV&PVC
apiVersion: v1
kind: PersistentVolume
metadata:
name: node1-prometheus-pv
spec:
capacity:
storage: 2Gi
accessModes:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
storageClassName: node1-prometheus-pv
nfs:
path: /data/nfs/kubernetes/prometheus
server: node1
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: prometheus-pvc
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 2Gi
storageClassName: node1-prometheus-pv
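The PV assumes node1 already exports /data/nfs/kubernetes/prometheus over NFS. A minimal sketch of that export on node1, assuming these paths and fairly permissive options (tighten the allowed clients for a real cluster):
sudo mkdir -p /data/nfs/kubernetes/prometheus
echo '/data/nfs/kubernetes/prometheus *(rw,sync,no_root_squash)' | sudo tee -a /etc/exports
sudo exportfs -ra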
Ingress
kind: Ingress
apiVersion: networking.k8s.io/v1
metadata:
name: prometheus-ingress
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
rules:
- host: prometheus.ormissia.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: prometheus
port:
number: 9090
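To test the Ingress before DNS for prometheus.ormissia.com exists, send a request with the expected Host header to Traefik's web entrypoint; <node-ip> below is a placeholder for one of your nodes:
curl -H 'Host: prometheus.ormissia.com' http://<node-ip>/graph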
Node Exporter DaemonSet
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
labels:
app: node-exporter
spec:
selector:
matchLabels:
app: node-exporter
template:
metadata:
labels:
app: node-exporter
spec:
hostPID: true
hostIPC: true
hostNetwork: true
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: node-exporter
image: prom/node-exporter
ports:
- containerPort: 9100
resources:
limits:
cpu: 10m
memory: 30Mi
requests:
cpu: 2m
memory: 10Mi
securityContext:
privileged: true
args:
- --path.procfs
- /host/proc
- --path.sysfs
- /host/sys
- --path.rootfs
- /host/root
- --collector.filesystem.ignored-mount-points
- ^/(sys|proc|dev|host|etc)($|/)
- --collector.processes
volumeMounts:
- mountPath: /host/dev
name: dev
- mountPath: /host/proc
name: proc
- mountPath: /host/sys
name: sys
- mountPath: /host/root
name: rootfs
volumes:
- name: proc
hostPath:
path: /proc
- name: dev
hostPath:
path: /dev
- name: sys
hostPath:
path: /sys
- name: rootfs
hostPath:
path: /
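Because the DaemonSet uses hostNetwork: true, each exporter listens on its node's own IP at port 9100, so a quick check needs no Service; <node-ip> is a placeholder:
curl -s http://<node-ip>:9100/metrics | head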
Prometheus Config
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
- job_name: "node1-exporter"
static_configs:
- targets: ["gateway-node-exporter:9100"]
relabel_configs:
- source_labels: [__address__]
regex: (.*)
replacement: gateway
target_label: instance
action: replace
- job_name: 'kubernetes-node-exporter'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- source_labels: [__meta_kubernetes_node_name]
action: replace
target_label: node
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- source_labels: [__meta_kubernetes_node_address_InternalIP]
action: replace
target_label: ip
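This is the file the Deployment mounts from the prometheus-config ConfigMap. Assuming it is saved locally as prometheus.yml and the kube-basic namespace is used, the ConfigMap can be created or refreshed like this:
kubectl create configmap prometheus-config --from-file=prometheus.yml -n kube-basic --dry-run=client -o yaml | kubectl apply -f -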