storage and metrics

This commit is contained in:
royalcat 2025-03-03 16:47:59 +03:00
commit 5a0d6d639b
23 changed files with 6892 additions and 0 deletions

View file

@ -0,0 +1,60 @@
---
# Grafana datasource provisioning: Tempo (traces), Loki (logs),
# Prometheus (metrics), Pyroscope (profiles).
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-config
  namespace: metrics
  labels:
    app.kubernetes.io/name: grafana
  annotations:
    use-subpath: "true"
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
      - name: Tempo
        uid: tempo
        orgId: 1
        type: tempo
        access: proxy
        url: "http://tempo:3200"
        jsonData:
          tracesToLogsV2:
            datasourceUid: loki
          tracesToProfiles:
            datasourceUid: pyroscope
            profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
      - name: Loki
        uid: loki
        type: loki
        access: proxy
        orgId: 1
        url: http://loki:3100
        version: 1
        editable: false
        jsonData:
          timeout: 60
          maxLines: 1000
      - name: Prometheus
        uid: prometheus
        type: prometheus
        access: proxy
        url: http://prometheus-server:9090
        editable: false
        jsonData:
          httpMethod: "POST"
          prometheusType: "Prometheus"
          # NOTE(review): deployed server is v3.2.1 (see prometheus.yaml)
          # but this declares 2.9.1 — confirm/update so Grafana picks the
          # right query capabilities.
          prometheusVersion: "2.9.1"
          tlsSkipVerify: true
          timeout: 30
      - name: Pyroscope
        uid: pyroscope
        type: grafana-pyroscope-datasource
        access: proxy
        orgId: 1
        url: http://pyroscope:4040
        editable: false
    # Remove any stale provisioned Prometheus entry before re-adding it above.
    deleteDatasources:
      - name: "Prometheus"
        uid: prometheus
        orgId: 1

View file

@ -0,0 +1,13 @@
---
# Persistent storage for Grafana state (dashboards, users, plugins).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-data
  namespace: metrics
  labels:
    app.kubernetes.io/name: grafana
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi

View file

@ -0,0 +1,84 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: grafana
  namespace: metrics
  labels:
    app.kubernetes.io/name: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: grafana
  strategy:
    # Recreate: the data PVC is ReadWriteOnce, so old and new pods must not
    # overlap.
    type: Recreate
  template:
    metadata:
      namespace: metrics
      labels:
        app.kubernetes.io/name: grafana
    spec:
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: grafana
          image: grafana/grafana:11.1.0
          env:
            - name: GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP
              value: "true"
            - name: GF_AUTH_GENERIC_OAUTH_API_URL
              value: https://sso.konfach.ru/realms/konfach/protocol/openid-connect/userinfo
            - name: GF_AUTH_GENERIC_OAUTH_AUTH_URL
              value: https://sso.konfach.ru/realms/konfach/protocol/openid-connect/auth
            - name: GF_AUTH_GENERIC_OAUTH_CLIENT_ID
              value: grafana
            # SECURITY: OAuth client secret is committed in plain text.
            # Move it to a Secret and reference it via valueFrom.secretKeyRef,
            # then rotate the credential in the IdP.
            - name: GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET
              value: oraMLSBuIaSPqZElSNRZ6gntM2xizjXL
            - name: GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH
              value: email
            - name: GF_AUTH_GENERIC_OAUTH_ENABLED
              value: "true"
            - name: GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH
              value: username
            - name: GF_AUTH_GENERIC_OAUTH_NAME
              value: KonfachSSO
            - name: GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH
              value: full_name
            - name: GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH
              value: "contains(realm_access.roles[*], 'developer') && 'Editor'"
            - name: GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_STRICT
              value: "true"
            - name: GF_AUTH_GENERIC_OAUTH_SCOPES
              value: openid email profile offline_access roles
            - name: GF_AUTH_GENERIC_OAUTH_TOKEN_URL
              value: https://sso.konfach.ru/realms/konfach/protocol/openid-connect/token
            # NOTE(review): the next two entries have no value/valueFrom, so
            # they resolve to empty strings — confirm whether they are meant
            # to be injected by other tooling or should be removed.
            - name: GF_SECURITY_ADMIN_PASSWORD
            - name: GF_SERVER_ROOT_URL
            - name: GF_SERVER_SERVE_FROM_SUB_PATH
              value: "false"
          resources:
            limits:
              memory: "512Mi"
              cpu: "500m"
            requests:
              memory: "256Mi"
              cpu: "250m"
          volumeMounts:
            - mountPath: /var/lib/grafana
              name: grafana-data
            - mountPath: /etc/grafana/provisioning/datasources/ds.yaml
              name: grafana-config
              subPath: ds.yaml
      restartPolicy: Always
      volumes:
        - name: grafana-data
          persistentVolumeClaim:
            claimName: grafana-data
        - name: grafana-config
          configMap:
            name: grafana-config
            items:
              - key: datasources.yaml
                path: ds.yaml

View file

@ -0,0 +1,73 @@
---
# Single-binary Loki configuration: filesystem storage, in-memory ring,
# 90-day retention enforced by the compactor.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: metrics
  annotations:
    use-subpath: "true"
  labels:
    app.kubernetes.io/name: loki
data:
  config.yaml: |
    auth_enabled: false
    server:
      http_listen_port: 3100
    common:
      instance_addr: 127.0.0.1
      path_prefix: /loki
      storage:
        filesystem:
          chunks_directory: /loki/chunks
          rules_directory: /loki/rules
      replication_factor: 1
      ring:
        kvstore:
          store: inmemory
    limits_config:
      allow_structured_metadata: true
      retention_period: 90d
      otlp_config:
        resource_attributes:
          attributes_config:
            - action: index_label
              attributes:
                - component
                - function
                - endpoint
    query_range:
      results_cache:
        cache:
          embedded_cache:
            enabled: true
    compactor:
      working_directory: /loki/compactor
      compaction_interval: 10m
      retention_enabled: true
      retention_delete_delay: 2h
      retention_delete_worker_count: 150
      delete_request_store: filesystem
    schema_config:
      configs:
        # Quoted so YAML parsers keep the dates as strings.
        # NOTE(review): schema v13 together with boltdb-shipper is unusual —
        # v13 is normally paired with tsdb; confirm against the Loki
        # schema docs.
        - from: "2020-10-24"
          store: boltdb-shipper
          object_store: filesystem
          schema: v13
          index:
            prefix: index_
            period: 24h
        - from: "2024-04-12"
          object_store: filesystem
          store: tsdb
          schema: v13
          index:
            prefix: index_
            period: 24h
    ruler:
      alertmanager_url: http://localhost:9093

View file

@ -0,0 +1,13 @@
---
# Persistent storage for Loki chunks, index and compactor work dir (/loki).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: loki-data
  namespace: metrics
  labels:
    app.kubernetes.io/name: loki
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi

View file

@ -0,0 +1,49 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: loki
  namespace: metrics
  labels:
    app.kubernetes.io/name: loki
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: loki
  strategy:
    # Recreate: loki-data is ReadWriteOnce.
    type: Recreate
  template:
    metadata:
      namespace: metrics
      labels:
        app.kubernetes.io/name: loki
    spec:
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: loki
          image: grafana/loki:3.1.0
          args:
            - -config.file=/etc/loki/config.yaml
          ports:
            - containerPort: 3100
              protocol: TCP
          volumeMounts:
            - mountPath: /etc/loki/config.yaml
              name: loki-config
              subPath: config.yaml
            - mountPath: /loki
              name: loki-data
      restartPolicy: Always
      volumes:
        - name: loki-config
          configMap:
            name: loki-config
            items:
              - key: config.yaml
                path: config.yaml
        - name: loki-data
          persistentVolumeClaim:
            claimName: loki-data

14
metrics/loki-service.yaml Normal file
View file

@ -0,0 +1,14 @@
---
# ClusterIP service exposing Loki's HTTP API (push + query).
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: metrics
  labels:
    app.kubernetes.io/name: loki
spec:
  ports:
    - name: "3100"
      port: 3100
      targetPort: 3100
  selector:
    app.kubernetes.io/name: loki

View file

@ -0,0 +1,76 @@
---
# OTel collector pipelines: traces -> Tempo (+ span metrics),
# metrics -> Prometheus OTLP endpoint, logs -> Loki OTLP endpoint.
apiVersion: v1
kind: ConfigMap
metadata:
  name: otel-collector-config
  namespace: metrics
  annotations:
    use-subpath: "true"
  labels:
    app.kubernetes.io/name: otel-collector
data:
  config.yaml: |
    extensions:
      health_check:
      # pprof:
      zpages:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      # Scrape the collector's own telemetry (exposed on :8888 below).
      prometheus/self:
        config:
          scrape_configs:
            - job_name: otel-collector-metrics
              scrape_interval: 10s
              static_configs:
                - targets: ["localhost:8888"]
                  labels:
                    instance: "bigserver-otel-collector"
    processors:
      batch:
    exporters:
      otlp/tempo:
        endpoint: tempo:4317
        tls:
          insecure: true
      otlphttp/prometheus-server:
        endpoint: http://prometheus-server:9090/api/v1/otlp
      otlphttp/loki:
        endpoint: http://loki:3100/otlp
    connectors:
      # Derive RED metrics from spans; feeds the metrics pipeline.
      spanmetrics:
        namespace: span.metrics
        histogram:
          explicit:
            buckets: [100us, 1ms, 2ms, 6ms, 10ms, 100ms, 250ms, 1s, 5s, 30s, 1m, 5m]
        exemplars:
          enabled: true
    service:
      extensions:
        - health_check
        - zpages
      pipelines:
        traces:
          receivers: [otlp]
          processors: [batch]
          exporters: [otlp/tempo, spanmetrics]
        metrics:
          receivers: [otlp, prometheus/self, spanmetrics]
          processors: [batch]
          exporters: [otlphttp/prometheus-server]
        logs:
          receivers: [otlp]
          processors: [batch]
          exporters: [otlphttp/loki]
      telemetry:
        metrics:
          address: 0.0.0.0:8888
          level: detailed

View file

@ -0,0 +1,47 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
  namespace: metrics
  labels:
    app.kubernetes.io/name: otel-collector
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: otel-collector
  strategy:
    type: Recreate
  template:
    metadata:
      namespace: metrics
      labels:
        app.kubernetes.io/name: otel-collector
    spec:
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: otel-collector
          image: otel/opentelemetry-collector-contrib:0.104.0
          args:
            - --config=/etc/otel-collector-config.yaml
          ports:
            - containerPort: 4317  # OTLP gRPC
              protocol: TCP
            - containerPort: 4318  # OTLP HTTP
              protocol: TCP
          volumeMounts:
            - mountPath: /etc/otel-collector-config.yaml
              name: otel-collector-config
              subPath: otel-collector-config.yaml
      restartPolicy: Always
      volumes:
        - name: otel-collector-config
          configMap:
            name: otel-collector-config
            items:
              - key: config.yaml
                path: otel-collector-config.yaml

View file

@ -0,0 +1,17 @@
---
# ClusterIP service for OTLP ingest (gRPC 4317, HTTP 4318).
apiVersion: v1
kind: Service
metadata:
  name: otel-collector
  namespace: metrics
  labels:
    app.kubernetes.io/name: otel-collector
spec:
  ports:
    - name: "4317"
      port: 4317
      targetPort: 4317
    - name: "4318"
      port: 4318
      targetPort: 4318
  selector:
    app.kubernetes.io/name: otel-collector

172
metrics/prometheus.yaml Normal file
View file

@ -0,0 +1,172 @@
---
# Source: prometheus/templates/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: v3.2.1
    app.kubernetes.io/part-of: prometheus
  name: prometheus-server
  namespace: metrics
  annotations: {}
---
# Source: prometheus/templates/cm.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-server
  namespace: metrics
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: v3.2.1
    app.kubernetes.io/part-of: prometheus
data:
  allow-snippet-annotations: "false"
  # Minimal config: no scrape jobs — metrics arrive via the OTLP receiver
  # (see --web.enable-otlp-receiver on the deployment).
  prometheus.yml: |
    global:
      evaluation_interval: 1m
    storage:
      tsdb:
        out_of_order_time_window: 30m
    otlp:
      translation_strategy: NoUTF8EscapingWithSuffixes
---
# Source: prometheus/templates/pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: v3.2.1
    app.kubernetes.io/part-of: prometheus
  name: prometheus-server
  namespace: metrics
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: "8Gi"
---
# Source: prometheus/templates/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus-server
  namespace: metrics
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: v3.2.1
    app.kubernetes.io/part-of: prometheus
spec:
  type: "ClusterIP"
  sessionAffinity: None
  ports:
    - name: http
      port: 9090
      protocol: TCP
      targetPort: 9090
  selector:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
---
# Source: prometheus/templates/deploy.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/instance: prometheus
    app.kubernetes.io/version: v3.2.1
    app.kubernetes.io/part-of: prometheus
  name: prometheus-server
  namespace: metrics
spec:
  selector:
    matchLabels:
      app.kubernetes.io/component: server
      app.kubernetes.io/name: prometheus
      app.kubernetes.io/instance: prometheus
  replicas: 1
  revisionHistoryLimit: 10
  strategy:
    # Recreate: the data PVC is ReadWriteOnce.
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/component: server
        app.kubernetes.io/name: prometheus
        app.kubernetes.io/instance: prometheus
        app.kubernetes.io/version: v3.2.1
        app.kubernetes.io/part-of: prometheus
    spec:
      enableServiceLinks: true
      serviceAccountName: prometheus-server
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: prometheus-server
          image: "quay.io/prometheus/prometheus:v3.2.1"
          args:
            - --storage.tsdb.retention.time=15d
            - --config.file=/etc/config/prometheus.yml
            - --storage.tsdb.path=/data
            - --web.enable-lifecycle
            - --web.enable-otlp-receiver
          ports:
            - containerPort: 9090
          resources:
            limits:
              cpu: "500m"
              memory: "256Mi"
            requests:
              cpu: "250m"
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9090
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 5
            timeoutSeconds: 4
            failureThreshold: 3
            successThreshold: 1
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9090
              scheme: HTTP
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 3
            successThreshold: 1
          volumeMounts:
            - name: config-volume
              mountPath: /etc/config
              readOnly: true
            - name: storage-volume
              mountPath: /data
              subPath: ""
      dnsPolicy: ClusterFirst
      # Long grace period so TSDB can flush/checkpoint on shutdown.
      terminationGracePeriodSeconds: 300
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-server
        - name: storage-volume
          persistentVolumeClaim:
            claimName: prometheus-server

View file

@ -0,0 +1,18 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: pyroscope-config
  namespace: metrics
  annotations:
    use-subpath: "true"
  labels:
    app.kubernetes.io/name: pyroscope
data:
  config.yaml: |
    tracing:
      enabled: true
      profiling_enabled: true
    pyroscopedb:
      max_block_duration: 5m

View file

@ -0,0 +1,14 @@
---
# Persistent storage for Pyroscope profile data (/data).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pyroscope-data
  namespace: metrics
  labels:
    app.kubernetes.io/name: pyroscope
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi

View file

@ -0,0 +1,48 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: pyroscope
  namespace: metrics
  labels:
    app.kubernetes.io/name: pyroscope
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: pyroscope
  strategy:
    # Recreate: pyroscope-data is ReadWriteOnce.
    type: Recreate
  template:
    metadata:
      namespace: metrics
      labels:
        app.kubernetes.io/name: pyroscope
    spec:
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: pyroscope
          image: grafana/pyroscope:1.7.1
          ports:
            - containerPort: 4040
              protocol: TCP
          volumeMounts:
            # NOTE(review): no -config.file argument is passed; verify the
            # image actually reads /etc/pyroscope.yml by default, otherwise
            # this mounted config is ignored.
            - mountPath: /etc/pyroscope.yml
              name: pyroscope-config
              subPath: pyroscope.yml
            - mountPath: /data
              name: pyroscope-data
      restartPolicy: Always
      volumes:
        - name: pyroscope-config
          configMap:
            name: pyroscope-config
            items:
              - key: config.yaml
                path: pyroscope.yml
        - name: pyroscope-data
          persistentVolumeClaim:
            claimName: pyroscope-data

View file

@ -0,0 +1,14 @@
---
# ClusterIP service for Pyroscope's HTTP API/UI.
apiVersion: v1
kind: Service
metadata:
  name: pyroscope
  namespace: metrics
  labels:
    app.kubernetes.io/name: pyroscope
spec:
  ports:
    - name: "4040"
      port: 4040
      targetPort: 4040
  selector:
    app.kubernetes.io/name: pyroscope

View file

@ -0,0 +1,44 @@
---
# Tempo single-binary configuration: local (filesystem) trace storage,
# 28-day block retention.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tempo-config
  namespace: metrics
  annotations:
    use-subpath: "true"
  labels:
    app.kubernetes.io/name: tempo
data:
  config.yaml: |
    stream_over_http_enabled: true
    server:
      http_listen_port: 3200
      log_level: info
    distributor:
      receivers:
        otlp:
          protocols:
            http:
            grpc:
    ingester:
      max_block_bytes: 131072000 # 128mb
    compactor:
      compaction:
        block_retention: 672h # 28 days
        compacted_block_retention: 30m
        retention_concurrency: 2
    storage:
      trace:
        backend: local
        wal:
          path: /tempo-data/wal # where to store the wal locally
        local:
          path: /tempo-data/blocks
    overrides:
      defaults:
        global:
          max_bytes_per_trace: 10000000

View file

@ -0,0 +1,13 @@
---
# Persistent storage for Tempo WAL and blocks (/tempo-data).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: tempo-data
  namespace: metrics
  labels:
    app.kubernetes.io/name: tempo
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 100Mi

View file

@ -0,0 +1,54 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: tempo
  namespace: metrics
  labels:
    app.kubernetes.io/name: tempo
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: tempo
  strategy:
    # Recreate: tempo-data is ReadWriteOnce.
    type: Recreate
  template:
    metadata:
      namespace: metrics
      labels:
        app.kubernetes.io/name: tempo
    spec:
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: tempo
          image: grafana/tempo:2.5.0
          args:
            - -config.file=/etc/tempo/config.yaml
          ports:
            - containerPort: 3200  # Tempo HTTP API
              protocol: TCP
            - containerPort: 4317  # OTLP gRPC
              protocol: TCP
            - containerPort: 4318  # OTLP HTTP
              protocol: TCP
          volumeMounts:
            - mountPath: /etc/tempo/config.yaml
              name: tempo-config
              subPath: config.yaml
            - mountPath: /tempo-data
              name: tempo-data
      restartPolicy: Always
      volumes:
        - name: tempo-config
          configMap:
            name: tempo-config
            items:
              - key: config.yaml
                path: config.yaml
        - name: tempo-data
          persistentVolumeClaim:
            claimName: tempo-data

View file

@ -0,0 +1,20 @@
---
# ClusterIP service for Tempo: query API (3200) and OTLP ingest (4317/4318).
apiVersion: v1
kind: Service
metadata:
  name: tempo
  namespace: metrics
  labels:
    app.kubernetes.io/name: tempo
spec:
  ports:
    - name: "3200"
      port: 3200
      targetPort: 3200
    - name: "4317"
      port: 4317
      targetPort: 4317
    - name: "4318"
      port: 4318
      targetPort: 4318
  selector:
    app.kubernetes.io/name: tempo