add source code and readme
This commit is contained in:
@@ -0,0 +1,530 @@
|
||||
---
# Node-level OpenTelemetry Collector ("agent").
# Runs as a DaemonSet on the host network: tails pod logs, collects host and
# kubelet metrics, scrapes a few Prometheus endpoints, receives OTLP and ships
# everything to OpenObserve over OTLP/HTTP.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: openobserve-collector-agent
  namespace: openobserve-collector
spec:
  managementState: managed
  image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.127.0
  config:
    exporters:
      otlphttp/openobserve:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080
        headers:
          # Same ${env:...} form as the kubeletstats endpoint below.
          Authorization: ${env:OPENOBSERVE_AUTH}
        logs_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/logs
        metrics_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/metrics
        traces_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/traces
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
      # NOTE(review): defined but not referenced by any pipeline in this file.
      otlphttp/openobserve_k8s_events:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080
        headers:
          Authorization: ${env:OPENOBSERVE_AUTH}
          stream-name: k8s_events
        logs_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/logs
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
    extensions:
      zpages: {}
    processors:
      batch:
        send_batch_size: 5000
        timeout: 30s
        send_batch_max_size: 6000
        # NOTE(review): metadata_keys refer to client request metadata, not
        # resource attributes - confirm these keys are ever populated.
        metadata_keys:
          - k8s.namespace.name
          - k8s.pod.name
      k8sattributes:
        auth_type: serviceAccount
        extract:
          labels:
            - from: pod
              key: app.kubernetes.io/name
              tag_name: service.name
            - from: pod
              key: app.kubernetes.io/component
              tag_name: k8s.app.component
          metadata:
            - k8s.pod.name
            - k8s.namespace.name
            - k8s.node.name
        # Only watch pods scheduled on this agent's node.
        filter:
          node_from_env_var: K8S_NODE_NAME
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
              - from: resource_attribute
                name: k8s.node.name
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
          # Needs the client connection context, so k8sattributes must run
          # before batch / tail_sampling in every pipeline (see service below).
          - sources:
              - from: connection

      attributes:
        actions:
          - key: k8s_node_name
            from_attribute: k8s.node.name
            action: upsert
      # NOTE(review): not referenced by any pipeline in this file.
      groupbyattrs/final:
        keys:
          - k8s_node_name
          - direction
      metricstransform:
        transforms:
          - include: system.network.io
            match_type: strict
            action: update
            new_name: system_network_io
          - include: system.cpu.time
            match_type: strict
            action: update
            new_name: k8s_node_cpu_time
          # NOTE(review): this and the next transform rename two different
          # source metrics to the same name - confirm that is intended.
          - include: system.cpu.utilization
            match_type: strict
            action: update
            new_name: k8s_node_cpu_utilization
          - include: k8s.node.cpu.utilization
            match_type: strict
            action: update
            new_name: k8s_node_cpu_utilization
          - include: system.memory.usage
            match_type: strict
            action: update
            new_name: system_memory_usage
          - include: system.memory.utilization
            match_type: strict
            action: update
            new_name: k8s_node_memory_utilization
          - include: system.filesystem.utilization
            match_type: strict
            action: update
            new_name: k8s_node_filesystem_utilization
          # The two container_fs_* transforms keep the name unchanged.
          - include: container_fs_reads_total
            match_type: strict
            action: update
            new_name: container_fs_reads_total
          - include: container_fs_writes_total
            match_type: strict
            action: update
            new_name: container_fs_writes_total
          - include: k8s.pod.cpu_request_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_cpu_request_utilization
          - include: k8s.pod.cpu_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_cpu_limit_utilization
          - include: k8s.pod.memory_request_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_memory_request_utilization
          - include: k8s.pod.memory_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_memory_limit_utilization
          - include: k8s.container.cpu_request_utilization
            match_type: strict
            action: update
            new_name: k8s_container_cpu_request_utilization
          - include: k8s.container.cpu_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_container_cpu_limit_utilization
          - include: k8s.container.memory_request_utilization
            match_type: strict
            action: update
            new_name: k8s_container_memory_request_utilization
          - include: k8s.container.memory_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_container_memory_limit_utilization
      # NOTE(review): not referenced by any pipeline in this file.
      resourcedetection:
        detectors:
          - system
          - env
          - k8snode
        override: true
        system:
          hostname_sources:
            - os
            - dns
      # Filter out high-cardinality, low-value metrics
      filter/drop_noisy_metrics:
        metrics:
          exclude:
            match_type: regexp
            metric_names:
              - ".*_bucket$"  # Drop every metric whose name ends in _bucket
              - "go_.*"  # Drop Go runtime metrics
              - "promhttp_.*"  # Drop Prometheus HTTP metrics
              - "process_.*"  # Drop process metrics
              - "container_spec_.*"  # Drop container spec metrics
              - "container_tasks_state"  # Drop task state metrics
      # Trace sampling: keep errors, slow traces and critical namespaces,
      # plus 5% of everything else.
      # NOTE(review): this runs per node; spans of one trace that arrive at
      # different agents are sampled independently.
      tail_sampling:
        decision_wait: 10s
        num_traces: 50000
        expected_new_traces_per_sec: 10
        policies:
          # Always sample error traces (100%)
          - name: errors
            type: status_code
            status_code:
              status_codes: [ERROR]
          # Always sample slow traces >1s (100%)
          - name: slow-traces
            type: latency
            latency:
              threshold_ms: 1000
          # Always sample traces from critical namespaces (100%)
          - name: critical-namespaces
            type: string_attribute
            string_attribute:
              key: k8s.namespace.name
              values: [kube-system, openobserve, cert-manager, ingress-nginx, longhorn-system]
          # Sample 5% of normal traces (reduced from 10% for resource optimization)
          - name: probabilistic
            type: probabilistic
            probabilistic:
              sampling_percentage: 5
    receivers:
      filelog/std:
        exclude:
          # Skip this collector's own pod logs to avoid a feedback loop. The
          # path is <namespace>_<pod>_<uid>/<container>; the namespace and pod
          # prefix match this resource's metadata.
          - /var/log/pods/openobserve-collector_openobserve-collector-agent-collector*_*/*/*.log
        include:
          - /var/log/pods/*/*/*.log
        include_file_name: false
        include_file_path: true
        operators:
          # Route each line to the parser for the runtime format it matches.
          - id: get-format
            routes:
              - expr: body matches "^\\{"
                output: parser-docker
              - expr: body matches "^[^ Z]+ "
                output: parser-crio
              - expr: body matches "^[^ Z]+Z"
                output: parser-containerd
            type: router
          - id: parser-crio
            output: extract_metadata_from_filepath
            regex: ^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$
            timestamp:
              # Quoted so no YAML loader can treat it as a timestamp.
              layout: '2006-01-02T15:04:05.999999999Z07:00'
              layout_type: gotime
              parse_from: attributes.time
            type: regex_parser
          - id: parser-containerd
            output: extract_metadata_from_filepath
            regex: ^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$
            timestamp:
              layout: "%Y-%m-%dT%H:%M:%S.%LZ"
              parse_from: attributes.time
            type: regex_parser
          - id: parser-docker
            output: extract_metadata_from_filepath
            timestamp:
              layout: "%Y-%m-%dT%H:%M:%S.%LZ"
              parse_from: attributes.time
            type: json_parser
          # Derive namespace / pod / uid / container / restart count from the
          # log file path. The uid group accepts any length because static
          # (mirror) pod directories use a 32-char hash instead of a UUID.
          - cache:
              size: 128
            id: extract_metadata_from_filepath
            parse_from: attributes["log.file.path"]
            regex: ^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]+)\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$
            type: regex_parser
          - from: attributes.log
            to: body
            type: move
          - from: attributes.stream
            to: attributes["log.iostream"]
            type: move
          - from: attributes.container_name
            to: resource["k8s.container.name"]
            type: move
          - from: attributes.namespace
            to: resource["k8s.namespace.name"]
            type: move
          - from: attributes.pod_name
            to: resource["k8s.pod.name"]
            type: move
          - from: attributes.restart_count
            to: resource["k8s.container.restart_count"]
            type: move
          - from: attributes.uid
            to: resource["k8s.pod.uid"]
            type: move
        start_at: end
      hostmetrics:
        collection_interval: 60s
        # Host root filesystem is mounted read-only at /hostfs (see volumeMounts).
        root_path: /hostfs
        scrapers:
          cpu: {}
          disk: {}
          memory: {}
          filesystem:
            exclude_fs_types:
              fs_types:
                - autofs
                - binfmt_misc
                - bpf
                - cgroup2
                - configfs
                - debugfs
                - devpts
                - devtmpfs
                - fusectl
                - hugetlbfs
                - iso9660
                - mqueue
                - nsfs
                - overlay
                - proc
                - procfs
                - pstore
                - rpc_pipefs
                - securityfs
                - selinuxfs
                - squashfs
                - sysfs
                - tracefs
              match_type: strict
            exclude_mount_points:
              match_type: regexp
              mount_points:
                - /dev/.*
                - /proc/.*
                - /sys/.*
                - /run/k3s/containerd/.*
                - /var/lib/docker/.*
                - /var/lib/kubelet/.*
                - /snap/.*
          load: {}
          network: {}
      kubeletstats:
        auth_type: serviceAccount
        collection_interval: 60s
        endpoint: https://${env:K8S_NODE_IP}:10250
        extra_metadata_labels:
          - container.id
          - k8s.volume.type
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
          - volume
        metrics:
          k8s.pod.cpu_limit_utilization:
            enabled: true
          k8s.pod.cpu_request_utilization:
            enabled: true
          k8s.pod.memory_limit_utilization:
            enabled: true
          k8s.pod.memory_request_utilization:
            enabled: true
          k8s.container.cpu_limit_utilization:
            enabled: true
          k8s.container.cpu_request_utilization:
            enabled: true
          k8s.container.memory_limit_utilization:
            enabled: true
          k8s.container.memory_request_utilization:
            enabled: true
      otlp:
        protocols:
          grpc: {}
          http: {}
      prometheus:
        config:
          scrape_configs:
            # Self-scrape of the collector's own telemetry endpoint.
            - job_name: otel-collector
              scrape_interval: 30s
              static_configs:
                - targets:
                    - 0.0.0.0:8888
            - job_name: postgresql-cnpg
              scrape_interval: 60s
              kubernetes_sd_configs:
                - role: pod
                  namespaces:
                    names:
                      - postgresql-system
              relabel_configs:
                # This config runs on every node (DaemonSet); only scrape pods
                # on the local node so each target is scraped exactly once.
                - source_labels: [__meta_kubernetes_pod_node_name]
                  action: keep
                  regex: ${env:K8S_NODE_NAME}
                # Only scrape pods with the cnpg.io/cluster label
                - source_labels: [__meta_kubernetes_pod_label_cnpg_io_cluster]
                  action: keep
                  regex: postgres-shared
                # Use the metrics port (9187)
                - source_labels: [__meta_kubernetes_pod_container_port_name]
                  action: keep
                  regex: metrics
                # Set the metrics path
                - target_label: __metrics_path__
                  replacement: /metrics
                # Add useful labels
                - source_labels: [__meta_kubernetes_pod_name]
                  target_label: instance
                - source_labels: [__meta_kubernetes_pod_label_cnpg_io_cluster]
                  target_label: cnpg_cluster
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
            # Celery and Redis metrics - direct scraping
            - job_name: redis-exporter
              scrape_interval: 30s
              kubernetes_sd_configs:
                - role: endpoints
                  namespaces:
                    names:
                      - redis-system
              relabel_configs:
                # Scrape only endpoints backed by a pod on the local node.
                - source_labels: [__meta_kubernetes_pod_node_name]
                  action: keep
                  regex: ${env:K8S_NODE_NAME}
                - source_labels: [__meta_kubernetes_service_name]
                  action: keep
                  regex: redis-exporter
                - source_labels: [__meta_kubernetes_endpoint_port_name]
                  action: keep
                  regex: metrics
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
                - source_labels: [__meta_kubernetes_service_name]
                  target_label: kubernetes_service_name
            - job_name: celery-metrics-exporter
              scrape_interval: 60s
              kubernetes_sd_configs:
                - role: endpoints
                  namespaces:
                    names:
                      - celery-monitoring
              relabel_configs:
                # Scrape only endpoints backed by a pod on the local node.
                - source_labels: [__meta_kubernetes_pod_node_name]
                  action: keep
                  regex: ${env:K8S_NODE_NAME}
                - source_labels: [__meta_kubernetes_service_name]
                  action: keep
                  regex: celery-metrics-exporter
                - source_labels: [__meta_kubernetes_endpoint_port_name]
                  action: keep
                  regex: metrics
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
                - source_labels: [__meta_kubernetes_service_name]
                  target_label: kubernetes_service_name
            # Longhorn metrics still handled by target allocator via ServiceMonitor
    service:
      telemetry:
        metrics:
          address: 0.0.0.0:8888
      # Processor order matters: k8sattributes first (it needs the connection
      # context that batching and tail sampling discard), batch last.
      pipelines:
        logs:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - batch
          receivers:
            - filelog/std
        metrics:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - attributes
            - filter/drop_noisy_metrics
            - metricstransform
            - batch
          receivers:
            - kubeletstats
            - hostmetrics
            - prometheus
        traces:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - tail_sampling
            - batch
          receivers:
            - otlp
  env:
    - name: K8S_NODE_IP
      valueFrom:
        fieldRef:
          fieldPath: status.hostIP
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    - name: OPENOBSERVE_AUTH
      valueFrom:
        secretKeyRef:
          name: openobserve-collector-credentials
          key: authorization
  ingress:
    route: {}
  mode: daemonset
  observability:
    metrics:
      enableMetrics: true
  podDisruptionBudget:
    maxUnavailable: 1
  replicas: 1
  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 300m
      memory: 512Mi
  # Runs as root; the container reads host paths mounted below.
  securityContext:
    runAsUser: 0
    runAsGroup: 0
  serviceAccount: openobserve-collector
  hostNetwork: true
  upgradeStrategy: automatic
  volumeMounts:
    - mountPath: /hostfs
      name: hostfs
      readOnly: true
    - mountPath: /var/log/pods
      name: varlogpods
      readOnly: true
    - mountPath: /hostfs/proc
      name: proc
      readOnly: true
    - mountPath: /hostfs/sys
      name: sys
      readOnly: true
  volumes:
    - hostPath:
        path: /
      name: hostfs
    - hostPath:
        path: /var/log/pods
      name: varlogpods
    - hostPath:
        path: /proc
      name: proc
    - hostPath:
        path: /sys
      name: sys
|
||||
@@ -0,0 +1,89 @@
|
||||
---
# Service account referenced by `serviceAccount: openobserve-collector` in the
# collector resources of this commit.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: openobserve-collector
  name: openobserve-collector
  labels: {app: openobserve-collector}
|
||||
---
# Cluster-wide read access for the collectors.
# Every rule is read-only (get/list/watch); nothing here grants write access.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: openobserve-collector
  labels:
    app: openobserve-collector
rules:
  - nonResourceURLs: ["/metrics", "/metrics/cadvisor"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - endpoints
      - events
      - namespaces
      - namespaces/status
      - nodes
      - nodes/spec
      - nodes/stats
      - nodes/metrics
      - nodes/proxy
      - persistentvolumes
      - persistentvolumeclaims
      - pods
      - pods/status
      - replicationcontrollers
      - replicationcontrollers/status
      - resourcequotas
      - services
      - configmaps
    verbs: ["get", "list", "watch"]
  # Prometheus Operator CRs are only discovered and read, so read verbs are
  # enough; the previous wildcard also allowed create/update/delete.
  - apiGroups: ["monitoring.coreos.com"]
    resources:
      - servicemonitors
      - podmonitors
      - probes
      - scrapeconfigs
    verbs: ["get", "list", "watch"]
  - apiGroups: ["apps"]
    resources:
      - daemonsets
      - deployments
      - replicasets
      - statefulsets
    verbs: ["get", "list", "watch"]
  - apiGroups: ["extensions"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - apiGroups: ["batch"]
    resources:
      - jobs
      - cronjobs
    verbs: ["get", "list", "watch"]
  - apiGroups: ["autoscaling"]
    resources:
      - horizontalpodautoscalers
    verbs: ["get", "list", "watch"]
  - apiGroups: ["networking.k8s.io"]
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - apiGroups: ["discovery.k8s.io"]
    resources:
      - endpointslices
    verbs: ["get", "list", "watch"]
|
||||
---
# Grants the openobserve-collector ClusterRole to the service account of the
# same name in the openobserve-collector namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: openobserve-collector
  labels: {app: openobserve-collector}
subjects:
  - kind: ServiceAccount
    namespace: openobserve-collector
    name: openobserve-collector
roleRef:
  kind: ClusterRole
  name: openobserve-collector
  apiGroup: rbac.authorization.k8s.io
|
||||
@@ -0,0 +1,115 @@
|
||||
---
# ServiceMonitor for Agent Collector Self-Monitoring
# Selects Services labelled for the agent collector and scrapes their
# `metrics` port once a minute.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: openobserve-collector-agent-metrics
  namespace: openobserve-collector
  labels:
    app.kubernetes.io/name: openobserve-collector-agent
    app.kubernetes.io/component: metrics
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: openobserve-collector-agent
      app.kubernetes.io/component: opentelemetry-collector
  endpoints:
    - port: metrics
      interval: 60s
      path: /metrics
      scheme: http
      scrapeTimeout: 30s
      honorLabels: true
      relabelings:
        - sourceLabels: [__meta_kubernetes_pod_name]
          targetLabel: pod
        - sourceLabels: [__meta_kubernetes_pod_node_name]
          targetLabel: node
        - sourceLabels: [__meta_kubernetes_namespace]
          targetLabel: namespace
      metricRelabelings:
        # Keep only the collector's own otelcol_* series. Relabel rules are
        # applied in sequence, so a second `keep` with a disjoint regex
        # (previously 'up|scrape_.*') would drop every series kept here.
        - sourceLabels: [__name__]
          regex: 'otelcol_.*'
          action: keep
|
||||
|
||||
---
# ServiceMonitor for Gateway Collector Self-Monitoring
# Selects Services labelled for the gateway collector and scrapes their
# `metrics` port once a minute.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: openobserve-collector-gateway-metrics
  namespace: openobserve-collector
  labels:
    app.kubernetes.io/name: openobserve-collector-gateway
    app.kubernetes.io/component: metrics
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: openobserve-collector-gateway
      app.kubernetes.io/component: opentelemetry-collector
  endpoints:
    - port: metrics
      interval: 60s
      path: /metrics
      scheme: http
      scrapeTimeout: 30s
      honorLabels: true
      relabelings:
        - sourceLabels: [__meta_kubernetes_pod_name]
          targetLabel: pod
        - sourceLabels: [__meta_kubernetes_pod_node_name]
          targetLabel: node
        - sourceLabels: [__meta_kubernetes_namespace]
          targetLabel: namespace
      metricRelabelings:
        # Keep only the collector's own otelcol_* series. Relabel rules are
        # applied in sequence, so a second `keep` with a disjoint regex
        # (previously 'up|scrape_.*') would drop every series kept here.
        - sourceLabels: [__name__]
          regex: 'otelcol_.*'
          action: keep
|
||||
|
||||
---
# Service for Agent Collector Metrics (if not auto-created)
# Exposes the collector telemetry port 8888 under the port name `metrics`.
apiVersion: v1
kind: Service
metadata:
  name: openobserve-collector-agent-metrics
  namespace: openobserve-collector
  labels:
    app.kubernetes.io/name: openobserve-collector-agent
    app.kubernetes.io/component: opentelemetry-collector
spec:
  type: ClusterIP
  ports:
    - name: metrics
      port: 8888
      protocol: TCP
      targetPort: 8888
  selector:
    # Select on the operator's instance label (<namespace>.<collector name>).
    # The operator sets app.kubernetes.io/name on pods to
    # "<collector name>-collector", so selecting on the bare collector name
    # matched no pods.
    # NOTE(review): confirm against the labels on the running pods.
    app.kubernetes.io/instance: openobserve-collector.openobserve-collector-agent
    app.kubernetes.io/component: opentelemetry-collector
|
||||
|
||||
---
# Service for Gateway Collector Metrics (if not auto-created)
# Exposes the collector telemetry port 8888 under the port name `metrics`.
apiVersion: v1
kind: Service
metadata:
  name: openobserve-collector-gateway-metrics
  namespace: openobserve-collector
  labels:
    app.kubernetes.io/name: openobserve-collector-gateway
    app.kubernetes.io/component: opentelemetry-collector
spec:
  type: ClusterIP
  ports:
    - name: metrics
      port: 8888
      protocol: TCP
      targetPort: 8888
  selector:
    # Select on the operator's instance label (<namespace>.<collector name>).
    # The operator sets app.kubernetes.io/name on pods to
    # "<collector name>-collector", so selecting on the bare collector name
    # matched no pods.
    # NOTE(review): confirm against the labels on the running pods.
    app.kubernetes.io/instance: openobserve-collector.openobserve-collector-gateway
    app.kubernetes.io/component: opentelemetry-collector
|
||||
@@ -0,0 +1,315 @@
|
||||
---
# Cluster-level OpenTelemetry Collector ("gateway").
# Runs as a StatefulSet with the target allocator enabled: collects cluster
# metrics and Kubernetes events, receives OTLP, derives span metrics and a
# service graph from traces, and ships everything to OpenObserve.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: openobserve-collector-gateway
  namespace: openobserve-collector
spec:
  config:
    connectors:
      servicegraph:
        dimensions:
          - http.method
        # Buckets carry explicit duration units; the previous unit-less
        # values 1..5 were read as nanoseconds.
        latency_histogram_buckets:
          - 10ms
          - 100ms
          - 250ms
          - 500ms
          - 1s
          - 2s
          - 5s
          - 10s
        # Pending edges waiting for their client/server counterpart. Set to
        # the connector's documented defaults; the previous 10 items / 1s
        # discarded most edges.
        store:
          max_items: 1000
          ttl: 2s
      spanmetrics:
        aggregation_temporality: AGGREGATION_TEMPORALITY_CUMULATIVE
        dimensions:
          - default: GET
            name: http.method
          - name: http.status_code
        dimensions_cache_size: 1000
        exemplars:
          enabled: true
        histogram:
          explicit:
            buckets:
              - 100us
              - 1ms
              - 2ms
              - 6ms
              - 10ms
              - 100ms
              - 250ms
              - 500ms
              - 1000ms
              - 1400ms
              - 2000ms
              - 5s
              - 10s
              - 30s
              - 60s
              - 120s
              - 300s
              - 600s
        metrics_flush_interval: 15s
    exporters:
      otlphttp/openobserve:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/
        headers:
          Authorization: ${OPENOBSERVE_AUTH}
          stream-name: default
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
      otlphttp/openobserve_k8s_events:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/
        headers:
          Authorization: ${OPENOBSERVE_AUTH}
          stream-name: k8s_events
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
    processors:
      batch:
        send_batch_size: 5000
        timeout: 30s
        send_batch_max_size: 6000
        # NOTE(review): metadata_keys refer to client request metadata, not
        # resource attributes - confirm these keys are ever populated.
        metadata_keys:
          - k8s.namespace.name
          - k8s.pod.name
      k8sattributes:
        auth_type: serviceAccount
        extract:
          labels:
            - from: pod
              key: app.kubernetes.io/name
              tag_name: service.name
            - from: pod
              key: k8s-app
              tag_name: service.name
            - from: pod
              key: app.kubernetes.io/instance
              tag_name: k8s.app.instance
            - from: pod
              key: app.kubernetes.io/version
              tag_name: service.version
            - from: pod
              key: app.kubernetes.io/component
              tag_name: k8s.app.component
          metadata:
            - k8s.namespace.name
            - k8s.pod.name
            - k8s.node.name
            - k8s.deployment.name
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
              - from: resource_attribute
                name: k8s.node.name
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
          # Needs the client connection context, so k8sattributes must run
          # before batch in every pipeline (see service below).
          - sources:
              - from: connection
      resourcedetection:
        detectors:
          - env
        override: true
        timeout: 2s
      metricstransform:
        transforms:
          - include: k8s.node.allocatable_cpu
            match_type: strict
            action: update
            new_name: machine_cpu_cores
          - include: k8s.node.allocatable_memory
            match_type: strict
            action: update
            new_name: machine_memory_bytes
          - include: k8s.node.condition_ready
            match_type: strict
            action: update
            new_name: k8s_node_condition_ready
          - include: k8s.node.condition_memory_pressure
            match_type: strict
            action: update
            new_name: k8s_node_condition_memory_pressure
          - include: k8s.node.condition_disk_pressure
            match_type: strict
            action: update
            new_name: k8s_node_condition_disk_pressure
          - include: k8s.node.condition_pid_pressure
            match_type: strict
            action: update
            new_name: k8s_node_condition_pid_pressure
    receivers:
      k8s_cluster:
        allocatable_types_to_report:
          - cpu
          - memory
          - storage
        collection_interval: 60s
        metrics:
          k8s.container.cpu_limit:
            enabled: false
          k8s.container.cpu_request:
            enabled: false
          k8s.container.memory_limit:
            enabled: false
          k8s.container.memory_request:
            enabled: false
        node_conditions_to_report:
          - Ready
          - MemoryPressure
          - DiskPressure
          - PIDPressure
      k8s_events:
        auth_type: serviceAccount
      # NOTE(review): defined but not referenced by any pipeline in this file.
      k8sobjects:
        auth_type: serviceAccount
        objects:
          - field_selector: status.phase=Running
            interval: 15m
            mode: pull
            name: pods
          - group: events.k8s.io
            mode: watch
            name: events
      otlp:
        protocols:
          grpc: {}
          http: {}
      # Dedicated OTLP/HTTP listener feeding the logs/fluentbit-forward pipeline.
      otlp/logs:
        protocols:
          http:
            endpoint: 0.0.0.0:4418
      prometheus:
        config:
          global:
            scrape_interval: 30s
            evaluation_interval: 30s
            external_labels: {}
          scrape_configs:
            - job_name: 'nginx-ingress'
              static_configs:
                - targets: ['<NODE_1_EXTERNAL_IP>:10254', '<NODE_2_EXTERNAL_IP>:10254', '<NODE_3_EXTERNAL_IP>:10254']
              metrics_path: /metrics
              scrape_interval: 30s
              metric_relabel_configs:
                - source_labels: [__name__]
                  regex: 'nginx_ingress_controller_.*'
                  action: keep
        target_allocator:
          endpoint: http://openobserve-collector-gateway-targetallocator:80
          interval: 30s
          collector_id: "${POD_NAME}"
    service:
      telemetry:
        metrics:
          address: 0.0.0.0:8888
      # Processor order matters: k8sattributes first (it needs the connection
      # context that batching discards), batch last.
      pipelines:
        logs/fluentbit-forward:
          exporters:
            - otlphttp/openobserve
          processors:
            - batch
          receivers:
            - otlp/logs
        logs/k8s_events:
          exporters:
            - otlphttp/openobserve_k8s_events
          processors:
            - k8sattributes
            - resourcedetection
            - batch
          receivers:
            - k8s_events
        metrics:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - resourcedetection
            - metricstransform
            - batch
          receivers:
            - k8s_cluster
            - spanmetrics
            - servicegraph
            - prometheus  # Re-enabled for ServiceMonitor scraping
        traces:
          exporters:
            - otlphttp/openobserve
            - spanmetrics
            - servicegraph
          processors:
            - k8sattributes
            - resourcedetection
            - batch
          receivers:
            - otlp
  daemonSetUpdateStrategy: {}
  deploymentUpdateStrategy: {}
  env:
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    - name: K8S_NODE_IP
      valueFrom:
        fieldRef:
          fieldPath: status.hostIP
    - name: POD_NAME
      valueFrom:
        fieldRef:
          fieldPath: metadata.name
    - name: OPENOBSERVE_AUTH
      valueFrom:
        secretKeyRef:
          name: openobserve-collector-credentials
          key: authorization
  image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.127.0
  ingress:
    route: {}
  managementState: managed
  mode: statefulset
  observability:
    metrics:
      enableMetrics: true
  podDisruptionBudget:
    maxUnavailable: 1
  replicas: 1
  resources:
    requests:
      cpu: 200m
      memory: 512Mi
    limits:
      cpu: 500m
      memory: 1Gi
  serviceAccount: openobserve-collector
  # Empty selectors: no label filter is applied to the Prometheus Operator CRs.
  targetAllocator:
    enabled: true
    serviceAccount: openobserve-collector
    prometheusCR:
      enabled: true
      serviceMonitorSelector: {}
      podMonitorSelector: {}
      scrapeConfigSelector: {}
  upgradeStrategy: automatic
|
||||
@@ -0,0 +1,10 @@
|
||||
---
# Kustomize entry point: lists every manifest of the openobserve-collector
# stack, in the original order.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  # Namespace and credentials
  - namespace.yaml
  - secret.yaml
  # Collectors and their RBAC
  - agent-collector.yaml
  - collector-sa.yaml
  - gateway-collector.yaml
  # ServiceMonitors
  - longhorn-servicemonitor.yaml
  - collector-servicemonitors.yaml
|
||||
@@ -0,0 +1,18 @@
|
||||
---
# Scrapes the `manager` port of Services labelled app=longhorn-manager in the
# longhorn-system namespace every 30 seconds.
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
  namespace: openobserve-collector
  name: longhorn-prometheus-servicemonitor
  labels: {name: longhorn-prometheus-servicemonitor}
spec:
  namespaceSelector:
    matchNames: [longhorn-system]
  selector:
    matchLabels: {app: longhorn-manager}
  endpoints:
    - interval: 30s
      path: /metrics
      port: manager
|
||||
@@ -0,0 +1,8 @@
|
||||
---
# Namespace for the collector stack. Pod Security admission is enforced at the
# `privileged` level, pinned to the `latest` policy version.
kind: Namespace
apiVersion: v1
metadata:
  name: openobserve-collector
  labels:
    pod-security.kubernetes.io/enforce-version: latest
    pod-security.kubernetes.io/enforce: privileged
    name: openobserve-collector
|
||||
40
manifests/infrastructure/openobserve-collector/secret.yaml
Normal file
40
manifests/infrastructure/openobserve-collector/secret.yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: openobserve-collector-credentials
|
||||
namespace: openobserve-collector
|
||||
type: Opaque
|
||||
stringData:
|
||||
#ENC[AES256_GCM,data:2yJkOijDONhJY+hZ7Tk/29jRrv93ztrisX4JELiErla/BPDcsLdZYoIuGPmtsIhYnazTXZoD,iv:4PLBGHbzsXscXJW4RANSlyEuIhkDpFEpT8CgEo8klLM=,tag:ymxpLqQr2MQFW+A3UV+SWg==,type:comment]
|
||||
#ENC[AES256_GCM,data:AHTcPETrdrltvKOH1HLdAU57RuYA/G+dz9mhCUExN7SYmA==,iv:WEbMEVNPCVmqOkWtvVKxH/B1w+Kl5+agqZsHRirfCP8=,tag:KHRsIMS7Evx9WSDEThdHQA==,type:comment]
|
||||
authorization: ENC[AES256_GCM,data:m3CSGlha/eLqLZOaLgg+ZFezabI0Ttwb77Fi7jLL1/u5riRe4hdDk0KaC9iIxob3ZUoSJBV70tGdy9U/QAAXy8zCfAPTekBTGIeUJnuDGYOjZoMzH6jtWtfA566T0WA7jLTZKrQT,iv:IXHN2Y8qYo2Gq8qO2lUz8Dr2OcO1Mh6xVcryzdhjtXo=,tag:S/RRNsQRbtPrXmCwoqoY4g==,type:str]
|
||||
sops:
|
||||
lastmodified: "2025-06-27T23:03:22Z"
|
||||
mac: ENC[AES256_GCM,data:NjQww3sDDUCtmuCyNP1vbn+4x04dA12O+pE2GogwK4bfIyp6fSWEkKDu54a6rx/DyBJSoN9J/3Nb/nIqZ5dYCQRYYZpBFH+kdAQXgy1hnRHM6ck6gXkjGvLyPyS+UMrz1xJ7dIhse663SWD9s9JQCoPEECwYjPcjO6azK7dOvlY=,iv:YnCcpCWU2dTR7t/NbLNBNEj8vSpIYGaZ6zX79gaY4SY=,tag:TS0+mvJtcNTjU1rHmgcbdg==,type:str]
|
||||
pgp:
|
||||
- created_at: "2025-06-27T23:03:22Z"
|
||||
enc: |-
|
||||
-----BEGIN PGP MESSAGE-----
|
||||
|
||||
hF4DZT3mpHTS/JgSAQdAKEkftAs2xJfxjDSA3RfMtmtsnyC/OipUq3V24OqgCC8w
|
||||
0TW/fUq769Ao8v0zIQ1BLPin4gHLCy49j9IKf68YXwZK/kXy/Qxq/g5OtvPyTKbn
|
||||
1GYBCQIQGA7z3J4X7BwV83xHqieZPbPD7+YkLcpw+ceXuJlKE9ldoQR98vITs+S0
|
||||
/NP71qmJ2SLBxl5sX5fRUceHY/DE7PapkWDit8mg7Mi2w+fBwLi4lymN2akoxTKX
|
||||
aZcSZsj/vrw=
|
||||
=Traa
|
||||
-----END PGP MESSAGE-----
|
||||
fp: B120595CA9A643B051731B32E67FF350227BA4E8
|
||||
- created_at: "2025-06-27T23:03:22Z"
|
||||
enc: |-
|
||||
-----BEGIN PGP MESSAGE-----
|
||||
|
||||
hF4DSXzd60P2RKISAQdA6l2wYljh86fs8RTUJ/W1UY8NDxPo65TkZGSoRGFU0AQw
|
||||
daYGSXKT0R60P9uxFrGvQXyfbIGw+fuW/rd85FFtpn47wtoBphr2Mb+9cnB6kuNO
|
||||
1GYBCQIQ3JEH3kRETxoAuCKRBGn6heb+spMCjft9/fVTA31HjIoNFlYBYM0kSnc5
|
||||
p9wcP6V9YDp47mEutzVLQACx/W2qBPb6GDZrdLTTBTuUvQeI/kttga0hHzqYLc6B
|
||||
OYb4FxUXl5g=
|
||||
=DoEk
|
||||
-----END PGP MESSAGE-----
|
||||
fp: 4A8AADB4EBAB9AF88EF7062373CECE06CC80D40C
|
||||
encrypted_regex: ^(data|stringData)$
|
||||
version: 3.10.2
|
||||
Reference in New Issue
Block a user