# Origin: commit "add source code and readme" (diff hunk @@ -0,0 +1,530 @@).
---
# OpenTelemetryCollector resource run as a daemonset. It tails pod logs,
# scrapes host, kubelet and Prometheus metrics, receives OTLP traces, and
# exports all three signals to OpenObserve over OTLP/HTTP.
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: openobserve-collector-agent
  namespace: openobserve-collector
spec:
  managementState: managed
  image: ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib:0.127.0
  config:
    exporters:
      otlphttp/openobserve:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080
        headers:
          # Filled from the OPENOBSERVE_AUTH variable declared under spec.env
          # (sourced from a Secret). The explicit env: provider prefix matches
          # the form already used for K8S_NODE_IP in the kubeletstats receiver.
          Authorization: ${env:OPENOBSERVE_AUTH}
        logs_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/logs
        metrics_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/metrics
        traces_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/traces
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
      # NOTE(review): this exporter is defined but not referenced by any
      # pipeline in service.pipelines below.
      otlphttp/openobserve_k8s_events:
        endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080
        headers:
          Authorization: ${env:OPENOBSERVE_AUTH}
          stream-name: k8s_events
        logs_endpoint: http://openobserve-openobserve-standalone.openobserve.svc.cluster.local:5080/api/default/v1/logs
        # HTTP client configuration to match OpenObserve HTTP/1.1
        compression: gzip
        max_idle_conns: 50
        max_idle_conns_per_host: 5
        idle_conn_timeout: 120s
        read_buffer_size: 8192
        write_buffer_size: 8192
    extensions:
      # NOTE(review): declared here, but the service section below has no
      # `extensions` list, so zpages is never enabled.
      zpages: {}
    processors:
      batch:
        send_batch_size: 5000
        timeout: 30s
        send_batch_max_size: 6000
        # NOTE(review): these look like resource attribute names; confirm they
        # are the client metadata keys that batch's metadata_keys expects.
        metadata_keys:
          - k8s.namespace.name
          - k8s.pod.name
      k8sattributes:
        auth_type: serviceAccount
        extract:
          labels:
            - from: pod
              key: app.kubernetes.io/name
              tag_name: service.name
            - from: pod
              key: app.kubernetes.io/component
              tag_name: k8s.app.component
          metadata:
            - k8s.pod.name
            - k8s.namespace.name
            - k8s.node.name
        filter:
          # K8S_NODE_NAME is set from spec.nodeName under spec.env.
          node_from_env_var: K8S_NODE_NAME
        passthrough: false
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
              - from: resource_attribute
                name: k8s.node.name
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.name
              - from: resource_attribute
                name: k8s.namespace.name
          # Connection-based association needs the client address from the
          # request context, so k8sattributes must run before batch.
          - sources:
              - from: connection

      # Copies the k8s.node.name attribute into k8s_node_name.
      attributes:
        actions:
          - key: k8s_node_name
            from_attribute: k8s.node.name
            action: upsert
      # NOTE(review): defined but not referenced by any pipeline below.
      groupbyattrs/final:
        keys:
          - k8s_node_name
          - direction
      # Renames dotted metric names to underscore-separated names.
      # NOTE(review): system.cpu.utilization and k8s.node.cpu.utilization are
      # both renamed to k8s_node_cpu_utilization; confirm the collision is
      # intended. The two container_fs_* entries rename a metric to itself.
      metricstransform:
        transforms:
          - include: system.network.io
            match_type: strict
            action: update
            new_name: system_network_io
          - include: system.cpu.time
            match_type: strict
            action: update
            new_name: k8s_node_cpu_time
          - include: system.cpu.utilization
            match_type: strict
            action: update
            new_name: k8s_node_cpu_utilization
          - include: k8s.node.cpu.utilization
            match_type: strict
            action: update
            new_name: k8s_node_cpu_utilization
          - include: system.memory.usage
            match_type: strict
            action: update
            new_name: system_memory_usage
          - include: system.memory.utilization
            match_type: strict
            action: update
            new_name: k8s_node_memory_utilization
          - include: system.filesystem.utilization
            match_type: strict
            action: update
            new_name: k8s_node_filesystem_utilization
          - include: container_fs_reads_total
            match_type: strict
            action: update
            new_name: container_fs_reads_total
          - include: container_fs_writes_total
            match_type: strict
            action: update
            new_name: container_fs_writes_total
          - include: k8s.pod.cpu_request_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_cpu_request_utilization
          - include: k8s.pod.cpu_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_cpu_limit_utilization
          - include: k8s.pod.memory_request_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_memory_request_utilization
          - include: k8s.pod.memory_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_pod_memory_limit_utilization
          - include: k8s.container.cpu_request_utilization
            match_type: strict
            action: update
            new_name: k8s_container_cpu_request_utilization
          - include: k8s.container.cpu_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_container_cpu_limit_utilization
          - include: k8s.container.memory_request_utilization
            match_type: strict
            action: update
            new_name: k8s_container_memory_request_utilization
          - include: k8s.container.memory_limit_utilization
            match_type: strict
            action: update
            new_name: k8s_container_memory_limit_utilization
      # NOTE(review): defined but not referenced by any pipeline below.
      resourcedetection:
        detectors:
          - system
          - env
          - k8snode
        override: true
        system:
          hostname_sources:
            - os
            - dns
      # Filter out high-cardinality, low-value metrics
      filter/drop_noisy_metrics:
        metrics:
          exclude:
            match_type: regexp
            metric_names:
              - ".*_bucket$"  # Drop histogram buckets for non-critical metrics
              - "go_.*"  # Drop Go runtime metrics
              - "promhttp_.*"  # Drop Prometheus HTTP metrics
              - "process_.*"  # Drop process metrics
              - "container_spec_.*"  # Drop container spec metrics
              - "container_tasks_state"  # Drop task state metrics
      # Add intelligent trace sampling to reduce from 100% to ~15-20%
      # NOTE(review): tail sampling only sees the spans that reach this
      # collector instance. In daemonset mode, spans of one trace may arrive on
      # different nodes; confirm that partial-trace decisions are acceptable.
      tail_sampling:
        decision_wait: 10s
        num_traces: 50000
        expected_new_traces_per_sec: 10
        policies:
          # Always sample error traces (100%)
          - name: errors
            type: status_code
            status_code:
              status_codes: [ERROR]
          # Always sample slow traces >1s (100%)
          - name: slow-traces
            type: latency
            latency:
              threshold_ms: 1000
          # Always sample traces from critical namespaces (100%)
          - name: critical-namespaces
            type: string_attribute
            string_attribute:
              key: k8s.namespace.name
              values: [kube-system, openobserve, cert-manager, ingress-nginx, longhorn-system]
          # Sample 5% of normal traces (reduced from 10% for resource optimization)
          - name: probabilistic
            type: probabilistic
            probabilistic:
              sampling_percentage: 5
    receivers:
      filelog/std:
        exclude:
          - /var/log/pods/default_daemonset-collector*_*/opentelemetry-collector/*.log
          # Skip this collector's own pod logs so they are not re-ingested in a
          # feedback loop. The entry above targets a "default" namespace
          # deployment and does not match this resource's namespace or name.
          # NOTE(review): assumes operator-managed pods are named
          # "<metadata.name>-collector-*" with container "otc-container";
          # verify with `kubectl get pods -n openobserve-collector`.
          - /var/log/pods/openobserve-collector_openobserve-collector-agent-collector*_*/otc-container/*.log
        include:
          - /var/log/pods/*/*/*.log
        include_file_name: false
        include_file_path: true
        operators:
          # Route each line to a parser according to the shape of its prefix.
          - id: get-format
            routes:
              - expr: 'body matches "^\\{"'
                output: parser-docker
              - expr: 'body matches "^[^ Z]+ "'
                output: parser-crio
              - expr: 'body matches "^[^ Z]+Z"'
                output: parser-containerd
            type: router
          - id: parser-crio
            output: extract_metadata_from_filepath
            regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
            timestamp:
              layout: '2006-01-02T15:04:05.999999999Z07:00'
              layout_type: gotime
              parse_from: attributes.time
            type: regex_parser
          - id: parser-containerd
            output: extract_metadata_from_filepath
            regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
            timestamp:
              layout: "%Y-%m-%dT%H:%M:%S.%LZ"
              parse_from: attributes.time
            type: regex_parser
          - id: parser-docker
            output: extract_metadata_from_filepath
            timestamp:
              layout: "%Y-%m-%dT%H:%M:%S.%LZ"
              parse_from: attributes.time
            type: json_parser
          # Pull namespace, pod name, pod UID, container name and restart count
          # out of the log file path.
          - cache:
              size: 128
            id: extract_metadata_from_filepath
            parse_from: attributes["log.file.path"]
            regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]{36})\/(?P<container_name>[^\._]+)\/(?P<restart_count>\d+)\.log$'
            type: regex_parser
          # Move the parsed fields onto the log body and resource attributes.
          - from: attributes.log
            to: body
            type: move
          - from: attributes.stream
            to: attributes["log.iostream"]
            type: move
          - from: attributes.container_name
            to: resource["k8s.container.name"]
            type: move
          - from: attributes.namespace
            to: resource["k8s.namespace.name"]
            type: move
          - from: attributes.pod_name
            to: resource["k8s.pod.name"]
            type: move
          - from: attributes.restart_count
            to: resource["k8s.container.restart_count"]
            type: move
          - from: attributes.uid
            to: resource["k8s.pod.uid"]
            type: move
        start_at: end
      hostmetrics:
        collection_interval: 60s
        # The host root filesystem is mounted read-only at /hostfs
        # (see spec.volumeMounts).
        root_path: /hostfs
        scrapers:
          cpu: {}
          disk: {}
          memory: {}
          filesystem:
            exclude_fs_types:
              fs_types:
                - autofs
                - binfmt_misc
                - bpf
                - cgroup2
                - configfs
                - debugfs
                - devpts
                - devtmpfs
                - fusectl
                - hugetlbfs
                - iso9660
                - mqueue
                - nsfs
                - overlay
                - proc
                - procfs
                - pstore
                - rpc_pipefs
                - securityfs
                - selinuxfs
                - squashfs
                - sysfs
                - tracefs
              match_type: strict
            exclude_mount_points:
              match_type: regexp
              mount_points:
                - /dev/.*
                - /proc/.*
                - /sys/.*
                - /run/k3s/containerd/.*
                - /var/lib/docker/.*
                - /var/lib/kubelet/.*
                - /snap/.*
          load: {}
          network: {}
      kubeletstats:
        auth_type: serviceAccount
        collection_interval: 60s
        # K8S_NODE_IP is set from status.hostIP under spec.env.
        endpoint: https://${env:K8S_NODE_IP}:10250
        extra_metadata_labels:
          - container.id
          - k8s.volume.type
        insecure_skip_verify: true
        metric_groups:
          - node
          - pod
          - container
          - volume
        metrics:
          k8s.pod.cpu_limit_utilization:
            enabled: true
          k8s.pod.cpu_request_utilization:
            enabled: true
          k8s.pod.memory_limit_utilization:
            enabled: true
          k8s.pod.memory_request_utilization:
            enabled: true
          k8s.container.cpu_limit_utilization:
            enabled: true
          k8s.container.cpu_request_utilization:
            enabled: true
          k8s.container.memory_limit_utilization:
            enabled: true
          k8s.container.memory_request_utilization:
            enabled: true
      otlp:
        protocols:
          grpc: {}
          http: {}
      prometheus:
        config:
          scrape_configs:
            # Scrapes the address configured in service.telemetry.metrics.
            - job_name: otel-collector
              scrape_interval: 30s
              static_configs:
                - targets:
                    - 0.0.0.0:8888
            - job_name: postgresql-cnpg
              scrape_interval: 60s
              kubernetes_sd_configs:
                - role: pod
                  namespaces:
                    names:
                      - postgresql-system
              relabel_configs:
                # Only scrape pods with the cnpg.io/cluster label
                - source_labels: [__meta_kubernetes_pod_label_cnpg_io_cluster]
                  action: keep
                  regex: postgres-shared
                # Use the metrics port (9187)
                - source_labels: [__meta_kubernetes_pod_container_port_name]
                  action: keep
                  regex: metrics
                # Set the metrics path
                - target_label: __metrics_path__
                  replacement: /metrics
                # Add useful labels
                - source_labels: [__meta_kubernetes_pod_name]
                  target_label: instance
                - source_labels: [__meta_kubernetes_pod_label_cnpg_io_cluster]
                  target_label: cnpg_cluster
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
            # Celery and Redis metrics - direct scraping
            - job_name: redis-exporter
              scrape_interval: 30s
              kubernetes_sd_configs:
                - role: endpoints
                  namespaces:
                    names:
                      - redis-system
              relabel_configs:
                - source_labels: [__meta_kubernetes_service_name]
                  action: keep
                  regex: redis-exporter
                - source_labels: [__meta_kubernetes_endpoint_port_name]
                  action: keep
                  regex: metrics
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
                - source_labels: [__meta_kubernetes_service_name]
                  target_label: kubernetes_service_name
            - job_name: celery-metrics-exporter
              scrape_interval: 60s
              kubernetes_sd_configs:
                - role: endpoints
                  namespaces:
                    names:
                      - celery-monitoring
              relabel_configs:
                - source_labels: [__meta_kubernetes_service_name]
                  action: keep
                  regex: celery-metrics-exporter
                - source_labels: [__meta_kubernetes_endpoint_port_name]
                  action: keep
                  regex: metrics
                - source_labels: [__meta_kubernetes_namespace]
                  target_label: kubernetes_namespace
                - source_labels: [__meta_kubernetes_service_name]
                  target_label: kubernetes_service_name
            # Longhorn metrics still handled by target allocator via ServiceMonitor
    service:
      telemetry:
        metrics:
          address: 0.0.0.0:8888
      # Processor order matters: k8sattributes runs first so it still sees the
      # incoming request context, sampling and filtering run next so dropped
      # data is never batched, and batch runs last, right before export.
      pipelines:
        logs:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - batch
          receivers:
            - filelog/std
        metrics:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - attributes
            - filter/drop_noisy_metrics
            - metricstransform
            - batch
          receivers:
            - kubeletstats
            - hostmetrics
            - prometheus
        traces:
          exporters:
            - otlphttp/openobserve
          processors:
            - k8sattributes
            - tail_sampling
            - batch
          receivers:
            - otlp
  env:
    - name: K8S_NODE_IP
      valueFrom:
        fieldRef:
          fieldPath: status.hostIP
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    - name: OPENOBSERVE_AUTH
      valueFrom:
        secretKeyRef:
          name: openobserve-collector-credentials
          key: authorization
  ingress:
    route: {}
  mode: daemonset
  observability:
    metrics:
      enableMetrics: true
  podDisruptionBudget:
    maxUnavailable: 1
  # NOTE(review): presumably has no effect while mode is daemonset; confirm.
  replicas: 1
  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 300m
      memory: 512Mi
  # Runs as root. NOTE(review): presumably required to read the host log and
  # proc/sys mounts below; confirm before tightening.
  securityContext:
    runAsUser: 0
    runAsGroup: 0
  serviceAccount: openobserve-collector
  hostNetwork: true
  upgradeStrategy: automatic
  volumeMounts:
    - mountPath: /hostfs
      name: hostfs
      readOnly: true
    - mountPath: /var/log/pods
      name: varlogpods
      readOnly: true
    - mountPath: /hostfs/proc
      name: proc
      readOnly: true
    - mountPath: /hostfs/sys
      name: sys
      readOnly: true
  volumes:
    - hostPath:
        path: /
      name: hostfs
    - hostPath:
        path: /var/log/pods
      name: varlogpods
    - hostPath:
        path: /proc
      name: proc
    - hostPath:
        path: /sys
      name: sys
# End of manifest. (Repository web UI footer text "Reference in New Issue" / "Block a user" was page chrome, not part of the file.)