Files
Michael DiLeo 7327d77dcd redaction (#1)
Add the redacted source file for demo purposes

Reviewed-on: https://source.michaeldileo.org/michael_dileo/Keybard-Vagabond-Demo/pulls/1
Co-authored-by: Michael DiLeo <michael_dileo@proton.me>
Co-committed-by: Michael DiLeo <michael_dileo@proton.me>
2025-12-24 13:40:47 +00:00

221 lines
8.9 KiB
YAML

# Kept for reference only: the manifests below are commented out, so they define no active resources.
# ---
# # OpenObserve Alert Configuration for Celery Queue Monitoring
# # This file contains the alert configurations that should be imported into OpenObserve
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: openobserve-alert-configs
# namespace: celery-monitoring
# labels:
# app.kubernetes.io/name: openobserve-alerts
# app.kubernetes.io/component: monitoring
# data:
# celery-queue-alerts.json: |
# {
# "alerts": [
# {
# "name": "PieFed Celery Queue High",
# "description": "PieFed Celery queue has more than 10,000 pending tasks",
# "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '5 minutes'",
# "condition": "avg_queue_length > 10000",
# "frequency": "5m",
# "severity": "warning",
# "enabled": true,
# "actions": [
# {
# "type": "webhook",
# "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
# "message": "🚨 PieFed Celery queue is high: {{avg_queue_length}} tasks pending"
# }
# ]
# },
# {
# "name": "PieFed Celery Queue Critical",
# "description": "PieFed Celery queue has more than 50,000 pending tasks",
# "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '5 minutes'",
# "condition": "avg_queue_length > 50000",
# "frequency": "2m",
# "severity": "critical",
# "enabled": true,
# "actions": [
# {
# "type": "webhook",
# "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
# "message": "🔥 CRITICAL: PieFed Celery queue is critically high: {{avg_queue_length}} tasks pending. Consider scaling workers!"
# }
# ]
# },
# {
# "name": "BookWyrm Celery Queue High",
# "description": "BookWyrm Celery queue has more than 1,000 pending tasks",
# "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='total' AND database='bookwyrm' AND _timestamp >= now() - interval '5 minutes'",
# "condition": "avg_queue_length > 1000",
# "frequency": "5m",
# "severity": "warning",
# "enabled": true,
# "actions": [
# {
# "type": "webhook",
# "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
# "message": "📚 BookWyrm Celery queue is high: {{avg_queue_length}} tasks pending"
# }
# ]
# },
# {
# "name": "Redis Connection Lost",
# "description": "Redis connection is down for Celery monitoring",
# "query": "SELECT avg(redis_connection_status) as connection_status FROM metrics WHERE _timestamp >= now() - interval '2 minutes'",
# "condition": "connection_status < 1",
# "frequency": "1m",
# "severity": "critical",
# "enabled": true,
# "actions": [
# {
# "type": "webhook",
# "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
# "message": "💥 CRITICAL: Redis connection lost for Celery monitoring!"
# }
# ]
# },
# {
# "name": "Celery Queue Processing Stalled",
# "description": "Celery queue size is larger than it was 15 minutes ago",
# "query": "SELECT celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '15 minutes' ORDER BY _timestamp DESC LIMIT 1",
# "condition": "celery_queue_length > (SELECT celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '20 minutes' AND _timestamp < now() - interval '15 minutes' ORDER BY _timestamp DESC LIMIT 1)",
# "frequency": "10m",
# "severity": "warning",
# "enabled": true,
# "actions": [
# {
# "type": "webhook",
# "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
# "message": "⚠️ Celery queue processing appears stalled. Queue size is larger than it was 15 minutes ago."
# }
# ]
# }
# ]
# }
# dashboard-config.json: |
# {
# "dashboard": {
# "title": "Celery Queue Monitoring",
# "description": "Monitor Celery queue sizes and processing rates for PieFed and BookWyrm",
# "panels": [
# {
# "title": "PieFed Queue Length",
# "type": "line",
# "query": "SELECT _timestamp, celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '24 hours'",
# "x_axis": "_timestamp",
# "y_axis": "celery_queue_length"
# },
# {
# "title": "BookWyrm Total Queue Length",
# "type": "line",
# "query": "SELECT _timestamp, celery_queue_length FROM metrics WHERE queue_name='total' AND database='bookwyrm' AND _timestamp >= now() - interval '24 hours'",
# "x_axis": "_timestamp",
# "y_axis": "celery_queue_length"
# },
# {
# "title": "Queue Processing Rate (PieFed)",
# "type": "line",
# "query": "SELECT _timestamp, celery_queue_length - LAG(celery_queue_length, 1) OVER (ORDER BY _timestamp) as processing_rate FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '6 hours'",
# "x_axis": "_timestamp",
# "y_axis": "processing_rate"
# },
# {
# "title": "Redis Connection Status",
# "type": "stat",
# "query": "SELECT redis_connection_status FROM metrics WHERE _timestamp >= now() - interval '5 minutes' ORDER BY _timestamp DESC LIMIT 1"
# },
# {
# "title": "Current Queue Sizes",
# "type": "table",
# "query": "SELECT queue_name, database, max(celery_queue_length) as celery_queue_length FROM metrics WHERE _timestamp >= now() - interval '5 minutes' GROUP BY queue_name, database ORDER BY celery_queue_length DESC"
# }
# ]
# }
# }
# ---
# # Instructions ConfigMap
# apiVersion: v1
# kind: ConfigMap
# metadata:
# name: openobserve-setup-instructions
# namespace: celery-monitoring
# data:
# README.md: |
# # OpenObserve Celery Queue Monitoring Setup
# ## 1. Import Alerts
# 1. Access your OpenObserve dashboard
# 2. Go to Alerts → Import
# 3. Copy the contents of `celery-queue-alerts.json` from the `openobserve-alert-configs` ConfigMap
# 4. Paste and import the alert configurations
# ## 2. Configure Webhooks
# Update the webhook URLs in the alert configurations:
# - Replace `https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK` with your actual Slack webhook URL
# - Or configure other notification methods (email, Discord, etc.)
# ## 3. Import Dashboard
# 1. Go to Dashboards → Import
# 2. Copy the contents of `dashboard-config.json` from the `openobserve-alert-configs` ConfigMap
# 3. Paste and import the dashboard configuration
# ## 4. Verify Metrics
# Check that metrics are being collected:
# ```sql
# SELECT * FROM metrics WHERE __name__ LIKE 'celery_%' ORDER BY _timestamp DESC LIMIT 10
# ```
# ## 5. Alert Thresholds
# Current alert thresholds:
# - **PieFed Warning**: > 10,000 tasks
# - **PieFed Critical**: > 50,000 tasks
# - **BookWyrm Warning**: > 1,000 tasks
# - **Redis Connection**: Connection lost
# Adjust these thresholds based on your normal queue sizes and processing capacity.
# ## 6. Monitoring Queries
# Useful queries for monitoring:
# ### Current queue sizes:
# ```sql
# SELECT queue_name, database, max(celery_queue_length) as celery_queue_length
# FROM metrics
# WHERE _timestamp >= now() - interval '5 minutes'
# GROUP BY queue_name, database
# ORDER BY celery_queue_length DESC
# ```
# ### Net change in queue length between consecutive samples (negative = queue draining):
# ```sql
# SELECT _timestamp,
# celery_queue_length - LAG(celery_queue_length, 1) OVER (ORDER BY _timestamp) as processing_rate
# FROM metrics
# WHERE queue_name='celery' AND database='piefed'
# AND _timestamp >= now() - interval '1 hour'
# ```
# ### Average queue size over time:
# ```sql
# SELECT DATE_TRUNC('hour', _timestamp) as hour,
# AVG(celery_queue_length) as avg_queue_length
# FROM metrics
# WHERE queue_name='celery' AND database='piefed'
# AND _timestamp >= now() - interval '24 hours'
# GROUP BY hour
# ORDER BY hour
# ```