# Keeping for reference # --- # # OpenObserve Alert Configuration for Celery Queue Monitoring # # This file contains the alert configurations that should be imported into OpenObserve # apiVersion: v1 # kind: ConfigMap # metadata: # name: openobserve-alert-configs # namespace: celery-monitoring # labels: # app.kubernetes.io/name: openobserve-alerts # app.kubernetes.io/component: monitoring # data: # celery-queue-alerts.json: | # { # "alerts": [ # { # "name": "PieFed Celery Queue High", # "description": "PieFed Celery queue has more than 10,000 pending tasks", # "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '5 minutes'", # "condition": "avg_queue_length > 10000", # "frequency": "5m", # "severity": "warning", # "enabled": true, # "actions": [ # { # "type": "webhook", # "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK", # "message": "🚨 PieFed Celery queue is high: {{avg_queue_length}} tasks pending" # } # ] # }, # { # "name": "PieFed Celery Queue Critical", # "description": "PieFed Celery queue has more than 50,000 pending tasks", # "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '5 minutes'", # "condition": "avg_queue_length > 50000", # "frequency": "2m", # "severity": "critical", # "enabled": true, # "actions": [ # { # "type": "webhook", # "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK", # "message": "🔥 CRITICAL: PieFed Celery queue is critically high: {{avg_queue_length}} tasks pending. Consider scaling workers!" 
# } # ] # }, # { # "name": "BookWyrm Celery Queue High", # "description": "BookWyrm Celery queue has more than 1,000 pending tasks", # "query": "SELECT avg(celery_queue_length) as avg_queue_length FROM metrics WHERE queue_name='total' AND database='bookwyrm' AND _timestamp >= now() - interval '5 minutes'", # "condition": "avg_queue_length > 1000", # "frequency": "5m", # "severity": "warning", # "enabled": true, # "actions": [ # { # "type": "webhook", # "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK", # "message": "📚 BookWyrm Celery queue is high: {{avg_queue_length}} tasks pending" # } # ] # }, # { # "name": "Redis Connection Lost", # "description": "Redis connection is down for Celery monitoring", # "query": "SELECT avg(redis_connection_status) as connection_status FROM metrics WHERE _timestamp >= now() - interval '2 minutes'", # "condition": "connection_status < 1", # "frequency": "1m", # "severity": "critical", # "enabled": true, # "actions": [ # { # "type": "webhook", # "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK", # "message": "💥 CRITICAL: Redis connection lost for Celery monitoring!" # } # ] # }, # { # "name": "Celery Queue Processing Stalled", # "description": "Celery queue size hasn't decreased in 15 minutes", # "query": "SELECT celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '15 minutes' ORDER BY _timestamp DESC LIMIT 1", # "condition": "celery_queue_length > (SELECT celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '20 minutes' AND _timestamp < now() - interval '15 minutes' ORDER BY _timestamp DESC LIMIT 1)", # "frequency": "10m", # "severity": "warning", # "enabled": true, # "actions": [ # { # "type": "webhook", # "webhook_url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK", # "message": "⚠️ Celery queue processing appears stalled. 
Queue size hasn't decreased in 15 minutes." # } # ] # } # ] # } # dashboard-config.json: | # { # "dashboard": { # "title": "Celery Queue Monitoring", # "description": "Monitor Celery queue sizes and processing rates for PieFed and BookWyrm", # "panels": [ # { # "title": "PieFed Queue Length", # "type": "line", # "query": "SELECT _timestamp, celery_queue_length FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '24 hours'", # "x_axis": "_timestamp", # "y_axis": "celery_queue_length" # }, # { # "title": "BookWyrm Total Queue Length", # "type": "line", # "query": "SELECT _timestamp, celery_queue_length FROM metrics WHERE queue_name='total' AND database='bookwyrm' AND _timestamp >= now() - interval '24 hours'", # "x_axis": "_timestamp", # "y_axis": "celery_queue_length" # }, # { # "title": "Queue Processing Rate (PieFed)", # "type": "line", # "query": "SELECT _timestamp, celery_queue_length - LAG(celery_queue_length, 1) OVER (ORDER BY _timestamp) as processing_rate FROM metrics WHERE queue_name='celery' AND database='piefed' AND _timestamp >= now() - interval '6 hours'", # "x_axis": "_timestamp", # "y_axis": "processing_rate" # }, # { # "title": "Redis Connection Status", # "type": "stat", # "query": "SELECT redis_connection_status FROM metrics WHERE _timestamp >= now() - interval '5 minutes' ORDER BY _timestamp DESC LIMIT 1" # }, # { # "title": "Current Queue Sizes", # "type": "table", # "query": "SELECT queue_name, database, max(celery_queue_length) AS max_queue_length FROM metrics WHERE _timestamp >= now() - interval '5 minutes' GROUP BY queue_name, database ORDER BY max_queue_length DESC" # } # ] # } # } # --- # # Instructions ConfigMap # apiVersion: v1 # kind: ConfigMap # metadata: # name: openobserve-setup-instructions # namespace: celery-monitoring # data: # README.md: | # # OpenObserve Celery Queue Monitoring Setup # ## 1. Import Alerts # 1. Access your OpenObserve dashboard # 2. Go to Alerts → Import # 3. 
Copy the contents of `celery-queue-alerts.json` from the `openobserve-alert-configs` ConfigMap # 4. Paste and import the alert configurations # ## 2. Configure Webhooks # Update the webhook URLs in the alert configurations: # - Replace `https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK` with your actual Slack webhook URL # - Or configure other notification methods (email, Discord, etc.) # ## 3. Import Dashboard # 1. Go to Dashboards → Import # 2. Copy the contents of `dashboard-config.json` from the `openobserve-alert-configs` ConfigMap # 3. Paste and import the dashboard configuration # ## 4. Verify Metrics # Check that metrics are being collected: # ```sql # SELECT * FROM metrics WHERE __name__ LIKE 'celery_%' ORDER BY _timestamp DESC LIMIT 10 # ``` # ## 5. Alert Thresholds # Current alert thresholds: # - **PieFed Warning**: > 10,000 tasks # - **PieFed Critical**: > 50,000 tasks # - **BookWyrm Warning**: > 1,000 tasks # - **Redis Connection**: Connection lost # - **PieFed Processing Stalled**: queue size has not decreased in 15 minutes # Adjust these thresholds based on your normal queue sizes and processing capacity. # ## 6. Monitoring Queries # Useful queries for monitoring: # ### Current queue sizes (peak per queue over the last 5 minutes): # ```sql # SELECT queue_name, database, max(celery_queue_length) AS max_queue_length # FROM metrics # WHERE _timestamp >= now() - interval '5 minutes' # GROUP BY queue_name, database # ORDER BY max_queue_length DESC # ``` # ### Queue processing rate (change in queue length between consecutive samples; negative = queue draining): # ```sql # SELECT _timestamp, # celery_queue_length - LAG(celery_queue_length, 1) OVER (ORDER BY _timestamp) as processing_rate # FROM metrics # WHERE queue_name='celery' AND database='piefed' # AND _timestamp >= now() - interval '1 hour' # ``` # ### Average queue size over time: # ```sql # SELECT DATE_TRUNC('hour', _timestamp) as hour, # AVG(celery_queue_length) as avg_queue_length # FROM metrics # WHERE queue_name='celery' AND database='piefed' # AND _timestamp >= now() - interval '24 hours' # GROUP BY hour # ORDER BY hour # ```