--- apiVersion: postgresql.cnpg.io/v1 kind: Cluster metadata: name: postgres-shared namespace: postgresql-system labels: app: postgresql-shared backup.longhorn.io/enable: "true" spec: instances: 3 # Use CloudNativePG-compatible PostGIS image # imageName: ghcr.io/cloudnative-pg/postgresql:16.6 # Standard image imageName: registry.keyboardvagabond.com/library/cnpg-postgis:16.6-3.4-v2 # Bootstrap with initial database and user bootstrap: initdb: database: shared_db owner: shared_user encoding: UTF8 localeCollate: en_US.UTF-8 localeCType: en_US.UTF-8 # Install PostGIS extensions in template database (available to all databases) postInitTemplateSQL: - CREATE EXTENSION IF NOT EXISTS postgis; - CREATE EXTENSION IF NOT EXISTS postgis_topology; - CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; - CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder; # PostgreSQL configuration for conservative scaling (4GB memory limit) postgresql: parameters: # Performance optimizations for 4GB memory limit # Reduced max_connections based on actual usage (7 connections observed) max_connections: "150" shared_buffers: "1GB" # 25% of 4GB memory limit effective_cache_size: "3GB" # ~75% of 4GB memory limit maintenance_work_mem: "256MB" # Scaled for 4GB memory limit checkpoint_completion_target: "0.9" wal_buffers: "24MB" default_statistics_target: "100" random_page_cost: "1.1" # Good for SSD storage effective_io_concurrency: "200" work_mem: "24MB" # Increased from 14MB: 150 connections × 24MB = 3.6GB max min_wal_size: "1GB" max_wal_size: "6GB" # Additional optimizations for your hardware (tuned for 2-core limit) max_worker_processes: "8" # Scaled for 2 CPU cores max_parallel_workers: "6" # Increased for better OLTP workload max_parallel_workers_per_gather: "3" # Max 3 workers per query max_parallel_maintenance_workers: "3" # For maintenance operations # Network timeout adjustments for 100Mbps VLAN wal_sender_timeout: "10s" # Increased from 5s for slower network wal_receiver_timeout: "10s" # Increased from 5s for slower network # Multi-instance HA configuration with asynchronous replication synchronous_commit: "on" # favor data integrity # Log long running queries log_min_duration_statement: "5000" # Log queries > 5 seconds log_line_prefix: "%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h " log_statement: "none" # Only log slow queries, not all # Query activity tracking - increase limit for complex queries track_activity_query_size: "8192" # 8KB - allows full query text in pg_stat_activity # Storage configuration using PostgreSQL-optimized storage class storage: size: 50Gi storageClass: longhorn-postgresql # Separate WAL storage for better I/O performance walStorage: size: 10Gi storageClass: longhorn-postgresql # Enable pod anti-affinity for HA cluster (distribute across nodes) affinity: enablePodAntiAffinity: true topologyKey: kubernetes.io/hostname resources: requests: cpu: 750m memory: 1.5Gi limits: cpu: 2000m memory: 4Gi # Enable superuser access for maintenance enableSuperuserAccess: true # Certificate configuration using cert-manager certificates: serverTLSSecret: postgresql-shared-server-cert serverCASecret: postgresql-shared-server-cert clientCASecret: postgresql-shared-client-cert replicationTLSSecret: postgresql-shared-client-cert # Replication slot configuration - enabled for HA cluster replicationSlots: highAvailability: enabled: true # Enable HA replication slots for multi-instance cluster synchronizeReplicas: enabled: true # Enable replica synchronization for HA # Monitoring configuration for Prometheus metrics monitoring: enablePodMonitor: true # Custom metrics for dashboard compatibility customQueriesConfigMap: - name: postgresql-dashboard-metrics key: queries - name: postgresql-connection-metrics key: custom-queries # Reasonable startup delay for stable 2-instance cluster startDelay: 30 probes: startup: initialDelaySeconds: 60 # Allow PostgreSQL to start and begin recovery periodSeconds: 10 timeoutSeconds: 10 failureThreshold: 90 # 15 minutes total for replica recovery with Longhorn storage readiness: initialDelaySeconds: 30 # Allow instance manager to initialize periodSeconds: 10 timeoutSeconds: 10 failureThreshold: 3 liveness: initialDelaySeconds: 120 # Allow full startup before liveness checks periodSeconds: 30 timeoutSeconds: 10 failureThreshold: 3 primaryUpdateMethod: switchover # Use switchover instead of restart to prevent restart loops primaryUpdateStrategy: unsupervised # S3 backup configuration for CloudNativePG - TEMPORARILY DISABLED # backup: # # Backup retention policy # retentionPolicy: "30d" # Keep backups for 30 days # # # S3 backup configuration for Backblaze B2 # barmanObjectStore: # destinationPath: s3://postgresql-backups/cnpg # s3Credentials: # accessKeyId: # name: postgresql-s3-backup-credentials # key: AWS_ACCESS_KEY_ID # secretAccessKey: # name: postgresql-s3-backup-credentials # key: AWS_SECRET_ACCESS_KEY # endpointURL: https://s3.eu-central-003.backblazeb2.com # # # Backblaze B2 specific configuration # data: # compression: gzip # encryption: AES256 # immediateCheckpoint: true # jobs: 2 # Parallel backup jobs # # wal: # compression: gzip # encryption: AES256 # maxParallel: 2 # Parallel WAL archiving