Files

178 lines
5.9 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
---
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: postgres-shared
namespace: postgresql-system
labels:
app: postgresql-shared
backup.longhorn.io/enable: "true"
spec:
instances: 3
# Use CloudNativePG-compatible PostGIS image
# imageName: ghcr.io/cloudnative-pg/postgresql:16.6 # Standard image
imageName: registry.keyboardvagabond.com/library/cnpg-postgis:16.6-3.4-v2
# Bootstrap with initial database and user
bootstrap:
initdb:
database: shared_db
owner: shared_user
encoding: UTF8
localeCollate: en_US.UTF-8
localeCType: en_US.UTF-8
# Install PostGIS extensions in template database (available to all databases)
postInitTemplateSQL:
- CREATE EXTENSION IF NOT EXISTS postgis;
- CREATE EXTENSION IF NOT EXISTS postgis_topology;
- CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
- CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder;
# PostgreSQL configuration for conservative scaling (4GB memory limit)
postgresql:
parameters:
# Performance optimizations for 4GB memory limit
# Reduced max_connections based on actual usage (7 connections observed)
max_connections: "150"
shared_buffers: "1GB" # 25% of 4GB memory limit
effective_cache_size: "3GB" # ~75% of 4GB memory limit
maintenance_work_mem: "256MB" # Scaled for 4GB memory limit
checkpoint_completion_target: "0.9"
wal_buffers: "24MB"
default_statistics_target: "100"
random_page_cost: "1.1" # Good for SSD storage
effective_io_concurrency: "200"
work_mem: "24MB" # Increased from 14MB: 150 connections × 24MB = 3.6GB max
min_wal_size: "1GB"
max_wal_size: "6GB"
# Additional optimizations for your hardware (tuned for 2-core limit)
max_worker_processes: "8" # Scaled for 2 CPU cores
max_parallel_workers: "6" # Increased for better OLTP workload
max_parallel_workers_per_gather: "3" # Max 3 workers per query
max_parallel_maintenance_workers: "3" # For maintenance operations
# Network timeout adjustments for 100Mbps VLAN
wal_sender_timeout: "10s" # Increased from 5s for slower network
wal_receiver_timeout: "10s" # Increased from 5s for slower network
# Multi-instance HA configuration with asynchronous replication
synchronous_commit: "on" # favor data integrity
# Log long running queries
log_min_duration_statement: "5000" # Log queries > 5 seconds
log_line_prefix: "%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h "
log_statement: "none" # Only log slow queries, not all
# Query activity tracking - increase limit for complex queries
track_activity_query_size: "8192" # 8KB - allows full query text in pg_stat_activity
# Storage configuration using PostgreSQL-optimized storage class
storage:
size: 50Gi
storageClass: longhorn-postgresql
# Separate WAL storage for better I/O performance
walStorage:
size: 10Gi
storageClass: longhorn-postgresql
# Enable pod anti-affinity for HA cluster (distribute across nodes)
affinity:
enablePodAntiAffinity: true
topologyKey: kubernetes.io/hostname
resources:
requests:
cpu: 750m
memory: 1.5Gi
limits:
cpu: 2000m
memory: 4Gi
# Enable superuser access for maintenance
enableSuperuserAccess: true
# Certificate configuration using cert-manager
certificates:
serverTLSSecret: postgresql-shared-server-cert
serverCASecret: postgresql-shared-server-cert
clientCASecret: postgresql-shared-client-cert
replicationTLSSecret: postgresql-shared-client-cert
# Replication slot configuration - enabled for HA cluster
replicationSlots:
highAvailability:
enabled: true # Enable HA replication slots for multi-instance cluster
synchronizeReplicas:
enabled: true # Enable replica synchronization for HA
# Monitoring configuration for Prometheus metrics
monitoring:
enablePodMonitor: true
# Custom metrics for dashboard compatibility
customQueriesConfigMap:
- name: postgresql-dashboard-metrics
key: queries
- name: postgresql-connection-metrics
key: custom-queries
# Reasonable startup delay for stable 2-instance cluster
startDelay: 30
probes:
startup:
initialDelaySeconds: 60 # Allow PostgreSQL to start and begin recovery
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 90 # 15 minutes total for replica recovery with Longhorn storage
readiness:
initialDelaySeconds: 30 # Allow instance manager to initialize
periodSeconds: 10
timeoutSeconds: 10
failureThreshold: 3
liveness:
initialDelaySeconds: 120 # Allow full startup before liveness checks
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
primaryUpdateMethod: switchover # Use switchover instead of restart to prevent restart loops
primaryUpdateStrategy: unsupervised
# S3 backup configuration for CloudNativePG - TEMPORARILY DISABLED
# backup:
# # Backup retention policy
# retentionPolicy: "30d" # Keep backups for 30 days
#
# # S3 backup configuration for Backblaze B2
# barmanObjectStore:
# destinationPath: s3://postgresql-backups/cnpg
# s3Credentials:
# accessKeyId:
# name: postgresql-s3-backup-credentials
# key: AWS_ACCESS_KEY_ID
# secretAccessKey:
# name: postgresql-s3-backup-credentials
# key: AWS_SECRET_ACCESS_KEY
# endpointURL: https://s3.eu-central-003.backblazeb2.com
#
# # Backblaze B2 specific configuration
# data:
# compression: gzip
# encryption: AES256
# immediateCheckpoint: true
# jobs: 2 # Parallel backup jobs
#
# wal:
# compression: gzip
# encryption: AES256
# maxParallel: 2 # Parallel WAL archiving