---
# CloudNativePG Cluster "postgres-shared": a 3-instance PostgreSQL cluster
# running a PostGIS-enabled image, with separate data and WAL volumes.
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
  name: postgres-shared
  namespace: postgresql-system
  labels:
    app: postgresql-shared
    backup.longhorn.io/enable: "true"
spec:
  instances: 3

  # Use CloudNativePG-compatible PostGIS image
  # imageName: ghcr.io/cloudnative-pg/postgresql:16.6  # Standard image
  imageName: registry.keyboardvagabond.com/library/cnpg-postgis:16.6-3.4-v2

  # Bootstrap with initial database and user
  bootstrap:
    initdb:
      database: shared_db
      owner: shared_user
      encoding: UTF8
      localeCollate: en_US.UTF-8
      localeCType: en_US.UTF-8

      # Install PostGIS extensions in template database
      # (available to all databases)
      postInitTemplateSQL:
        - CREATE EXTENSION IF NOT EXISTS postgis;
        - CREATE EXTENSION IF NOT EXISTS postgis_topology;
        - CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
        - CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder;

  # PostgreSQL configuration for conservative scaling (4GB memory limit)
  postgresql:
    parameters:
      # Performance optimizations for 4GB memory limit
      # Reduced max_connections based on actual usage
      # (7 connections observed)
      max_connections: "150"
      shared_buffers: "1GB"  # 25% of 4GB memory limit
      effective_cache_size: "3GB"  # ~75% of 4GB memory limit
      maintenance_work_mem: "256MB"  # Scaled for 4GB memory limit
      checkpoint_completion_target: "0.9"
      wal_buffers: "24MB"
      default_statistics_target: "100"
      random_page_cost: "1.1"  # Good for SSD storage
      effective_io_concurrency: "200"
      # Increased from 14MB: 150 connections x 24MB = 3.6GB max.
      # NOTE(review): 3.6GB of work_mem plus 1GB of shared_buffers is more
      # than the 4Gi memory limit set under resources.limits; this budget
      # only holds while far fewer than max_connections sessions are
      # sorting/hashing at once - confirm against observed usage.
      work_mem: "24MB"
      min_wal_size: "1GB"
      max_wal_size: "6GB"

      # Additional optimizations for your hardware (tuned for 2-core limit)
      max_worker_processes: "8"  # Scaled for 2 CPU cores
      max_parallel_workers: "6"  # Increased for better OLTP workload
      max_parallel_workers_per_gather: "3"  # Max 3 workers per query
      max_parallel_maintenance_workers: "3"  # For maintenance operations

      # Network timeout adjustments for 100Mbps VLAN
      wal_sender_timeout: "10s"  # Increased from 5s for slower network
      wal_receiver_timeout: "10s"  # Increased from 5s for slower network

      # Multi-instance HA configuration with asynchronous replication.
      # No synchronous replicas are configured in this manifest, so
      # synchronous_commit "on" presumably only waits for the local WAL
      # flush on the primary - TODO confirm this is the intended guarantee.
      synchronous_commit: "on"  # favor data integrity

      # Log long running queries
      log_min_duration_statement: "5000"  # Log queries > 5 seconds
      log_line_prefix: "%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h "
      log_statement: "none"  # Only log slow queries, not all

      # Query activity tracking - increase limit for complex queries
      # 8KB - allows full query text in pg_stat_activity
      track_activity_query_size: "8192"

  # Storage configuration using PostgreSQL-optimized storage class
  storage:
    size: 50Gi
    storageClass: longhorn-postgresql

  # Separate WAL storage for better I/O performance
  walStorage:
    size: 10Gi
    storageClass: longhorn-postgresql

  # Enable pod anti-affinity for HA cluster (distribute across nodes)
  affinity:
    enablePodAntiAffinity: true
    topologyKey: kubernetes.io/hostname

  # CPU/memory envelope; the memory limit is the 4GB figure the PostgreSQL
  # parameters above are sized against.
  resources:
    requests:
      cpu: 750m
      memory: 1.5Gi
    limits:
      cpu: 2000m
      memory: 4Gi

  # Enable superuser access for maintenance
  enableSuperuserAccess: true

  # Certificate configuration using cert-manager.
  # The server cert secret doubles as the server CA secret, and the client
  # cert secret doubles as the client CA and replication TLS secret.
  certificates:
    serverTLSSecret: postgresql-shared-server-cert
    serverCASecret: postgresql-shared-server-cert
    clientCASecret: postgresql-shared-client-cert
    replicationTLSSecret: postgresql-shared-client-cert

  # Replication slot configuration - enabled for HA cluster
  replicationSlots:
    highAvailability:
      # Enable HA replication slots for multi-instance cluster
      enabled: true
    synchronizeReplicas:
      # Enable replica synchronization for HA
      enabled: true

  # Monitoring configuration for Prometheus metrics
  monitoring:
    enablePodMonitor: true
    # Custom metrics for dashboard compatibility
    customQueriesConfigMap:
      - name: postgresql-dashboard-metrics
        key: queries
      - name: postgresql-connection-metrics
        key: custom-queries

  # Reasonable startup delay for stable 3-instance cluster.
  # NOTE(review): an explicit probes.startup.failureThreshold is also set
  # below; per the CloudNativePG docs that appears to take precedence over
  # startDelay - confirm which value is actually in effect.
  startDelay: 30
  probes:
    startup:
      # Allow PostgreSQL to start and begin recovery
      initialDelaySeconds: 60
      periodSeconds: 10
      timeoutSeconds: 10
      # 90 x 10s = 15 minutes total for replica recovery with Longhorn
      # storage
      failureThreshold: 90
    readiness:
      # Allow instance manager to initialize
      initialDelaySeconds: 30
      periodSeconds: 10
      timeoutSeconds: 10
      failureThreshold: 3
    liveness:
      # Allow full startup before liveness checks
      initialDelaySeconds: 120
      periodSeconds: 30
      timeoutSeconds: 10
      failureThreshold: 3

  # Use switchover instead of restart to prevent restart loops
  primaryUpdateMethod: switchover
  primaryUpdateStrategy: unsupervised

  # S3 backup configuration for CloudNativePG - TEMPORARILY DISABLED
  # backup:
  #   # Backup retention policy
  #   retentionPolicy: "30d"  # Keep backups for 30 days
  #
  #   # S3 backup configuration for Backblaze B2
  #   barmanObjectStore:
  #     destinationPath: s3://postgresql-backups/cnpg
  #     s3Credentials:
  #       accessKeyId:
  #         name: postgresql-s3-backup-credentials
  #         key: AWS_ACCESS_KEY_ID
  #       secretAccessKey:
  #         name: postgresql-s3-backup-credentials
  #         key: AWS_SECRET_ACCESS_KEY
  #     endpointURL: https://s3.eu-central-003.backblazeb2.com
  #
  #     # Backblaze B2 specific configuration
  #     data:
  #       compression: gzip
  #       encryption: AES256
  #       immediateCheckpoint: true
  #       jobs: 2  # Parallel backup jobs
  #
  #     wal:
  #       compression: gzip
  #       encryption: AES256
  #       maxParallel: 2  # Parallel WAL archiving