diff --git a/build/piefed/piefed-base/Dockerfile b/build/piefed/piefed-base/Dockerfile
index d6a1d61..ba314c8 100644
--- a/build/piefed/piefed-base/Dockerfile
+++ b/build/piefed/piefed-base/Dockerfile
@@ -60,11 +60,14 @@ RUN apk add --no-cache \
     bash
 
 # Create piefed user and set up directories in a single layer
+# Note: /app/app/static/media is volume-mounted in K8s, fsGroup handles permissions there
+# Other directories need explicit ownership for logging and temp files
 RUN addgroup -g 1000 piefed \
     && adduser -u 1000 -G piefed -s /bin/sh -D piefed \
     && mkdir -p /app/logs /app/app/static/tmp /app/app/static/media \
        /var/log/piefed /var/run/piefed \
-    && chown -R piefed:piefed /var/log/piefed /var/run/piefed
+    && chown -R piefed:piefed /app/logs /app/app/static/tmp \
+       /var/log/piefed /var/run/piefed
 
 # Set working directory
 WORKDIR /app
@@ -75,7 +78,7 @@ COPY --from=builder --chown=piefed:piefed /app /app
 # Compile translations and set permissions in a single layer
 RUN source /app/venv/bin/activate \
     && (pybabel compile -d app/translations || true) \
-    && chmod 755 /app/logs /app/app/static/tmp /app/app/static/media
+    && chmod 755 /app/logs /app/app/static/tmp
 
 # Copy shared entrypoint utilities
 COPY entrypoint-common.sh /usr/local/bin/entrypoint-common.sh
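Context for the chown change: /app/app/static/media is volume-mounted in the deployments below, and fsGroup: 1000 makes that mount group-writable when the kubelet attaches it, so image-level ownership on that path would be masked anyway. A quick way to confirm the mount really is writable once a pod is running — a sketch assuming the web Deployment is named piefed-web:

    kubectl exec -n piefed-application deploy/piefed-web -- \
      sh -c 'ls -ld /app/app/static/media \
        && touch /app/app/static/media/.fsgroup-probe \
        && rm /app/app/static/media/.fsgroup-probe'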
diff --git a/manifests/applications/piefed/deployment-web.yaml b/manifests/applications/piefed/deployment-web.yaml
index 1e55d70..8517e1c 100644
--- a/manifests/applications/piefed/deployment-web.yaml
+++ b/manifests/applications/piefed/deployment-web.yaml
@@ -20,107 +20,109 @@ spec:
         app.kubernetes.io/component: web
     spec:
       serviceAccountName: piefed-init-checker
+      securityContext:
+        fsGroup: 1000 # piefed group - ensures volume mounts are writable
       imagePullSecrets:
-      - name: harbor-pull-secret
+        - name: harbor-pull-secret
       initContainers:
-      - name: wait-for-migrations
-        image: bitnami/kubectl@sha256:b407dcce69129c06fabab6c3eb35bf9a2d75a20d0d927b3f32dae961dba4270b
-        command:
-        - sh
-        - -c
-        - |
-          echo "Checking database migration status..."
-
-          # Check if Job exists
-          if ! kubectl get job piefed-db-init -n piefed-application >/dev/null 2>&1; then
-            echo "ERROR: Migration job does not exist!"
-            echo "Expected job/piefed-db-init in piefed-application namespace"
-            exit 1
-          fi
-
-          # Check if Job is complete
-          COMPLETE_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null)
-          if [ "$COMPLETE_STATUS" = "True" ]; then
-            echo "✓ Migrations already complete, proceeding..."
-            exit 0
-          fi
-
-          # Check if Job has failed
-          FAILED_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null)
-          if [ "$FAILED_STATUS" = "True" ]; then
-            echo "ERROR: Migration job has FAILED!"
-            echo "Job status:"
-            kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")]}' | jq .
-            echo ""
-            echo "Recent events:"
-            kubectl get events -n piefed-application --field-selector involvedObject.name=piefed-db-init --sort-by='.lastTimestamp' | tail -5
-            exit 1
-          fi
-
-          # Job exists but is still running, wait for it
-          echo "Migration job running, waiting for completion..."
-          kubectl wait --for=condition=complete --timeout=600s job/piefed-db-init -n piefed-application || {
-            echo "ERROR: Migration job failed or timed out!"
-            exit 1
-          }
-
-          echo "✓ Migrations complete, starting web pod..."
+        - name: wait-for-migrations
+          image: bitnami/kubectl@sha256:b407dcce69129c06fabab6c3eb35bf9a2d75a20d0d927b3f32dae961dba4270b
+          command:
+            - sh
+            - -c
+            - |
+              echo "Checking database migration status..."
+
+              # Check if Job exists
+              if ! kubectl get job piefed-db-init -n piefed-application >/dev/null 2>&1; then
+                echo "ERROR: Migration job does not exist!"
+                echo "Expected job/piefed-db-init in piefed-application namespace"
+                exit 1
+              fi
+
+              # Check if Job is complete
+              COMPLETE_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null)
+              if [ "$COMPLETE_STATUS" = "True" ]; then
+                echo "✓ Migrations already complete, proceeding..."
+                exit 0
+              fi
+
+              # Check if Job has failed
+              FAILED_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null)
+              if [ "$FAILED_STATUS" = "True" ]; then
+                echo "ERROR: Migration job has FAILED!"
+                echo "Job status:"
+                kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")]}' | jq .
+                echo ""
+                echo "Recent events:"
+                kubectl get events -n piefed-application --field-selector involvedObject.name=piefed-db-init --sort-by='.lastTimestamp' | tail -5
+                exit 1
+              fi
+
+              # Job exists but is still running, wait for it
+              echo "Migration job running, waiting for completion..."
+              kubectl wait --for=condition=complete --timeout=600s job/piefed-db-init -n piefed-application || {
+                echo "ERROR: Migration job failed or timed out!"
+                exit 1
+              }
+
+              echo "✓ Migrations complete, starting web pod..."
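The init container above gates web pod startup on the piefed-db-init Job. When a rollout sits in Init:0/1, the same checks can be replayed by hand; these commands are taken verbatim from the script:

    kubectl get job piefed-db-init -n piefed-application \
      -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}'
    kubectl wait --for=condition=complete --timeout=600s \
      job/piefed-db-init -n piefed-application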
       containers:
-      - name: piefed-web
-        image: /library/piefed-web:latest
-        imagePullPolicy: Always
-        ports:
-        - containerPort: 80
-          name: http
-        envFrom:
-        - configMapRef:
-            name: piefed-config
-        - secretRef:
-            name: piefed-secrets
-        env:
-        - name: PYTHONUNBUFFERED
-          value: "1"
-        - name: FLASK_DEBUG
-          value: "0" # Keep production mode but enable better logging
-        - name: WERKZEUG_DEBUG_PIN
-          value: "off"
-        resources:
-          requests:
-            cpu: 600m # Conservative reduction from 1000m considering 200-800x user growth
-            memory: 1.5Gi # Conservative reduction from 2Gi considering scaling needs
-          limits:
-            cpu: 2000m # Keep original limits for burst capacity at scale
-            memory: 4Gi # Keep original limits for growth
-        volumeMounts:
-        - name: app-storage
-          mountPath: /app/app/media
-          subPath: media
-        - name: app-storage
-          mountPath: /app/app/static/media
-          subPath: static
-        - name: cache-storage
-          mountPath: /app/cache
-        livenessProbe:
-          httpGet:
-            path: /health
-            port: 80
-          initialDelaySeconds: 60
-          periodSeconds: 30
-          timeoutSeconds: 10
-        readinessProbe:
-          httpGet:
-            path: /health
-            port: 80
-          initialDelaySeconds: 30
-          periodSeconds: 10
-          timeoutSeconds: 5
+        - name: piefed-web
+          image: registry.keyboardvagabond.com/library/piefed-web:latest
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 80
+              name: http
+          envFrom:
+            - configMapRef:
+                name: piefed-config
+            - secretRef:
+                name: piefed-secrets
+          env:
+            - name: PYTHONUNBUFFERED
+              value: "1"
+            - name: FLASK_DEBUG
+              value: "0" # Keep production mode but enable better logging
+            - name: WERKZEUG_DEBUG_PIN
+              value: "off"
+          resources:
+            requests:
+              cpu: 600m # Conservative reduction from 1000m considering 200-800x user growth
+              memory: 1.5Gi # Conservative reduction from 2Gi considering scaling needs
+            limits:
+              cpu: 2000m # Keep original limits for burst capacity at scale
+              memory: 4Gi # Keep original limits for growth
+          volumeMounts:
+            - name: app-storage
+              mountPath: /app/app/media
+              subPath: media
+            - name: app-storage
+              mountPath: /app/app/static/media
+              subPath: static
+            - name: cache-storage
+              mountPath: /app/cache
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 80
+            initialDelaySeconds: 60
+            periodSeconds: 30
+            timeoutSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 80
+            initialDelaySeconds: 30
+            periodSeconds: 10
+            timeoutSeconds: 5
       volumes:
-      - name: app-storage
-        persistentVolumeClaim:
-          claimName: piefed-app-storage
-      - name: cache-storage
-        persistentVolumeClaim:
-          claimName: piefed-cache-storage
+        - name: app-storage
+          persistentVolumeClaim:
+            claimName: piefed-app-storage
+        - name: cache-storage
+          persistentVolumeClaim:
+            claimName: piefed-cache-storage
 ---
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
@@ -135,15 +137,15 @@ spec:
   minReplicas: 2
   maxReplicas: 6
   metrics:
-  - type: Resource
-    resource:
-      name: cpu
-      target:
-        type: AverageValue
-        averageValue: 1400m # 70% of 2000m limit - allow better CPU utilization
-  - type: Resource
-    resource:
-      name: memory
-      target:
-        type: Utilization
-        averageUtilization: 90
\ No newline at end of file
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: AverageValue
+          averageValue: 1400m # 70% of 2000m limit - allow better CPU utilization
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: 200 # ~3Gi of the 4Gi limit (targets are % of the 1.5Gi request)
\ No newline at end of file
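A note on the new memory target: HPA Utilization targets are percentages of the container request, not the limit, so 200% of the 1.5Gi request triggers scale-out around 3Gi per pod, comfortably under the 4Gi limit (the old 90% fired at roughly 1.35Gi). To compare live usage against that trigger (metrics-server required):

    kubectl top pod -n piefed-application -l app.kubernetes.io/component=web
    kubectl get hpa -n piefed-application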
diff --git a/manifests/applications/piefed/deployment-worker.yaml b/manifests/applications/piefed/deployment-worker.yaml
index 5a39be1..47b86ca 100644
--- a/manifests/applications/piefed/deployment-worker.yaml
+++ b/manifests/applications/piefed/deployment-worker.yaml
@@ -20,116 +20,118 @@ spec:
         app.kubernetes.io/component: worker
     spec:
       serviceAccountName: piefed-init-checker
+      securityContext:
+        fsGroup: 1000 # piefed group - ensures volume mounts are writable
      imagePullSecrets:
-      - name: harbor-pull-secret
+        - name: harbor-pull-secret
       initContainers:
-      - name: wait-for-migrations
-        image: bitnami/kubectl@sha256:b407dcce69129c06fabab6c3eb35bf9a2d75a20d0d927b3f32dae961dba4270b
-        command:
-        - sh
-        - -c
-        - |
-          echo "Checking database migration status..."
-
-          # Check if Job exists
-          if ! kubectl get job piefed-db-init -n piefed-application >/dev/null 2>&1; then
-            echo "ERROR: Migration job does not exist!"
-            echo "Expected job/piefed-db-init in piefed-application namespace"
-            exit 1
-          fi
-
-          # Check if Job is complete
-          COMPLETE_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null)
-          if [ "$COMPLETE_STATUS" = "True" ]; then
-            echo "✓ Migrations already complete, proceeding..."
-            exit 0
-          fi
-
-          # Check if Job has failed
-          FAILED_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null)
-          if [ "$FAILED_STATUS" = "True" ]; then
-            echo "ERROR: Migration job has FAILED!"
-            echo "Job status:"
-            kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")]}' | jq .
-            echo ""
-            echo "Recent events:"
-            kubectl get events -n piefed-application --field-selector involvedObject.name=piefed-db-init --sort-by='.lastTimestamp' | tail -5
-            exit 1
-          fi
-
-          # Job exists but is still running, wait for it
-          echo "Migration job running, waiting for completion..."
-          kubectl wait --for=condition=complete --timeout=600s job/piefed-db-init -n piefed-application || {
-            echo "ERROR: Migration job failed or timed out!"
-            exit 1
-          }
-
-          echo "✓ Migrations complete, starting worker pod..."
+        - name: wait-for-migrations
+          image: bitnami/kubectl@sha256:b407dcce69129c06fabab6c3eb35bf9a2d75a20d0d927b3f32dae961dba4270b
+          command:
+            - sh
+            - -c
+            - |
+              echo "Checking database migration status..."
+
+              # Check if Job exists
+              if ! kubectl get job piefed-db-init -n piefed-application >/dev/null 2>&1; then
+                echo "ERROR: Migration job does not exist!"
+                echo "Expected job/piefed-db-init in piefed-application namespace"
+                exit 1
+              fi
+
+              # Check if Job is complete
+              COMPLETE_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}' 2>/dev/null)
+              if [ "$COMPLETE_STATUS" = "True" ]; then
+                echo "✓ Migrations already complete, proceeding..."
+                exit 0
+              fi
+
+              # Check if Job has failed
+              FAILED_STATUS=$(kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}' 2>/dev/null)
+              if [ "$FAILED_STATUS" = "True" ]; then
+                echo "ERROR: Migration job has FAILED!"
+                echo "Job status:"
+                kubectl get job piefed-db-init -n piefed-application -o jsonpath='{.status.conditions[?(@.type=="Failed")]}' | jq .
+                echo ""
+                echo "Recent events:"
+                kubectl get events -n piefed-application --field-selector involvedObject.name=piefed-db-init --sort-by='.lastTimestamp' | tail -5
+                exit 1
+              fi
+
+              # Job exists but is still running, wait for it
+              echo "Migration job running, waiting for completion..."
+              kubectl wait --for=condition=complete --timeout=600s job/piefed-db-init -n piefed-application || {
+                echo "ERROR: Migration job failed or timed out!"
+                exit 1
+              }
+
+              echo "✓ Migrations complete, starting worker pod..."
       containers:
-      - name: piefed-worker
-        image: /library/piefed-worker:latest
-        imagePullPolicy: Always
-        envFrom:
-        - configMapRef:
-            name: piefed-config
-        - secretRef:
-            name: piefed-secrets
-        env:
-        - name: PYTHONUNBUFFERED
-          value: "1"
-        - name: FLASK_DEBUG
-          value: "0" # Keep production mode but enable better logging
-        - name: WERKZEUG_DEBUG_PIN
-          value: "off"
-        # Celery Worker Logging Configuration
-        - name: CELERY_WORKER_HIJACK_ROOT_LOGGER
-          value: "False"
-        # Database connection pool overrides for worker (lower than web pods)
-        - name: DB_POOL_SIZE
-          value: "5" # Workers need fewer connections than web pods
-        - name: DB_MAX_OVERFLOW
-          value: "10" # Lower overflow for background tasks
-        resources:
-          requests:
-            cpu: 500m
-            memory: 1Gi
-          limits:
-            cpu: 2000m # Allow internal scaling to 5 workers
-            memory: 3Gi # Increase for multiple workers
-        volumeMounts:
-        - name: app-storage
-          mountPath: /app/app/media
-          subPath: media
-        - name: app-storage
-          mountPath: /app/app/static/media
-          subPath: static
-        - name: cache-storage
-          mountPath: /app/cache
-        livenessProbe:
-          exec:
-            command:
-            - python
-            - -c
-            - "import os,redis,urllib.parse; u=urllib.parse.urlparse(os.environ['CELERY_BROKER_URL']); r=redis.Redis(host=u.hostname, port=u.port, password=u.password, db=int(u.path[1:]) if u.path else 0); r.ping()"
-          initialDelaySeconds: 60
-          periodSeconds: 60
-          timeoutSeconds: 10
-        readinessProbe:
-          exec:
-            command:
-            - python
-            - -c
-            - "import os,redis,urllib.parse; u=urllib.parse.urlparse(os.environ['CELERY_BROKER_URL']); r=redis.Redis(host=u.hostname, port=u.port, password=u.password, db=int(u.path[1:]) if u.path else 0); r.ping()"
-          initialDelaySeconds: 30
-          periodSeconds: 30
-          timeoutSeconds: 5
+        - name: piefed-worker
+          image: registry.keyboardvagabond.com/library/piefed-worker:latest
+          imagePullPolicy: Always
+          envFrom:
+            - configMapRef:
+                name: piefed-config
+            - secretRef:
+                name: piefed-secrets
+          env:
+            - name: PYTHONUNBUFFERED
+              value: "1"
+            - name: FLASK_DEBUG
+              value: "0" # Keep production mode but enable better logging
+            - name: WERKZEUG_DEBUG_PIN
+              value: "off"
+            # Celery Worker Logging Configuration
+            - name: CELERY_WORKER_HIJACK_ROOT_LOGGER
+              value: "False"
+            # Database connection pool overrides for worker (lower than web pods)
+            - name: DB_POOL_SIZE
+              value: "5" # Workers need fewer connections than web pods
+            - name: DB_MAX_OVERFLOW
+              value: "10" # Lower overflow for background tasks
+          resources:
+            requests:
+              cpu: 500m
+              memory: 1Gi
+            limits:
+              cpu: 2000m # Allow internal scaling to 5 workers
+              memory: 3Gi # Increase for multiple workers
+          volumeMounts:
+            - name: app-storage
+              mountPath: /app/app/media
+              subPath: media
+            - name: app-storage
+              mountPath: /app/app/static/media
+              subPath: static
+            - name: cache-storage
+              mountPath: /app/cache
+          livenessProbe:
+            exec:
+              command:
+                - python
+                - -c
+                - "import os,redis,urllib.parse; u=urllib.parse.urlparse(os.environ['CELERY_BROKER_URL']); r=redis.Redis(host=u.hostname, port=u.port, password=u.password, db=int(u.path[1:]) if u.path else 0); r.ping()"
+            initialDelaySeconds: 60
+            periodSeconds: 60
+            timeoutSeconds: 10
+          readinessProbe:
+            exec:
+              command:
+                - python
+                - -c
+                - "import os,redis,urllib.parse; u=urllib.parse.urlparse(os.environ['CELERY_BROKER_URL']); r=redis.Redis(host=u.hostname, port=u.port, password=u.password, db=int(u.path[1:]) if u.path else 0); r.ping()"
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
       volumes:
-      - name: app-storage
-        persistentVolumeClaim:
-          claimName: piefed-app-storage
-      - name: cache-storage
-        persistentVolumeClaim:
-          claimName: piefed-cache-storage
+        - name: app-storage
+          persistentVolumeClaim:
+            claimName: piefed-app-storage
+        - name: cache-storage
+          persistentVolumeClaim:
+            claimName: piefed-cache-storage
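The worker probes ping the Celery broker rather than an HTTP endpoint. To rule out broker connectivity when a worker is flapping, the same one-liner can be run by hand — a sketch assuming the worker Deployment is named piefed-worker:

    kubectl exec -n piefed-application deploy/piefed-worker -- python -c \
      "import os,redis,urllib.parse; u=urllib.parse.urlparse(os.environ['CELERY_BROKER_URL']); print(redis.Redis(host=u.hostname, port=u.port, password=u.password, db=int(u.path[1:]) if u.path else 0).ping())"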
 ---
 apiVersion: autoscaling/v2
 kind: HorizontalPodAutoscaler
@@ -144,15 +146,15 @@ spec:
   minReplicas: 1
   maxReplicas: 2
   metrics:
-  - type: Resource
-    resource:
-      name: cpu
-      target:
-        type: Utilization
-        averageUtilization: 375
-  - type: Resource
-    resource:
-      name: memory
-      target:
-        type: Utilization
-        averageUtilization: 250
\ No newline at end of file
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 375
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: 250
\ No newline at end of file
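Same request-relative convention for the worker HPA: 375% of the 500m CPU request is 1875m, just under the 2000m limit, and 250% of the 1Gi memory request is 2.5Gi, under the 3Gi limit, so the second replica comes up before either limit is reached. Current targets and replica counts can be inspected with:

    kubectl get hpa -n piefed-application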