Files
Keybard-Vagabond-Demo/manifests/infrastructure/longhorn/network-policy-s3-block.yaml

212 lines
6.8 KiB
YAML
Raw Normal View History

---
# Longhorn S3 Access Control via NetworkPolicy
#
# A CiliumNetworkPolicy blocks external S3 access by default; CronJobs delete it
# before the backup window opens (12:55 AM) and re-apply it when it closes (4:00 AM).
#
# Network Details:
# - Pod CIDR: 10.244.0.0/16 (within 10.0.0.0/8)
# - Service CIDR: 10.96.0.0/12 (within 10.0.0.0/8)
# - VLAN Network: 10.132.0.0/24 (within 10.0.0.0/8)
#
# How It Works:
# - NetworkPolicy is applied by default, blocking external S3 (Backblaze B2)
# - CronJob removes NetworkPolicy at 12:55 AM (5 min before earliest backup at 1 AM)
# - CronJob reapplies NetworkPolicy at 4:00 AM (after backup window closes)
# - Allows all internal cluster traffic (10.0.0.0/8) while blocking external S3
#
# Backup Schedule:
# - Daily backups: 2:00 AM
# - Weekly backups: 1:00 AM Sundays
# - Backup window: 12:55 AM - 4:00 AM (3 hours 5 minutes)
#
# See: BACKUP-GUIDE.md and S3-API-SOLUTION-FINAL.md for full documentation
---
# CiliumNetworkPolicy: blocks external S3 egress from longhorn-manager pods.
# Applied by default; the CronJobs in this file delete it before the backup
# window and re-apply it afterwards.
# CiliumNetworkPolicy is used (instead of a core NetworkPolicy) because
# `toEntities` lets the rule name the Kubernetes API server directly.
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
  name: longhorn-block-s3-access
  namespace: longhorn-system
  labels:
    app: longhorn
    purpose: s3-access-control
spec:
  description: "Block external S3 access while allowing internal cluster communication"
  # Only pods labelled app=longhorn-manager are restricted by this policy.
  endpointSelector:
    matchLabels:
      app: longhorn-manager
  egress:
    # Allow DNS to kube-dns in the kube-system namespace.
    # The namespace label is required: in a namespaced CiliumNetworkPolicy a
    # toEndpoints selector without it is scoped to the policy's own namespace
    # (longhorn-system) and would never match the kube-dns pods.
    - toEndpoints:
        - matchLabels:
            io.kubernetes.pod.namespace: kube-system
            k8s-app: kube-dns
      toPorts:
        - ports:
            - port: "53"
              protocol: UDP
            - port: "53"
              protocol: TCP
    # Explicitly allow the Kubernetes API server (critical for Longhorn).
    # Cilium handles this specially - the kube-apiserver entity is required.
    - toEntities:
        - kube-apiserver
    # Allow internal addresses in 10.0.0.0/8, which contains:
    #   - Pod CIDR:     10.244.0.0/16
    #   - Service CIDR: 10.96.0.0/12 (API server already covered above)
    #   - VLAN network: 10.132.0.0/24
    # NOTE(review): per Cilium's CIDR-policy documentation, toCIDR rules do not
    # match Cilium-managed pods or cluster nodes, so this rule is mainly
    # relevant for non-cluster hosts on the internal network - confirm.
    - toCIDR:
        - 10.0.0.0/8
    # Allow pod-to-pod communication within the cluster.
    # This explicit rule ensures instance-manager pods are reachable.
    - toEntities:
        - cluster
    # Everything else is denied by default once egress rules exist, including
    # external S3 endpoints such as Backblaze B2.
---
# Identity used by the CronJob pods that delete and re-apply the
# longhorn-block-s3-access policy. Permissions are granted through the Role
# and RoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: longhorn-system
  name: longhorn-netpol-manager
---
# Role: permissions the netpol-manager CronJobs need on CiliumNetworkPolicy
# objects in longhorn-system.
#   get    - kubectl reads the object before delete/apply
#   create - `kubectl apply` when the policy is absent (normal 4:00 AM case)
#   patch  - `kubectl apply` when the policy already exists (e.g. the delete
#            job failed or the policy was re-created in the meantime);
#            without it the re-apply job fails with a Forbidden error
#   delete - the 12:55 AM job removes the policy
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: longhorn-netpol-manager
  namespace: longhorn-system
rules:
  - apiGroups: ["cilium.io"]
    resources: ["ciliumnetworkpolicies"]
    verbs: ["get", "create", "patch", "delete"]
---
# Grants the longhorn-netpol-manager Role to the ServiceAccount of the same
# name, both in the longhorn-system namespace.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  namespace: longhorn-system
  name: longhorn-netpol-manager
subjects:
  - kind: ServiceAccount
    namespace: longhorn-system
    name: longhorn-netpol-manager
roleRef:
  kind: Role
  name: longhorn-netpol-manager
  apiGroup: rbac.authorization.k8s.io
---
# CronJob: delete the CiliumNetworkPolicy before backups (12:55 AM daily).
# With the policy gone, longhorn-manager can reach external S3 for the
# duration of the backup window.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: longhorn-enable-s3-access
  namespace: longhorn-system
  labels:
    app: longhorn
    purpose: s3-access-control
spec:
  # 12:55 AM daily - 5 minutes before the earliest backup (weekly, 1:00 AM
  # Sunday).
  # NOTE(review): no `timeZone` is set, so the schedule is evaluated in the
  # kube-controller-manager's time zone - confirm it matches the zone the
  # Longhorn backup schedule uses.
  schedule: "55 0 * * *"
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  # Never run two delete jobs at once.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: longhorn-netpol-manager
        spec:
          serviceAccountName: longhorn-netpol-manager
          # Retry in place if the script exits non-zero.
          restartPolicy: OnFailure
          containers:
            - name: delete-netpol
              # NOTE(review): `latest` is a floating tag and, combined with
              # IfNotPresent, nodes may run different kubectl versions -
              # consider pinning to a tag matching the cluster version.
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  # Abort on the first failing command so a failed delete makes
                  # the Job fail (and retry) instead of reporting success.
                  set -eu
                  echo "Removing CiliumNetworkPolicy to allow S3 access for backups..."
                  kubectl delete ciliumnetworkpolicy longhorn-block-s3-access -n longhorn-system --ignore-not-found=true
                  echo "S3 access enabled. Backups can proceed."
---
# CronJob: re-apply the CiliumNetworkPolicy after backups (4:00 AM daily).
# This blocks external S3 access again once the backup window has closed.
# The manifest embedded in the script must be kept in sync with the
# standalone longhorn-block-s3-access policy defined in this file.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: longhorn-disable-s3-access
  namespace: longhorn-system
  labels:
    app: longhorn
    purpose: s3-access-control
spec:
  # 4:00 AM daily - gives backups 3 hours 5 minutes to complete.
  # NOTE(review): no `timeZone` is set, so the schedule is evaluated in the
  # kube-controller-manager's time zone - confirm it matches the zone the
  # Longhorn backup schedule uses.
  schedule: "0 4 * * *"
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  # Never run two re-apply jobs at once.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: longhorn-netpol-manager
        spec:
          serviceAccountName: longhorn-netpol-manager
          # Retry in place if the script exits non-zero.
          restartPolicy: OnFailure
          containers:
            - name: create-netpol
              # NOTE(review): `latest` is a floating tag and, combined with
              # IfNotPresent, nodes may run different kubectl versions -
              # consider pinning to a tag matching the cluster version.
              image: bitnami/kubectl:latest
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  # Abort on the first failing command: if the apply fails the
                  # Job must fail (and retry) rather than claim S3 is blocked
                  # while egress is still open.
                  set -eu
                  echo "Re-applying CiliumNetworkPolicy to block S3 access..."
                  # Quoted delimiter: the manifest is passed to kubectl verbatim,
                  # with no shell expansion.
                  kubectl apply -f - <<'EOF'
                  apiVersion: cilium.io/v2
                  kind: CiliumNetworkPolicy
                  metadata:
                    name: longhorn-block-s3-access
                    namespace: longhorn-system
                    labels:
                      app: longhorn
                      purpose: s3-access-control
                  spec:
                    description: "Block external S3 access while allowing internal cluster communication"
                    endpointSelector:
                      matchLabels:
                        app: longhorn-manager
                    egress:
                      # Allow DNS to kube-dns in the kube-system namespace.
                      # The namespace label is required: without it the selector
                      # is scoped to the policy's own namespace.
                      - toEndpoints:
                          - matchLabels:
                              io.kubernetes.pod.namespace: kube-system
                              k8s-app: kube-dns
                        toPorts:
                          - ports:
                              - port: "53"
                                protocol: UDP
                              - port: "53"
                                protocol: TCP
                      # Explicitly allow Kubernetes API server (critical for Longhorn)
                      - toEntities:
                          - kube-apiserver
                      # Allow internal addresses in 10.0.0.0/8
                      - toCIDR:
                          - 10.0.0.0/8
                      # Allow pod-to-pod communication within cluster
                      - toEntities:
                          - cluster
                      # Block all other egress (including external S3)
                  EOF
                  echo "S3 access blocked. Polling stopped until next backup window."