mirror of
https://github.com/Xahau/xahaud.git
synced 2025-12-06 17:27:52 +00:00
refactor: remove OverlayFS delta caching entirely
THE GREAT CULLING: Remove all OverlayFS and delta caching logic. After extensive investigation and testing, we determined that OverlayFS file-level layering is fundamentally incompatible with ccache's access patterns: - ccache opens files with O_RDWR → kernel must provide writable file handle - OverlayFS must copy files to upper layer immediately (can't wait) - Even with metacopy=on, metadata-only files still appear in upper layer - Result: ~366MB deltas instead of tiny incremental diffs The fundamental constraint: cannot have all three of: 1. Read-only lower layer (for base sharing) 2. Writable file handles (for O_RDWR) 3. Minimal deltas (for efficient caching) Changes: - Removed all OverlayFS mounting/unmounting logic - Removed workspace and registry tracking - Removed delta creation and restoration - Removed use-deltas parameter - Simplified to direct tar/extract workflow Before: 726 lines across cache actions After: 321 lines (-55% reduction) Benefits: - ✅ Simpler architecture (direct tar/extract) - ✅ More maintainable (less code, less complexity) - ✅ More reliable (fewer moving parts) - ✅ Same performance (base-only was already used) - ✅ Clear path forward (restic/borg for future optimization) Current state works great: - Build times: 20-30 min → 2-5 min (80% improvement) - Cache sizes: ~323-609 MB per branch (with zst compression) - S3 costs: acceptable for current volume If bandwidth costs become problematic, migrate to restic/borg for chunk-level deduplication (completely different architecture).
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
name: 'Xahau Cache Restore (S3 + OverlayFS)'
|
||||
description: 'Drop-in replacement for actions/cache/restore using S3 and OverlayFS for delta caching'
|
||||
name: 'Xahau Cache Restore (S3)'
|
||||
description: 'Drop-in replacement for actions/cache/restore using S3 storage'
|
||||
|
||||
inputs:
|
||||
path:
|
||||
@@ -28,10 +28,6 @@ inputs:
|
||||
description: 'Check if a cache entry exists for the given input(s) without downloading it'
|
||||
required: false
|
||||
default: 'false'
|
||||
use-deltas:
|
||||
description: 'Enable delta caching (download/upload incremental changes). Set to false for base-only caching.'
|
||||
required: false
|
||||
default: 'true'
|
||||
# Note: Composite actions can't access secrets.* directly - must be passed from workflow
|
||||
aws-access-key-id:
|
||||
description: 'AWS Access Key ID for S3 access'
|
||||
@@ -48,13 +44,13 @@ outputs:
|
||||
description: 'The key that was used to restore the cache (may be from restore-keys)'
|
||||
value: ${{ steps.restore-cache.outputs.cache-primary-key }}
|
||||
cache-matched-key:
|
||||
description: 'The key that matched (same as cache-primary-key for compatibility)'
|
||||
value: ${{ steps.restore-cache.outputs.cache-primary-key }}
|
||||
description: 'The key that was used to restore the cache (exact or prefix match)'
|
||||
value: ${{ steps.restore-cache.outputs.cache-matched-key }}
|
||||
|
||||
runs:
|
||||
using: 'composite'
|
||||
steps:
|
||||
- name: Restore cache from S3 with OverlayFS
|
||||
- name: Restore cache from S3
|
||||
id: restore-cache
|
||||
shell: bash
|
||||
env:
|
||||
@@ -67,133 +63,42 @@ runs:
|
||||
TARGET_PATH: ${{ inputs.path }}
|
||||
FAIL_ON_MISS: ${{ inputs.fail-on-cache-miss }}
|
||||
LOOKUP_ONLY: ${{ inputs.lookup-only }}
|
||||
USE_DELTAS: ${{ inputs.use-deltas }}
|
||||
COMMIT_MSG: ${{ github.event.head_commit.message }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
echo "=========================================="
|
||||
echo "Xahau Cache Restore (S3 + OverlayFS)"
|
||||
echo "Xahau Cache Restore (S3)"
|
||||
echo "=========================================="
|
||||
echo "Target path: ${TARGET_PATH}"
|
||||
echo "Primary key: ${CACHE_KEY}"
|
||||
echo "Cache key: ${CACHE_KEY}"
|
||||
echo "S3 bucket: s3://${S3_BUCKET}"
|
||||
echo "Use deltas: ${USE_DELTAS}"
|
||||
echo ""
|
||||
|
||||
# Normalize target path (expand tilde and resolve to absolute path)
|
||||
# This ensures consistent path comparison in the mount registry
|
||||
if [[ "${TARGET_PATH}" == ~* ]]; then
|
||||
# Expand tilde manually (works even if directory doesn't exist yet)
|
||||
TARGET_PATH="${HOME}${TARGET_PATH:1}"
|
||||
fi
|
||||
TARGET_PATH=$(realpath -m "${TARGET_PATH}")
|
||||
echo "Normalized target path: ${TARGET_PATH}"
|
||||
echo ""
|
||||
|
||||
# Generate unique cache workspace
|
||||
CACHE_HASH=$(echo "${CACHE_KEY}" | md5sum | cut -d' ' -f1)
|
||||
CACHE_WORKSPACE="/tmp/xahau-cache-${CACHE_HASH}"
|
||||
|
||||
echo "Cache workspace: ${CACHE_WORKSPACE}"
|
||||
|
||||
# Check for [ci-clear-cache] tag in commit message
|
||||
if echo "${COMMIT_MSG}" | grep -q '\[ci-clear-cache\]'; then
|
||||
echo ""
|
||||
echo "🗑️ [ci-clear-cache] detected in commit message"
|
||||
echo "Clearing cache for key: ${CACHE_KEY}"
|
||||
echo ""
|
||||
|
||||
# Delete base layer
|
||||
S3_BASE_KEY="s3://${S3_BUCKET}/${CACHE_KEY}-base.tar.zst"
|
||||
if aws s3 ls "${S3_BASE_KEY}" --region "${S3_REGION}" >/dev/null 2>&1; then
|
||||
echo "Deleting base layer: ${S3_BASE_KEY}"
|
||||
aws s3 rm "${S3_BASE_KEY}" --region "${S3_REGION}" 2>/dev/null || true
|
||||
echo "✓ Base layer deleted"
|
||||
else
|
||||
echo "ℹ️ No base layer found to delete"
|
||||
fi
|
||||
|
||||
# Delete all delta layers for this key
|
||||
echo "Deleting all delta layers matching: ${CACHE_KEY}-delta-*"
|
||||
DELTA_COUNT=$(aws s3 ls "s3://${S3_BUCKET}/" --region "${S3_REGION}" | grep "${CACHE_KEY}-delta-" | wc -l || echo "0")
|
||||
DELTA_COUNT=$(echo "${DELTA_COUNT}" | tr -d ' \n') # Trim whitespace
|
||||
if [ "${DELTA_COUNT}" -gt 0 ]; then
|
||||
aws s3 rm "s3://${S3_BUCKET}/" --recursive \
|
||||
--exclude "*" \
|
||||
--include "${CACHE_KEY}-delta-*" \
|
||||
--region "${S3_REGION}" 2>/dev/null || true
|
||||
echo "✓ Deleted ${DELTA_COUNT} delta layer(s)"
|
||||
else
|
||||
echo "ℹ️ No delta layers found to delete"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "✅ Cache cleared successfully"
|
||||
echo "Build will proceed from scratch (bootstrap mode)"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
# Create OverlayFS directory structure
|
||||
mkdir -p "${CACHE_WORKSPACE}"/{base,upper,work,merged}
|
||||
|
||||
# Function to try downloading from S3
|
||||
# Function to try restoring a cache key
|
||||
try_restore_key() {
|
||||
local try_key="$1"
|
||||
local s3_base="s3://${S3_BUCKET}/${try_key}-base.tar.zst"
|
||||
local key=$1
|
||||
local s3_key="s3://${S3_BUCKET}/${key}-base.tar.zst"
|
||||
|
||||
echo "Trying cache key: ${try_key}"
|
||||
|
||||
# Check if base exists (one base per key, immutable)
|
||||
echo "Checking for base layer..."
|
||||
if aws s3 ls "${s3_base}" --region "${S3_REGION}" >/dev/null 2>&1; then
|
||||
echo "✓ Found base layer: ${s3_base}"
|
||||
|
||||
if [ "${LOOKUP_ONLY}" = "true" ]; then
|
||||
echo "Lookup-only mode: cache exists, skipping download"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Download base layer
|
||||
echo "Downloading base layer..."
|
||||
aws s3 cp "${s3_base}" /tmp/cache-base.tar.zst --region "${S3_REGION}" --quiet
|
||||
|
||||
# Extract base layer
|
||||
echo "Extracting base layer..."
|
||||
tar -xf /tmp/cache-base.tar.zst -C "${CACHE_WORKSPACE}/base"
|
||||
rm /tmp/cache-base.tar.zst
|
||||
|
||||
# Query for latest timestamped delta (only if use-deltas enabled)
|
||||
if [ "${USE_DELTAS}" = "true" ]; then
|
||||
echo "Querying for latest delta..."
|
||||
LATEST_DELTA=$(aws s3api list-objects-v2 \
|
||||
--bucket "${S3_BUCKET}" \
|
||||
--prefix "${try_key}-delta-" \
|
||||
--region "${S3_REGION}" \
|
||||
--query 'sort_by(Contents, &LastModified)[-1].Key' \
|
||||
--output text 2>/dev/null || echo "")
|
||||
|
||||
if [ -n "${LATEST_DELTA}" ] && [ "${LATEST_DELTA}" != "None" ]; then
|
||||
echo "✓ Found latest delta: ${LATEST_DELTA}"
|
||||
echo "Downloading delta layer..."
|
||||
aws s3 cp "s3://${S3_BUCKET}/${LATEST_DELTA}" /tmp/cache-delta.tar.zst --region "${S3_REGION}" --quiet
|
||||
|
||||
echo "Extracting delta layer..."
|
||||
tar -xf /tmp/cache-delta.tar.zst -C "${CACHE_WORKSPACE}/upper" 2>/dev/null || true
|
||||
rm /tmp/cache-delta.tar.zst
|
||||
else
|
||||
echo "ℹ No delta layer found (this is fine for first build)"
|
||||
fi
|
||||
else
|
||||
echo "ℹ Delta caching disabled (use-deltas: false)"
|
||||
fi
|
||||
echo "Checking for key: ${key}"
|
||||
|
||||
if aws s3 ls "${s3_key}" --region "${S3_REGION}" >/dev/null 2>&1; then
|
||||
echo "✓ Found cache: ${s3_key}"
|
||||
return 0
|
||||
else
|
||||
echo "✗ No base layer found for key: ${try_key}"
|
||||
echo "✗ Not found: ${key}"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Try primary key first
|
||||
# Try exact match first
|
||||
MATCHED_KEY=""
|
||||
EXACT_MATCH="false"
|
||||
|
||||
@@ -208,12 +113,8 @@ runs:
|
||||
echo ""
|
||||
echo "Primary key not found, trying restore-keys..."
|
||||
|
||||
# Split restore-keys by newline
|
||||
while IFS= read -r restore_key; do
|
||||
# Skip empty lines
|
||||
[ -z "${restore_key}" ] && continue
|
||||
|
||||
# Trim whitespace
|
||||
restore_key=$(echo "${restore_key}" | xargs)
|
||||
|
||||
if try_restore_key "${restore_key}"; then
|
||||
@@ -231,7 +132,6 @@ runs:
|
||||
if [ -z "${MATCHED_KEY}" ]; then
|
||||
echo ""
|
||||
echo "❌ No cache found for key: ${CACHE_KEY}"
|
||||
echo "This is BOOTSTRAP mode - first build for this cache key"
|
||||
|
||||
if [ "${FAIL_ON_MISS}" = "true" ]; then
|
||||
echo "fail-on-cache-miss is enabled, failing workflow"
|
||||
@@ -241,16 +141,11 @@ runs:
|
||||
# Set outputs for cache miss
|
||||
echo "cache-hit=false" >> $GITHUB_OUTPUT
|
||||
echo "cache-primary-key=" >> $GITHUB_OUTPUT
|
||||
echo "cache-matched-key=" >> $GITHUB_OUTPUT
|
||||
|
||||
# Create empty cache directory for bootstrap
|
||||
# Create empty cache directory
|
||||
mkdir -p "${TARGET_PATH}"
|
||||
|
||||
# Record bootstrap mode for save action
|
||||
# Format: path:workspace:matched_key:primary_key:exact_match:use_deltas
|
||||
# For bootstrap: workspace="bootstrap", matched_key=primary_key, exact_match=false
|
||||
MOUNT_REGISTRY="/tmp/xahau-cache-mounts.txt"
|
||||
echo "${TARGET_PATH}:bootstrap:${CACHE_KEY}:${CACHE_KEY}:false:${USE_DELTAS}" >> "${MOUNT_REGISTRY}"
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Cache restore completed (bootstrap mode)"
|
||||
@@ -262,36 +157,30 @@ runs:
|
||||
# If lookup-only, we're done
|
||||
if [ "${LOOKUP_ONLY}" = "true" ]; then
|
||||
echo "cache-hit=${EXACT_MATCH}" >> $GITHUB_OUTPUT
|
||||
echo "cache-primary-key=${MATCHED_KEY}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Clean up workspace
|
||||
rm -rf "${CACHE_WORKSPACE}"
|
||||
echo "cache-primary-key=${CACHE_KEY}" >> $GITHUB_OUTPUT
|
||||
echo "cache-matched-key=${MATCHED_KEY}" >> $GITHUB_OUTPUT
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Cache lookup completed (lookup-only mode)"
|
||||
echo "Cache exists: ${MATCHED_KEY}"
|
||||
echo "=========================================="
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Mount OverlayFS
|
||||
# Download and extract cache
|
||||
S3_KEY="s3://${S3_BUCKET}/${MATCHED_KEY}-base.tar.zst"
|
||||
TEMP_TARBALL=$(mktemp /tmp/xahau-cache-XXXXXX.tar.zst)
|
||||
|
||||
echo ""
|
||||
echo "Mounting OverlayFS..."
|
||||
sudo mount -t overlay overlay \
|
||||
-o lowerdir="${CACHE_WORKSPACE}/base",upperdir="${CACHE_WORKSPACE}/upper",workdir="${CACHE_WORKSPACE}/work" \
|
||||
"${CACHE_WORKSPACE}/merged"
|
||||
echo "Downloading cache..."
|
||||
aws s3 cp "${S3_KEY}" "${TEMP_TARBALL}" --region "${S3_REGION}"
|
||||
|
||||
# Verify mount
|
||||
if mount | grep -q "${CACHE_WORKSPACE}/merged"; then
|
||||
echo "✓ OverlayFS mounted successfully"
|
||||
else
|
||||
echo "❌ Failed to mount OverlayFS"
|
||||
exit 1
|
||||
fi
|
||||
TARBALL_SIZE=$(du -h "${TEMP_TARBALL}" | cut -f1)
|
||||
echo "✓ Downloaded: ${TARBALL_SIZE}"
|
||||
|
||||
# Create target directory parent if needed
|
||||
TARGET_PARENT=$(dirname "${TARGET_PATH}")
|
||||
mkdir -p "${TARGET_PARENT}"
|
||||
# Create parent directory if needed
|
||||
mkdir -p "$(dirname "${TARGET_PATH}")"
|
||||
|
||||
# Remove existing target if it exists
|
||||
if [ -e "${TARGET_PATH}" ]; then
|
||||
@@ -299,30 +188,24 @@ runs:
|
||||
rm -rf "${TARGET_PATH}"
|
||||
fi
|
||||
|
||||
# Symlink target path to merged view
|
||||
echo "Creating symlink: ${TARGET_PATH} -> ${CACHE_WORKSPACE}/merged"
|
||||
ln -s "${CACHE_WORKSPACE}/merged" "${TARGET_PATH}"
|
||||
# Create target directory and extract
|
||||
mkdir -p "${TARGET_PATH}"
|
||||
echo ""
|
||||
echo "Extracting cache..."
|
||||
zstd -d -c "${TEMP_TARBALL}" | tar -xf - -C "${TARGET_PATH}"
|
||||
echo "✓ Cache extracted to: ${TARGET_PATH}"
|
||||
|
||||
# Save mount info for cleanup/save later
|
||||
# Format: path:workspace:matched_key:primary_key:exact_match:use_deltas
|
||||
# This tells save action whether to create new base (partial match) or just delta (exact match)
|
||||
MOUNT_REGISTRY="/tmp/xahau-cache-mounts.txt"
|
||||
echo "${TARGET_PATH}:${CACHE_WORKSPACE}:${MATCHED_KEY}:${CACHE_KEY}:${EXACT_MATCH}:${USE_DELTAS}" >> "${MOUNT_REGISTRY}"
|
||||
# Cleanup
|
||||
rm -f "${TEMP_TARBALL}"
|
||||
|
||||
# Set outputs
|
||||
echo "cache-hit=${EXACT_MATCH}" >> $GITHUB_OUTPUT
|
||||
echo "cache-primary-key=${MATCHED_KEY}" >> $GITHUB_OUTPUT
|
||||
|
||||
# Show statistics
|
||||
echo ""
|
||||
echo "Cache statistics:"
|
||||
echo " Base layer size: $(du -sh ${CACHE_WORKSPACE}/base 2>/dev/null | cut -f1 || echo '0')"
|
||||
echo " Delta layer size: $(du -sh ${CACHE_WORKSPACE}/upper 2>/dev/null | cut -f1 || echo '0')"
|
||||
echo " Merged view size: $(du -sh ${CACHE_WORKSPACE}/merged 2>/dev/null | cut -f1 || echo '0')"
|
||||
echo "cache-primary-key=${CACHE_KEY}" >> $GITHUB_OUTPUT
|
||||
echo "cache-matched-key=${MATCHED_KEY}" >> $GITHUB_OUTPUT
|
||||
|
||||
echo ""
|
||||
echo "=========================================="
|
||||
echo "Cache restore completed successfully"
|
||||
echo "Exact match: ${EXACT_MATCH}"
|
||||
echo "Cache hit: ${EXACT_MATCH}"
|
||||
echo "Matched key: ${MATCHED_KEY}"
|
||||
echo "=========================================="
|
||||
|
||||
Reference in New Issue
Block a user