#!/usr/bin/env bash
# H3: Long-range topology — graph ablation experiment
#
# Tests whether long-range edges (> 1 Mb) encode non-trivial topological
# structure beyond local contact density.
#
# Trains 3 VGAE variants on the same GM12878 graph (chr1 by default;
# override with CHROM):
#   full      — all edges up to 5 Mb (reused from H1; no retraining)
#   local     — only edges < 250 kb (within-TAD scale)
#   longrange — only edges > 1 Mb (sub-compartment scale)
#
# Usage:
#   bash experiments/h3_longrange/run.sh
#
# Optional environment overrides (defaults in parentheses):
#   CHROM (chr1), RES (25000), SEED (42), EPOCHS (300), PATIENCE (50),
#   HIDDEN (256), LATENT (64), DEVICE (auto)

set -euo pipefail

# Repository root is two directories above this script.
REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
export PYTHONPATH="$REPO:${PYTHONPATH:-}"

# Auto-activate conda env if its packages are not on the current Python
if ! python -c "import torch_geometric" 2>/dev/null; then
  if ! command -v conda >/dev/null 2>&1; then
    echo "error: torch_geometric is not importable and conda is not on PATH" >&2
    exit 1
  fi
  echo "Activating conda env chromatin_gnn..."
  # conda's shell hook and per-package activate.d scripts may expand unset
  # variables, which would abort the script under `set -u`; relax nounset
  # only for the activation itself.
  set +u
  eval "$(conda shell.bash hook)"
  conda activate chromatin_gnn
  set -u
fi

CHROM="${CHROM:-chr1}"
RES="${RES:-25000}"
SEED="${SEED:-42}"
EPOCHS="${EPOCHS:-300}"
PATIENCE="${PATIENCE:-50}"
HIDDEN="${HIDDEN:-256}"   # match H1 full-model hyperparameters
LATENT="${LATENT:-64}"
DEVICE="${DEVICE:-auto}"

DATA="$REPO/data"
RESULTS_BASE="$REPO/results/h3_longrange"
RESULTS="$RESULTS_BASE/${CHROM}"             # per-chromosome outputs
H1_BASE="$REPO/results/h1_representation"
H1_CHR_DIR="$H1_BASE/${CHROM}"               # per-chrom H1 outputs
COMPARTMENTS_DIR="$H1_BASE/compartments"     # shared, $CHROM in filename
COMPARTMENTS_CSV="$COMPARTMENTS_DIR/gm12878_${CHROM}.csv"
EXP="$REPO/experiments/h3_longrange"
TRAIN="$REPO/experiments/h1_representation/train.py"

GRAPH_DIR="$DATA/processed/gm12878"
FULL_GRAPH="$GRAPH_DIR/${CHROM}.pt"
LOCAL_GRAPH="$GRAPH_DIR/${CHROM}_local.pt"
LONGRANGE_GRAPH="$GRAPH_DIR/${CHROM}_longrange.pt"

# Training options shared by every run in Steps 2a and 2b. Keeping them in a
# single array guarantees the real-feature and constant-feature runs use the
# same hyperparameters.
TRAIN_ARGS=(
  --encoder deep_gcn
  --hidden "$HIDDEN" --latent "$LATENT"
  --epochs "$EPOCHS" --patience "$PATIENCE"
  --lr 3e-4 --dropout 0.3 --beta 0.5 --kl_anneal 100
  --seed "$SEED"
  --device "$DEVICE"
)

#######################################
# Train one model unless its output directory already holds model.pt.
# Globals:   TRAIN, TRAIN_ARGS (read)
# Arguments: $1 - label printed in the "already trained" message
#            $2 - banner text printed before training starts
#            $3 - path of the input graph (.pt)
#            $4 - output directory for the run
#            $5.. - extra options forwarded to the training script
# Returns:   0 if skipped or training succeeded; otherwise the training
#            script's exit status (which aborts the script under `set -e`).
#######################################
train_if_missing() {
  local skip_label=$1 banner=$2 graph=$3 outdir=$4
  shift 4

  if [[ -f "$outdir/model.pt" ]]; then
    echo "[$skip_label] Already trained, skipping"
    return 0
  fi

  echo "=== $banner ==="
  python "$TRAIN" \
    --graph "$graph" \
    "${TRAIN_ARGS[@]}" \
    "$@" \
    --outdir "$outdir"
}

mkdir -p "$RESULTS"

# Step 3 passes these H1 outputs to the evaluation scripts. Warn now (without
# aborting) so a missing file is noticed before the training steps run.
for REQUIRED in \
  "$H1_CHR_DIR/gm12878_emb.npy" \
  "$H1_CHR_DIR/metrics.json" \
  "$COMPARTMENTS_CSV"; do
  if [[ ! -f "$REQUIRED" ]]; then
    echo "warning: $REQUIRED not found; Step 3 passes it to the evaluation scripts" >&2
  fi
done

# ── Step 1: Build ablation graphs ─────────────────────────────────────────────
if [[ -f "$LOCAL_GRAPH" && -f "$LONGRANGE_GRAPH" ]]; then
  echo "Ablation graphs already exist, skipping build."
else
  echo "=== Step 1: Building ablation graphs ==="
  python "$EXP/build_ablation_graphs.py" \
    --graph "$FULL_GRAPH" \
    --res "$RES" \
    --short_cutoff 250000 \
    --long_cutoff 1000000 \
    --out_local "$LOCAL_GRAPH" \
    --out_longrange "$LONGRANGE_GRAPH"
fi

# ── Step 2a: Train real-features variants (full is reused from H1) ───────────
for VARIANT in local longrange; do
  train_if_missing \
    "$VARIANT real-features" \
    "Step 2a: Training $VARIANT-only (real features)" \
    "$GRAPH_DIR/${CHROM}_${VARIANT}.pt" \
    "$RESULTS/${VARIANT}_only"
done

# ── Step 2b: Constant-features cross-ablation ────────────────────────────────
# Trains the bottom row of the 2×3 (feature × edge) grid: constant ones features
# on the full graph and on each edge-band subset. With features removed, the
# encoder must rely on graph topology alone — so any compartment signal in the
# resulting embeddings reflects what each edge subset carries topologically.
for CELL in full local longrange; do
  case "$CELL" in
    full) GRAPH="$FULL_GRAPH" ;;
    *)    GRAPH="$GRAPH_DIR/${CHROM}_${CELL}.pt" ;;
  esac
  train_if_missing \
    "$CELL const-features" \
    "Step 2b: Training $CELL (constant features)" \
    "$GRAPH" \
    "$RESULTS/${CELL}_const" \
    --constant_features
done

# ── Step 3a: Edge-only ablation (3-way comparison, real features) ────────────
echo "=== Step 3a: Evaluating edge-only ablation ==="
python "$EXP/evaluate_ablation.py" \
  --full_emb "$H1_CHR_DIR/gm12878_emb.npy" \
  --full_metrics "$H1_CHR_DIR/metrics.json" \
  --local_dir "$RESULTS/local_only" \
  --longrange_dir "$RESULTS/longrange_only" \
  --compartments "$COMPARTMENTS_CSV" \
  --out "$RESULTS/ablation_comparison.json"

# ── Step 3b: Feature × edge cross-ablation (2×3 grid) ────────────────────────
echo "=== Step 3b: Evaluating feature × edge cross-ablation ==="
python "$EXP/evaluate_cross_ablation.py" \
  --full_real_dir "$H1_CHR_DIR" \
  --local_real_dir "$RESULTS/local_only" \
  --longrange_real_dir "$RESULTS/longrange_only" \
  --full_const_dir "$RESULTS/full_const" \
  --local_const_dir "$RESULTS/local_const" \
  --longrange_const_dir "$RESULTS/longrange_const" \
  --compartments "$COMPARTMENTS_CSV" \
  --out "$RESULTS/cross_ablation.json"

echo ""
echo "=== H3 complete ($CHROM). Results: $RESULTS ==="