Files
chromatin-vgae-hic/experiments/h2_rewiring/run.sh

102 lines
3.8 KiB
Bash

#!/usr/bin/env bash
# H2: Dynamic rewiring — RAD21 depletion perturbation analysis (HCT-116)
# Usage: bash experiments/h2_rewiring/run.sh
set -euo pipefail
# Repository root: two directory levels above the directory holding this script.
REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Put the repo first on PYTHONPATH. The ':' separator is emitted only when
# PYTHONPATH already has a value: a trailing empty entry ("$REPO:") would be
# interpreted by Python as the current working directory.
export PYTHONPATH="$REPO${PYTHONPATH:+:$PYTHONPATH}"
# Auto-activate the chromatin_gnn conda env when the current `python` cannot
# import torch_geometric. stderr of the probe is discarded on purpose: an
# ImportError here is the expected "not activated yet" signal, not an error.
if ! python -c "import torch_geometric" 2>/dev/null; then
  echo "Activating conda env chromatin_gnn..."
  # Without this check a missing conda only shows up as a bare
  # "conda: command not found" from the activate call below.
  if ! command -v conda >/dev/null 2>&1; then
    echo "ERROR: torch_geometric is not importable and 'conda' is not on PATH;" \
      "activate the chromatin_gnn environment manually and re-run." >&2
    exit 1
  fi
  # conda's shell hook and per-package activate.d scripts may read variables
  # that are unset, which aborts a `set -u` shell. Relax nounset only for the
  # activation itself and restore it afterwards if it was enabled.
  case "$-" in
    *u*) _nounset_was_on=1 ;;
    *) _nounset_was_on=0 ;;
  esac
  set +u
  eval "$(conda shell.bash hook)"
  conda activate chromatin_gnn
  if [[ "$_nounset_was_on" == 1 ]]; then
    set -u
  fi
  unset _nounset_was_on
fi
# Run parameters: each keeps a non-empty value inherited from the environment,
# otherwise falls back to the default given here.
: "${CHROM:=chr1}"
: "${RES:=25000}"
: "${EPOCHS:=300}"
: "${PATIENCE:=50}"
: "${SEED:=42}"
# Directory layout, all rooted at the repository checkout.
DATA="${REPO}/data"
EXP="${REPO}/experiments/h2_rewiring"
RESULTS_BASE="${REPO}/results/h2_rewiring"
RESULTS="${RESULTS_BASE}/${CHROM}" # per-chromosome outputs
# Creating the figures directory also creates the per-chromosome results dir.
mkdir -p "${RESULTS}/figures"
# ── Step 1: Build HCT-116 graphs ─────────────────────────────────────────────
# For each condition (control, treated_6h) run chromatin_gnn.build_graph on the
# condition's .mcool plus the four HCT-116 bigWig tracks, writing
# data/processed/hct116/<cond>_<chrom>.pt. An existing .pt is reused as-is.
BIGWIGS=(
  "$DATA/raw/HCT116_CTCF.bw"
  "$DATA/raw/HCT116_H3K27me3.bw"
  "$DATA/raw/HCT116_H3K27ac.bw"
  "$DATA/raw/HCT116_H3K4me3.bw"
)
# The script otherwise only creates the results tree; make sure the processed
# data directory exists before anything is written into it.
mkdir -p "$DATA/processed/hct116"
for COND in control treated_6h; do
  OUT="$DATA/processed/hct116/${COND}_${CHROM}.pt"
  MCOOL="$DATA/raw/HCT116_${COND}.mcool"
  if [[ -f "$OUT" ]]; then
    echo "[$COND] Graph already exists, skipping"
    continue
  fi
  # Fail early with a readable message instead of a Python traceback.
  if [[ ! -f "$MCOOL" ]]; then
    echo "ERROR: [$COND] missing Hi-C input: $MCOOL" >&2
    exit 1
  fi
  python -m chromatin_gnn.build_graph \
    --mcool "$MCOOL" \
    --chrom "$CHROM" --res "$RES" \
    --bigwigs "${BIGWIGS[@]}" \
    --out "$OUT"
done
# ── Step 2: Call loops on control (CTCF proxy fallback auto-triggers) ─────────
# Runs call_loops.py on the control .mcool at a fixed 10 kb resolution, unless
# the BEDPE output is already on disk. NOTE(review): the fallback mentioned in
# the heading lives inside call_loops.py and is not visible from this script.
LOOPS="${DATA}/processed/hct116/loops_${CHROM}_10kb.bedpe"
if [[ ! -f "${LOOPS}" ]]; then
  loop_args=(
    --mcool "${DATA}/raw/HCT116_control.mcool"
    --chrom "${CHROM}"
    --res 10000
    --out "${LOOPS}"
  )
  python "${EXP}/call_loops.py" "${loop_args[@]}"
fi
# ── Step 3: Train VGAE on HCT-116 control ────────────────────────────────────
# Training reuses the H1 experiment's train.py on the control graph and writes
# into the per-chromosome results directory; an existing model.pt there
# short-circuits the whole step.
if [[ ! -f "${RESULTS}/model.pt" ]]; then
  train_args=(
    --graph "${DATA}/processed/hct116/control_${CHROM}.pt"
    --encoder deep_gcn
    --hidden 128
    --latent 64
    --epochs "${EPOCHS}"
    --patience "${PATIENCE}"
    --lr 3e-4
    --dropout 0.3
    --beta 0.5
    --kl_anneal 100
    --seed "${SEED}"
    --outdir "${RESULTS}"
  )
  python "${REPO}/experiments/h1_representation/train.py" "${train_args[@]}"
else
  echo "[HCT116 control] Model already exists, skipping training"
fi
# ── Step 4: Perturbation analysis (encode + drift decomposition + perm test) ──
# Always runs (no cache check): feeds the control/treated graphs, the trained
# model and the loop calls to perturbation_analysis.py, which writes into the
# per-chromosome results directory.
analysis_args=(
  --control_graph "${DATA}/processed/hct116/control_${CHROM}.pt"
  --treated_graph "${DATA}/processed/hct116/treated_6h_${CHROM}.pt"
  --model "${RESULTS}/model.pt"
  --loops "${LOOPS}"
  --chrom "${CHROM}"
  --res "${RES}"
  --short_cutoff 1000000
  --long_cutoff 2000000
  --n_perm 1000
  --outdir "${RESULTS}"
)
python "${EXP}/perturbation_analysis.py" "${analysis_args[@]}"
# ── Step 5: Visualisations (skip with SKIP_FIGURES=1) ────────────────────────
# Figures are rendered when SKIP_FIGURES is unset, empty, or an explicit
# "off" value (0 / false / no); any other value skips the step. A plain
# non-empty test would make SKIP_FIGURES=0 skip the figures as well.
case "${SKIP_FIGURES:-}" in
  '' | 0 | false | FALSE | no | NO)
    python "$EXP/perturbation_viz.py" \
      --control_emb "$RESULTS/control_emb.npy" \
      --treated_emb "$RESULTS/treated_emb.npy" \
      --drift_full "$RESULTS/drift_full.npy" \
      --drift_short "$RESULTS/drift_short.npy" \
      --drift_long "$RESULTS/drift_long.npy" \
      --anchor_mask "$RESULTS/anchor_mask.npy" \
      --stats "$RESULTS/drift_stats.json" \
      --outdir "$RESULTS/figures" \
      --res "$RES"
    ;;
  *)
    echo "[SKIP_FIGURES=1] Skipping visualization step."
    ;;
esac
# Closing banner (preceded by a blank line), then dump the drift statistics
# JSON found in the results directory to stdout.
printf '\n'
printf '=== H2 complete (%s). Results: %s ===\n' "${CHROM}" "${RESULTS}"
cat "${RESULTS}/drift_stats.json"