#!/usr/bin/env bash
# Build all processed graphs needed for H1 and H2 genome-wide trainings.
# Runs only the build_graph + compartment steps — no model training.
# Designed to be run on the data-rich box (e.g. Contabo) so only the small
# .pt files need to be transferred to the GPU box.
#
# Usage: bash scripts/build_genome_graphs.sh [chr2 chr5 ...]
#        (no args = chr1..chr22)
#
# Environment:
#   RES  value forwarded as --res to both Python steps (default: 25000)

set -euo pipefail

REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export PYTHONPATH="$REPO:${PYTHONPATH:-}"

# Auto-activate conda env when torch_geometric is not importable by the
# current `python`.
if ! python -c "import torch_geometric" 2>/dev/null; then
  if ! command -v conda >/dev/null 2>&1; then
    echo "error: torch_geometric is not importable and conda is not on PATH" >&2
    exit 1
  fi
  # conda's shell hook and env activate.d scripts can read unset variables,
  # which would abort the script under nounset; relax it for activation only.
  set +u
  eval "$(conda shell.bash hook)"
  conda activate chromatin_gnn
  set -u
fi

RES="${RES:-25000}"
DATA="$REPO/data"
COMPARTMENTS_DIR="$REPO/results/h1_representation/compartments"

# Output directories are the same for every chromosome, so create them once.
mkdir -p "$COMPARTMENTS_DIR" \
  "$DATA/processed/gm12878" "$DATA/processed/imr90" "$DATA/processed/hct116"

if (( $# > 0 )); then
  CHROMS=("$@")
else
  CHROMS=(chr{1..22})
fi

#######################################
# Run a command; if it fails, remove OUT and return the command's status.
# The builders below skip work when OUT already exists, so a file left
# behind by a failed run (if the tool writes one — not verifiable from this
# script) must not survive to be mistaken for a finished result.
# Arguments: $1 - output path to discard on failure
#            $2.. - command and its arguments
# Returns:   exit status of the command
#######################################
_run_or_discard() {
  local out="$1"
  shift
  local rc=0
  "$@" || rc=$?
  if (( rc != 0 )); then
    rm -f -- "$out"
  fi
  return "$rc"
}

#######################################
# Invoke chromatin_gnn.build_graph for one mcool/chromosome.
# Globals:   RES (read)
# Arguments: $1 - mcool path, $2 - chromosome, $3 - output path,
#            $4.. - bigwig paths passed after --bigwigs, in order
#######################################
_build_graph() {
  local mcool="$1" chrom="$2" out="$3"
  shift 3
  _run_or_discard "$out" python -m chromatin_gnn.build_graph \
    --mcool "$mcool" \
    --chrom "$chrom" --res "$RES" \
    --bigwigs "$@" \
    --out "$out"
}

#######################################
# Build the graph for one cell line and chromosome unless OUT exists.
# Globals:   DATA, RES (read)
# Arguments: $1 - cell name as used in data/raw file names (e.g. GM12878)
#            $2 - cell label used in log messages only
#            $3 - chromosome
#            $4 - output path
#######################################
build_one() {
  local CELL_UPPER="$1" CELL_LOWER="$2" CHROM="$3" OUT="$4"
  if [[ -f "$OUT" ]]; then
    echo " [$CELL_LOWER $CHROM] graph exists, skip"
    return 0
  fi
  echo " [$CELL_LOWER $CHROM] building graph..."
  _build_graph "$DATA/raw/${CELL_UPPER}.mcool" "$CHROM" "$OUT" \
    "$DATA/raw/${CELL_UPPER}_CTCF.bw" \
    "$DATA/raw/${CELL_UPPER}_H3K27me3.bw" \
    "$DATA/raw/${CELL_UPPER}_H3K4me3.bw"
}

#######################################
# Run compute_compartments.py for one cell line and chromosome unless OUT
# exists. The cell's CTCF bigwig is passed as --bigwig_orient.
# Globals:   REPO, DATA, RES (read)
# Arguments: $1 - cell name as used in data/raw file names
#            $2 - cell label used in log messages only
#            $3 - chromosome
#            $4 - output path
#######################################
build_compartments() {
  local CELL_UPPER="$1" CELL_LOWER="$2" CHROM="$3" OUT="$4"
  if [[ -f "$OUT" ]]; then
    echo " [$CELL_LOWER $CHROM] compartments exist, skip"
    return 0
  fi
  echo " [$CELL_LOWER $CHROM] computing compartments..."
  _run_or_discard "$OUT" \
    python "$REPO/experiments/h1_representation/compute_compartments.py" \
    --mcool "$DATA/raw/${CELL_UPPER}.mcool" \
    --chrom "$CHROM" --res "$RES" \
    --bigwig_orient "$DATA/raw/${CELL_UPPER}_CTCF.bw" \
    --out "$OUT"
}

#######################################
# Build the HCT116 graph for one condition and chromosome unless the output
# data/processed/hct116/<COND>_<CHROM>.pt exists. The mcool is per-condition;
# the four bigwigs are shared across conditions.
# Globals:   DATA, RES (read)
# Arguments: $1 - condition (suffix of the HCT116_<COND>.mcool file name)
#            $2 - chromosome
#######################################
build_hct116() {
  local COND="$1" CHROM="$2"
  local OUT="$DATA/processed/hct116/${COND}_${CHROM}.pt"
  if [[ -f "$OUT" ]]; then
    echo " [hct116 $COND $CHROM] graph exists, skip"
    return 0
  fi
  echo " [hct116 $COND $CHROM] building graph..."
  _build_graph "$DATA/raw/HCT116_${COND}.mcool" "$CHROM" "$OUT" \
    "$DATA/raw/HCT116_CTCF.bw" \
    "$DATA/raw/HCT116_H3K27me3.bw" \
    "$DATA/raw/HCT116_H3K27ac.bw" \
    "$DATA/raw/HCT116_H3K4me3.bw"
}

START=$(date +%s)

for CHROM in "${CHROMS[@]}"; do
  echo ""
  echo "=== $CHROM ==="

  # H1 — GM12878 + IMR90
  build_one GM12878 gm12878 "$CHROM" "$DATA/processed/gm12878/${CHROM}.pt"
  build_one IMR90 imr90 "$CHROM" "$DATA/processed/imr90/${CHROM}.pt"
  build_compartments GM12878 gm12878 "$CHROM" "$COMPARTMENTS_DIR/gm12878_${CHROM}.csv"
  build_compartments IMR90 imr90 "$CHROM" "$COMPARTMENTS_DIR/imr90_${CHROM}.csv"

  # H2 — HCT-116 control + treated
  build_hct116 control "$CHROM"
  build_hct116 treated_6h "$CHROM"
done

ELAPSED=$(( $(date +%s) - START ))
echo ""
echo "=== Done in ${ELAPSED}s. Processed graphs in $DATA/processed/, compartments in $COMPARTMENTS_DIR ==="
echo "Now rsync data/processed/ and results/h1_representation/compartments/ to Vast."