#!/usr/bin/env bash
# Build all processed graphs needed for the H1 and H2 genome-wide trainings.
# Runs only the build_graph + compartment steps; no model training.
# Designed to be run on the data-rich box (e.g. Contabo) so that only the
# small .pt files need to be transferred to the GPU box.
#
# Usage: bash scripts/build_genome_graphs.sh [chr2 chr5 ...]
#        (no args = chr1..chr22)
set -euo pipefail

# Repository root: the parent of the directory that holds this script.
REPO="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
export PYTHONPATH="$REPO:${PYTHONPATH:-}"

# Auto-activate the conda env, but only when the current python cannot
# import torch_geometric.
if ! python -c "import torch_geometric" 2>/dev/null; then
  if ! command -v conda >/dev/null 2>&1; then
    echo "error: torch_geometric is not importable and conda was not found on PATH" >&2
    # Same status the shell itself reports for a missing command.
    exit 127
  fi
  # conda's shell hook and env activation scripts may read unset variables,
  # which would abort the script under `set -u`; relax it for activation only.
  set +u
  eval "$(conda shell.bash hook)"
  conda activate chromatin_gnn
  set -u
fi

# RES can be overridden from the environment; it is passed to the tools as --res.
RES="${RES:-25000}"
DATA="$REPO/data"
COMPARTMENTS_DIR="$REPO/results/h1_representation/compartments"
mkdir -p "$COMPARTMENTS_DIR"

# Chromosomes to process: the command-line arguments, or chr1..chr22 by default.
if (( $# > 0 )); then
  CHROMS=("$@")
else
  CHROMS=(chr{1..22})
fi

#######################################
# Build the processed graph for one cell line and one chromosome.
# Skips the work when the output file already exists.
# Globals:   DATA, RES (read)
# Arguments: $1 - cell-line name as used in the raw file names (e.g. GM12878)
#            $2 - cell-line label used in log messages (e.g. gm12878)
#            $3 - chromosome name, passed as --chrom
#            $4 - output path, passed as --out
# Outputs:   progress messages on stdout
# Returns:   0 when the output already exists or the build succeeds,
#            otherwise the exit status of the python command
#######################################
build_one() {
  local cell_upper="$1" cell_lower="$2" chrom="$3" out="$4"
  local rc=0

  if [[ -f "$out" ]]; then
    echo " [$cell_lower $chrom] graph exists, skip"
    return 0
  fi

  echo " [$cell_lower $chrom] building graph..."
  python -m chromatin_gnn.build_graph \
    --mcool "$DATA/raw/${cell_upper}.mcool" \
    --chrom "$chrom" --res "$RES" \
    --bigwigs \
      "$DATA/raw/${cell_upper}_CTCF.bw" \
      "$DATA/raw/${cell_upper}_H3K27me3.bw" \
      "$DATA/raw/${cell_upper}_H3K4me3.bw" \
    --out "$out" || rc=$?

  if (( rc != 0 )); then
    # The output did not exist before this call, so anything at that path
    # now is a leftover of the failed run. Remove it so the existence check
    # above does not mistake it for a finished graph on the next run.
    rm -f -- "$out"
  fi
  return "$rc"
}

#######################################
# Compute the compartments file for one cell line and one chromosome.
# Skips the work when the output file already exists.
# Globals:   REPO, DATA, RES (read)
# Arguments: $1 - cell-line name as used in the raw file names (e.g. GM12878)
#            $2 - cell-line label used in log messages (e.g. gm12878)
#            $3 - chromosome name, passed as --chrom
#            $4 - output path, passed as --out
# Outputs:   progress messages on stdout
# Returns:   0 when the output already exists or the computation succeeds,
#            otherwise the exit status of the python command
#######################################
build_compartments() {
  local cell_upper="$1" cell_lower="$2" chrom="$3" out="$4"
  local rc=0

  if [[ -f "$out" ]]; then
    echo " [$cell_lower $chrom] compartments exist, skip"
    return 0
  fi

  echo " [$cell_lower $chrom] computing compartments..."
  python "$REPO/experiments/h1_representation/compute_compartments.py" \
    --mcool "$DATA/raw/${cell_upper}.mcool" \
    --chrom "$chrom" --res "$RES" \
    --bigwig_orient "$DATA/raw/${cell_upper}_CTCF.bw" \
    --out "$out" || rc=$?

  if (( rc != 0 )); then
    # The output did not exist before this call, so anything at that path
    # now is a leftover of the failed run. Remove it so the existence check
    # above does not mistake it for a finished result on the next run.
    rm -f -- "$out"
  fi
  return "$rc"
}

#######################################
# Build the processed HCT116 graph for one condition and one chromosome.
# The output goes to $DATA/processed/hct116/<condition>_<chromosome>.pt and
# the work is skipped when that file already exists.
# Globals:   DATA, RES (read)
# Arguments: $1 - condition, as used in the raw file name HCT116_<condition>.mcool
#            $2 - chromosome name, passed as --chrom
# Outputs:   progress messages on stdout
# Returns:   0 when the output already exists or the build succeeds,
#            otherwise the exit status of the failing command
#######################################
build_hct116() {
  local cond="$1" chrom="$2"
  local out_dir="$DATA/processed/hct116"
  local out="$out_dir/${cond}_${chrom}.pt"
  local rc=0

  if [[ -f "$out" ]]; then
    echo " [hct116 $cond $chrom] graph exists, skip"
    return 0
  fi

  echo " [hct116 $cond $chrom] building graph..."
  # This function chooses the output location itself, so make sure the
  # directory is there instead of relying on the caller to create it.
  mkdir -p -- "$out_dir" || return

  python -m chromatin_gnn.build_graph \
    --mcool "$DATA/raw/HCT116_${cond}.mcool" \
    --chrom "$chrom" --res "$RES" \
    --bigwigs \
      "$DATA/raw/HCT116_CTCF.bw" \
      "$DATA/raw/HCT116_H3K27me3.bw" \
      "$DATA/raw/HCT116_H3K27ac.bw" \
      "$DATA/raw/HCT116_H3K4me3.bw" \
    --out "$out" || rc=$?

  if (( rc != 0 )); then
    # The output did not exist before this call, so anything at that path
    # now is a leftover of the failed run. Remove it so the existence check
    # above does not mistake it for a finished graph on the next run.
    rm -f -- "$out"
  fi
  return "$rc"
}

START=$(date +%s)

# The output directories do not depend on the chromosome, so create them
# once up front rather than on every loop iteration.
mkdir -p "$DATA/processed/gm12878" "$DATA/processed/imr90" "$DATA/processed/hct116"

# For every requested chromosome, build the graphs and compartment files.
# Each helper skips outputs that already exist, so the script can be re-run.
for CHROM in "${CHROMS[@]}"; do
  echo ""
  echo "=== $CHROM ==="

  # H1 — GM12878 + IMR90
  build_one GM12878 gm12878 "$CHROM" "$DATA/processed/gm12878/${CHROM}.pt"
  build_one IMR90 imr90 "$CHROM" "$DATA/processed/imr90/${CHROM}.pt"
  build_compartments GM12878 gm12878 "$CHROM" "$COMPARTMENTS_DIR/gm12878_${CHROM}.csv"
  build_compartments IMR90 imr90 "$CHROM" "$COMPARTMENTS_DIR/imr90_${CHROM}.csv"

  # H2 — HCT-116 control + treated
  build_hct116 control "$CHROM"
  build_hct116 treated_6h "$CHROM"
done

# Wall-clock duration of the whole run, in seconds.
ELAPSED=$(( $(date +%s) - START ))
echo ""
echo "=== Done in ${ELAPSED}s. Processed graphs in $DATA/processed/, compartments in $COMPARTMENTS_DIR ==="
echo "Now rsync data/processed/ and results/h1_representation/compartments/ to Vast."