Files
kg-scr/notebooks/02_functions_legacy.ipynb

3655 lines
112 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e58ed372",
"metadata": {},
"outputs": [],
"source": [
"import platform\n",
"from pathlib import Path\n",
"import pytesseract\n",
"from PIL import Image\n",
"from txtai.embeddings import Embeddings"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "5f2d75d9",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'kg_scr'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkg_scr\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'kg_scr'"
]
}
],
"source": [
"import kg_scr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "10a7eff9",
"metadata": {},
"outputs": [],
"source": [
"# Default screenshot directory per OS, keyed by the value of platform.system().\n",
"def_paths = {\n",
"    \"Darwin\": Path.home() / \"Desktop\",\n",
"    \"Windows\": Path.home() / \"Pictures\" / \"Screenshots\",\n",
"    \"Linux\": Path.home() / \"Pictures\",\n",
"}\n",
"\n",
"# Glob patterns for screenshot filenames per OS, keyed by the value of platform.system().\n",
"# Older macOS releases name files 'Screen Shot <date> at <time>.png' (with a space),\n",
"# newer ones 'Screenshot <date> at <time>.png', so both spellings are listed for Darwin.\n",
"sc_pathpatterns = {\n",
"    \"Darwin\": [\"SCR*.png\", \"Screenshot*.png\", \"Screen Shot*.png\"],\n",
"    \"Windows\": [\"Screenshot*.png\"],\n",
"    \"Linux\": [\"Screenshot*.png\", \"scrot*.png\", \"screenshot*.png\"],\n",
"}\n",
"\n",
"# functions\n",
"\n",
"def get_screenshots(path: str | Path | None = None) -> list[str]:\n",
"    \"\"\"Find screenshot files for the current OS.\n",
"\n",
"    Args:\n",
"        path: Directory to search. When None, the per-OS default from\n",
"            def_paths is used, falling back to the home directory when the\n",
"            platform has no entry. A leading ~ is expanded.\n",
"\n",
"    Returns:\n",
"        Sorted, de-duplicated absolute paths (as strings) of the regular\n",
"        files in that directory whose names match the patterns listed for\n",
"        the current platform in sc_pathpatterns.\n",
"    \"\"\"\n",
"    system = platform.system()\n",
"    if path is None:\n",
"        path = def_paths.get(system, Path.home())\n",
"    # Expand ~ so callers can pass '~/Desktop'; glob would otherwise look for a\n",
"    # directory literally named '~' and silently return nothing.\n",
"    folder = Path(path).expanduser()\n",
"    patterns = sc_pathpatterns.get(system, [\"SCR*.png\"])\n",
"    found = {\n",
"        str(match.absolute())\n",
"        for pattern in patterns\n",
"        for match in folder.glob(pattern)\n",
"        if match.is_file()  # a directory that happens to match cannot be opened as an image\n",
"    }\n",
"    # The set removes files matched by more than one pattern.\n",
"    return sorted(found)\n",
"\n",
"def extract_text(images: list[str]) -> list[str]:\n",
"    \"\"\"OCR a list of image paths into text.\n",
"\n",
"    Args:\n",
"        images: Paths of image files to open with PIL.\n",
"\n",
"    Returns:\n",
"        One OCR result string per input path, in input order.\n",
"    \"\"\"\n",
"    texts = []\n",
"    for img in images:\n",
"        # Open through a context manager so each file handle is released as soon\n",
"        # as its OCR call returns instead of lingering until garbage collection.\n",
"        with Image.open(img) as picture:\n",
"            texts.append(pytesseract.image_to_string(picture))\n",
"    return texts\n",
"\n",
"def create_and_index(data: list[str], model=\"sentence-transformers/all-MiniLM-L6-v2\") -> Embeddings:\n",
"    \"\"\"Build an Embeddings instance and index the given texts.\n",
"\n",
"    Args:\n",
"        data: Texts handed to Embeddings.index().\n",
"        model: Value used for the 'path' entry of the Embeddings config.\n",
"\n",
"    Returns:\n",
"        The Embeddings instance after index(data) has been called on it.\n",
"    \"\"\"\n",
"    settings = {\n",
"        \"path\": model,\n",
"        \"content\": True,\n",
"        # \"graph\": True,\n",
"        \"hybrid\": True,\n",
"        \"scoring\": \"bm25\",\n",
"    }\n",
"    index = Embeddings(settings)\n",
"    index.index(data)\n",
"    return index\n",
"\n",
"def query_embedding(embeddings: Embeddings, query: str, limit: int = 100) -> list[str]:\n",
"    \"\"\"Run a search and return the text of every hit.\n",
"\n",
"    Args:\n",
"        embeddings: Index to search.\n",
"        query: Search query passed to embeddings.search().\n",
"        limit: Passed to embeddings.search() as its second positional argument.\n",
"\n",
"    Returns:\n",
"        The 'text' entry of each result, in the order search() returned them.\n",
"    \"\"\"\n",
"    texts = []\n",
"    for hit in embeddings.search(query, limit):\n",
"        texts.append(hit[\"text\"])\n",
"    return texts"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e73d6386",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "da81a1463d0f4d5694edbfd412e52763",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading weights: 0%| | 0/103 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[1mBertModel LOAD REPORT\u001b[0m from: sentence-transformers/all-MiniLM-L6-v2\n",
"Key | Status | | \n",
"------------------------+------------+--+-\n",
"embeddings.position_ids | UNEXPECTED | | \n",
"\n",
"\u001b[3mNotes:\n",
"- UNEXPECTED\u001b[3m\t:can be ignored when loading from different task/architecture; not ok if you expect identical arch.\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"SPRINGER NATURE Link\n",
"\n",
"Find ajournal Publishwithus Track your research Q Search\n",
"\n",
"Home > Genome Biology > Article\n",
"\n",
"HiC-Pro: an optimized and flexible pipeline\n",
"for Hi-C data processing\n",
"\n",
"Software | Openaccess | Published: 01 December 2015\n",
"Volume 16, articlenumber 259, (2015) Cite this article\n",
"\n",
"Download PDF @ You have full access to this open access article\n",
"\n",
"Nicolas Servant 4, Nelle Varoquaux, Bryan R. Lajoie, Eric Viara, Chong-Jian Chen, Jean-Philippe Vert,\n",
"Edith Heard, Job Dekker & Emmanuel Barillot\n",
"\n",
"S) 65k Accesses f) 1404 Citations & 19 Altmetric & 3 Mentions Exploreall metrics >\n",
"\n",
"Abstract\n",
"\n",
"\n",
"o-Yof = AinTiating: H1iv—-rro-master/scripts/srce/cutsite_trimming.cpp\n",
"5.037 creating: HiC-Pro-master/test-op/\n",
"\n",
"5.038 inflating: HiC-Pro-master/test-op/config_test_as.txt\n",
"\n",
"5.038 inflating: HiC-Pro-master/test-op/config_test_cap.txt\n",
"\n",
"5.038 inflating: HiC-Pro-master/test-op/config_test_dnase.txt\n",
"\n",
"5.038 inflating: HiC-Pro-master/test-op/config_test_latest.txt\n",
"5.038 inflating: HiC-Pro-master/test-op/run-test-op.sh\n",
"\n",
"5.038 finishing deferred symbolic links:\n",
"\n",
"5.038 HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
"5.095 Make sure internet connection works for your shell prompt under current user's privilege ...\n",
"5.096 Starting HiC-Pro installation !\n",
"\n",
"5.122 Exit - Error : Configuration file not found\n",
"\n",
"41 # Install HiC-Pro\n",
"\n",
"42 | >>> RUN cd /opt && \\\n",
"\n",
"43 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
"\n",
"44 | >>> unzip hicpro_latest.zip && \\\n",
"\n",
"45 | >>> cd HiC-Pro-master/scripts/install && \\\n",
"\n",
"46 | >>> bash install_dependencies.sh -c config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.@ -q && \\\n",
"47 | >>> cd /opt/HiC-Pro-master && \\\n",
"\n",
"48 | >>> make install && \\\n",
"\n",
"49 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
"\n",
"5@ | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"corrected Hi-C counts\n",
"\n",
"10!\n",
"\n",
"10°\n",
"\n",
"107?\n",
"\n",
"10°\n",
"genomic distance\n",
"\n",
"—— data_mcool.h5\n",
"\n",
"> Decay curve\n",
"\n",
"> First converted into .h5\n",
"format\n",
"\n",
"> HiCExplorer—-\n",
"\n",
"hicPlotDistVsCounts()\n",
"> Data quality and\n",
"\n",
"comparison\n",
"\n",
"4.524 HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
"\n",
"4.575 Make sure internet connection works for your shell prompt under current user's privilege ...\n",
"\n",
"4.575 Starting HiC-Pro installation !\n",
"\n",
"4.976 Checking dependencies\n",
"\n",
"4.976 - Python libraries ...0K\n",
"\n",
"6.765 — R installation ...0K\n",
"\n",
"9.515 - Bowtie2 installation ...0K\n",
"\n",
"9.531 - Samtools installation ...0K\n",
"\n",
"9.590\n",
"\n",
"9.598 Checking HiC-Pro configuration\n",
"\n",
"9.758 - Configuration for TORQUE/PBS system ...0K\n",
"\n",
"9.758\n",
"\n",
"9.758 done !\n",
"\n",
"9.844 (g++ -Wall -02 -std=c++@x -o build_matrix /opt/HiC-Pro-master/scripts/src/build_matrix.cpp; mv build_matrix /opt/HiC-Pro-master/scripts)\n",
"16.47 (g++ -Wall -02 -std=c++@x -o cutsite_trimming /opt/HiC-Pro-master/scripts/src/cutsite_trimming.cpp; mv cutsite_trimming /opt/HiC-Pro-master/scripts)\n",
"19.24 realpath: /opt/hicpro/HiC-Pro_3.1.@: No such file or directory\n",
"\n",
"19.25 cp -Ri /opt/HiC-Pro-master /opt/hicpro/HiC-Pro_3.1.0\n",
"\n",
"19.26 cp: cannot create directory '/opt/hicpro/HiC-Pro_3.1.@': No such file or directory\n",
"\n",
"19.27 make: *** [Makefile:78: cp] Error 1\n",
"\n",
"Dockerfile:42\n",
"\n",
"# Install HiC-Pro\n",
"\n",
"41 |\n",
"42 | >>> RUN cd /opt && \\\n",
"43 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
"44 | >>> unzip hicpro_latest.zip && \\\n",
"45 | >>> cd HiC-Pro-master && \\\n",
"46 | >>> bash scripts/install/install_dependencies.sh -c config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.0 -q && \\\n",
"47 | >>> make install && \\\n",
"48 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
"49 | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
"|\n",
"\n",
"5@\n",
"\n",
"\n",
"Visualization: HiGlass\n",
"\n",
"HICCUPs juicer_tools:\n",
"\n",
"bedpe file\n",
"\n",
"¥\n",
"\n",
"Enrichmnet: Juicer\n",
"APA,\n",
"TADs: Arrowhead\n",
"\n",
"Juicer\n",
"\n",
"v\n",
"\n",
"Visualization: JuiceBox\n",
"Analysis: HiC Straw\n",
"\n",
"Trimmomatic, FostQC\n",
"\n",
"HIC-Pro\n",
"(Current)\n",
"\n",
"validpairs file\n",
"\n",
"¥\n",
"\n",
"Analysis: Cooler\n",
"library python\n",
"\n",
">\n",
"\n",
"FitHiC2 loop caller\n",
"\n",
"Enrichment:\n",
"coolpup.py\n",
"\n",
"HiC - Pro Juicer\n",
"\n",
"Parailel Computing Hi-C Fragment\n",
"A Sequenced Alignment and Duplicate Map creation\n",
"Hi-C Reads Chimera Handling Merge Sort removal\n",
"a on\n",
"==\" a —— RI R2\n",
"Sequencing © ———— SSS EES ESS\n",
"Ey SSS SSS . > .\n",
"\n",
"ae a ee : -.\n",
"\n",
"\n",
"Visualization: HiGlass,\n",
"JuicaBox\n",
"\n",
"HICCUPS juicer_tools:\n",
"\n",
"-bedpe file\n",
"\n",
"Enrichmnet Juicer\n",
"\n",
"APA,\n",
"TADS: Arrowhead\n",
"\n",
"Juicer\n",
"\n",
"Timmomatic, FastQC\n",
"\n",
"Hic-Pro,\n",
"\n",
"tbedpe ~—————>_GenomicLinks\n",
"\n",
"Visualization: JuiceBox\n",
"Anolysis: Hic Straw\n",
"\n",
"Juicer\n",
"dump\n",
"\n",
"Hic-Pro -\n",
"build_matrix\n",
"\n",
"Individual Matrices <——\n",
"\n",
"Analysis: Cooler\n",
"liorary python,\n",
"\n",
"> FitHiC2 loop caller\n",
"\n",
"Enrichment:\n",
"coolpup.ey\n",
"\n",
"Visualization: HiGloss\n",
"\n",
"Visualization: HiGlass\n",
"\n",
"HICCUPS juicer_tools:\n",
"\n",
"bbedpe file\n",
"\n",
"Enrichmnet: Juicer\n",
"APA,\n",
"TADs: Arrowhead\n",
"\n",
"Juicer\n",
"\n",
"Visualization: JuiceBox\n",
"Analysis: HIC Straw\n",
"\n",
"Tiimmomatic, FastQc\n",
"\n",
"Hic-Pro\n",
"(Current)\n",
"\n",
"validpairs file\n",
"\n",
"Analysis: Cooler\n",
"library python\n",
"\n",
">\n",
"\n",
"FithiC2 loop caller\n",
"\n",
"Enrichment:\n",
"coolpup.py\n",
"\n",
"corrected Hi-C counts\n",
"\n",
"10!\n",
"\n",
"10°\n",
"\n",
"107}\n",
"\n",
"104\n",
"\n",
"10°\n",
"genomic distance\n",
"\n",
"10®\n",
"\n",
"—— data_mcool.h5\n",
"\n",
"\n",
"Aman\n",
"_——\n",
"\n",
"Parallel Computing Hi-C Fragment\n",
"\n",
"——\n",
"\n",
"—_—_—\n",
"—\n",
"—4 Sequencing ——\n",
"\n",
"Singleton\n",
"Low MAPQ\n",
"\n",
"Dumped Pairs\n",
"\n",
"\n",
"706883\n",
"706884\n",
"706886\n",
"706885\n",
"706887\n",
"706888\n",
"706890\n",
"706891\n",
"706892\n",
"706889\n",
"706875\n",
"706873\n",
"706876\n",
"706874\n",
"1\n",
"\n",
"1321\n",
"\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"root\n",
"\n",
"messagebu\n",
"\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"20\n",
"\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"2866M\n",
"23440\n",
"23448\n",
"19992\n",
"19992\n",
"\n",
"164M\n",
"\n",
"9456\n",
"\n",
"2391M\n",
"2393M\n",
"2391M\n",
"2393M\n",
"2393M\n",
"2391M\n",
"2391M\n",
"2393M\n",
"2391M\n",
"2393M\n",
"7344\n",
"7344\n",
"2252\n",
"2136\n",
"11788\n",
"3364\n",
"\n",
"2412\n",
"2324\n",
"2412\n",
"2324\n",
"2324\n",
"2412\n",
"2412\n",
"2324\n",
"2412\n",
"2324\n",
"2372\n",
"2372\n",
"1720\n",
"1604\n",
"6216\n",
"1908\n",
"\n",
"NANNNNHNDDDDDDDDANN\n",
"\n",
"400.\n",
"400.\n",
"10@.\n",
"10@.\n",
"10@.\n",
"10@.\n",
"10@.\n",
"10@.\n",
"10@.\n",
"\n",
"DOAAD\n",
"\n",
"PLCTCTDPSORPRRPRRPRREBRBR\n",
"POSCTCT®VVDDVD0D0090 0\n",
"\n",
"VPVTVTAAAD\n",
"\n",
"45h22:\n",
"45h22:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"11h20:\n",
"Q4.\n",
"13.\n",
"40.\n",
"44.\n",
":1@.\n",
"18.\n",
"\n",
"51:\n",
"50:\n",
"39:\n",
"39:\n",
"\n",
"10:\n",
"\n",
"46\n",
"32\n",
"32\n",
"32\n",
"32\n",
"32\n",
"31\n",
"31\n",
"32\n",
"81\n",
"12\n",
"59\n",
"96\n",
"44\n",
"96\n",
"\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"/usr/\n",
"\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"oca\n",
"\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.\n",
"perl /usr/local/anaconda/envs/HiC-Pro_\n",
"\n",
"VPV®VVVVVVOVO\n",
"\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"-0/\n",
"\n",
"v3.\n",
"\n",
"perl /usr/local/anaconda/envs/HiC-Pro_v3.\n",
"/anaconda/envs/HiC-Pro_v3.0.0/\n",
"/anaconda/envs/HiC-Pro_v3.0.0/\n",
"/lib/systemd/systemd --system --deserialize 33\n",
"@dbus—daemon --system —-address=systemd:\n",
"\n",
"/usr/\n",
"/usr/\n",
"\n",
"oca\n",
"oca\n",
"\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"bin/bowtie2-align-s --wrapper\n",
"\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"basic-®\n",
"\n",
"--very-sensitive\n",
"—-very-sensitive\n",
"--very-sensitive\n",
"--very-sensitive\n",
"--very-sensitive\n",
"--very-sensitive\n",
"--very-sensitive\n",
"--very-sensitive\n",
"—-very-sensitive\n",
"—-very-sensitive\n",
"\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"30\n",
"\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"--score-min\n",
"—-score-min\n",
"\n",
"DAARBAAADH\n",
"NNNNNNNNN\n",
"\n",
"Pere rere\n",
"SoooKoKOOOO\n",
"\n",
"--end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"—-end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"--end-to-end\n",
"\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"—-reo\n",
"\n",
"@.0/bin/bowtie2 --very-sensitive -L 3@ --score-min L,-@.6,-@.2 --end-to- end --reorder --un bowtie_resu\n",
"@.0/bin/bowtie2 --very-sensitive -L 3@ --score-min L,-@.6,-@.2 --end-to-end --reorder --un bowtie_resu\n",
"\n",
"bin/samtools view -F 4 -bS —\n",
"bin/samtools view -F 4 -bS —\n",
"\n",
"--nofork --nopidfile --systemd-activation --syslog-only\n",
"\n",
"Fig x: Visualization in Juicebox for two HiC datasets\n",
"\n",
"The 10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files\n",
"locally. The left panel shows the matrix from the cis-regulatory elements in Maize study and the one\n",
"on the right is from (7). Even though the raw hic sequencing data was trimmed correctly the second\n",
"dataset showed poor quality as is evident from the figure. The noise was high and HiCCUPs couldn't\n",
"find loops correctly.\n",
"\n",
"-hic & .cooV.mcool:; Binary formats for Hi-C data\n",
"> Compressed contact matrices at multiple resolutions\n",
"Genomic intervals for binned data\n",
"\n",
">\n",
"> Interaction frequencies between loci\n",
"> Supports multiple bin sizes & corrections in one file\n",
"\n",
"\n",
"GQAAAGP RPP PPP PPP PRP PPP PPP PP RS\n",
"\n",
"«/16\n",
"-717\n",
"-717\n",
"-717\n",
"-718\n",
"-718\n",
"-718\n",
"-718\n",
"-718\n",
"-719\n",
"-719\n",
"-719\n",
"-719\n",
"-720\n",
"-720\n",
"-720\n",
"-720\n",
"+721\n",
"- 786\n",
"-814\n",
"+917\n",
"-969\n",
"-969\n",
"-340\n",
"-341\n",
"-342\n",
"-343\n",
"-346\n",
"\n",
"inflating: hiC-Pro-master/scripts/onlarget.py\n",
"\n",
"inflating: HiC-Pro-master/scripts/plot_hic_contacts.R\n",
"\n",
"inflating: HiC-Pro-master/scripts/plot_hic_fragment.R\n",
"\n",
"inflating: HiC-Pro-master/scripts/plot_mapping_portion.R\n",
"\n",
"inflating: HiC-Pro-master/scripts/plot_pairing_portion.R\n",
"\n",
"inflating: HiC-Pro-master/scripts/split_valid_interactions.py\n",
"\n",
"creating: HiC-Pro-master/scripts/src/\n",
"\n",
"extracting: HiC-Pro-master/scripts/src/README\n",
"\n",
"inflating: HiC-Pro-master/scripts/src/build_matrix.cpp\n",
"\n",
"inflating: HiC-Pro-master/scripts/src/cutsite_trimming.cpp\n",
"\n",
"creating: HiC-Pro-master/test-op/\n",
"\n",
"inflating: HiC-Pro-master/test-op/config_test_as.txt\n",
"\n",
"inflating: HiC-Pro-master/test-op/config_test_cap.txt\n",
"\n",
"inflating: HiC-Pro-master/test-op/config_test_dnase.txt\n",
"\n",
"inflating: HiC-Pro-master/test-op/config_test_latest.txt\n",
"\n",
"inflating: HiC-Pro-master/test-op/run-test-op.sh\n",
"finishing deferred symbolic links:\n",
"\n",
"HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
"make -f ./scripts/install/Makefile CONFIG_SYS=./config-install.txt prefix=/opt/hicpro\n",
"make[1]: Entering directory '/opt/HiC-Pro-master'\n",
"./scripts/install/install_dependencies.sh -c ./config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.@ -q\n",
"Make sure internet connection works for your shell prompt under current user's privilege ...\n",
"Starting HiC-Pro installation !\n",
"Directory /opt/hicpro does not exist!\n",
"Exit - Error - unable to install/check dependancies !\n",
"make[1]: **x* [scripts/install/Makefile:41: configure] Error 1\n",
"make[1]: Leaving directory '/opt/HiC-Pro-master'\n",
"make: **x* [Makefile:38: configure] Error 2\n",
"\n",
"40 | # Install HiC-Pro\n",
"\n",
"41 | >>> RUN cd /opt && \\\n",
"\n",
"42 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
"\n",
"43 | >>> unzip hicpro_latest.zip && \\\n",
"\n",
"44 | >>> cd HiC-Pro-master && \\\n",
"\n",
"45 | >>> make configure prefix=/opt/hicpro && \\\n",
"\n",
"46 | >>> make install && \\\n",
"\n",
"47 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
"\n",
"48 | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
"\n",
"49 |\n",
"ERROR: failed to solve: process \"/bin/sh -c cd /opt && wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && unzip hicpro_latest.zip && cd HiC-Pro-master && make\n",
"configure prefix=/opt/hicpro && make install && ln -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\" did not complete successfully: exit code:\n",
"\n",
"2\n",
"\n",
"A Sequenced\n",
"Hi-C Reads\n",
"\n",
"Alignment and\n",
"Chimera Handling Merge Sort\n",
"\n",
"SS Sass = SS\n",
"—_ oo i\n",
"—\n",
"SSS oo\n",
"a\n",
"\n",
"oe OT\n",
"\n",
"Duplicate\n",
"removal\n",
"\n",
"Map creation\n",
"\n",
"i\n",
"—————\n",
"\n",
"\n",
"-hic & .cool/.mcool: Binary formats for Hi-C data\n",
"Compressed contact matrices at multiple resolutions\n",
"\n",
"Genomic intervals for binned data\n",
"Interaction frequencies between loci\n",
"Supports multiple bin sizes & corrections in one file\n",
"\n",
"\n",
"Overall Interpretation\n",
"\n",
"e The data show a good proportion of valid Hi-C contacts (17.40%), but a large number of reads\n",
"(64.87%) are excluded due to low quality (MAPQ). This could be due to sequence complexity,\n",
"\n",
"genome alignment issues, or technical problems during sequencing.\n",
"\n",
"e The balance in pair types and dominance of intra-chromosomal contacts indicate proper\n",
"\n",
"library preparation and plausible results for downstream analysis.\n",
"\n",
"e Long-range contacts provide meaningful insights into chromatin organization and can be\n",
"\n",
"used for modeling chromosomal structure.\n",
"\n",
"workflow_aman\n",
"\n",
"a i\n",
"a\n",
"ToDo\n",
"\n",
"hic hic2cool cool\n",
"\n",
"plot\n",
"\n",
"matrix (exported from\n",
"\n",
"juicerbon) Python script Plot\n",
"\n",
"\n",
"QW 6B github.com/kuikui-C/DconnLoop W © Search Startpage\n",
"\n",
"(1) README o\n",
"\n",
"pip install matplotlib\n",
"conda install hicexplorer\n",
"conda activate DconnLoop\n",
"\n",
"Usage\n",
"\n",
"The input data used can be downloaded in the supplementary materials of the paper. The input contact maps use\n",
"the cool file format, which, if needed, can be converted and normalized using the HiCExplorer's hicConvertFormat\n",
"command.\n",
"\n",
"HiC to cool\n",
"\n",
"hicConvertFormat -m ./ENCFFQ97SKJ.hic --inputFormat hic --outputFormat cool -o ./ENCFF@97SKJ.c oO\n",
"hicConvertFormat -m ./ENCFFQ97SKJ_10000.cool --inputFormat cool —-outputFormat cool -o ./ENCFF\n",
"\n",
"Generate positive and negative samples\n",
"\n",
"python PosNeg_Samp_Gen.py -p ./input/gm12878/Ra02014—GM12878-MboI-allreps—filtered.1@kb.cool — oO\n",
"\n",
"Training\n",
"\n",
"python leave_one_train.py -d ./PosNeg_samp/ -g 1,2,3 —b 256 -lr @.001 -e 3@ -w 0.0005 -c ./mod oO\n",
"\n",
"Testing\n",
"\n",
"python leave_one_test.py -d ./PosNeg_samp/ -g 1,2,3 -c ./model/ -f ./model/chri5-record_test. oO\n",
"\n",
"Score\n",
"\n",
"python score_chromosome.py -p ./input/gm12878/Ra02014—GM12878-MboI-allreps—filtered.1@kb.cool oO\n",
"\n",
"Cluster\n",
"\n",
"python cluster.py -d 5 -i ./scores/chr15.bed -r 10000 -m 0.97 -p 75 -e 10 -o ./cluster/chr15-L oO\n",
"\n",
"@ Terminal Shell Edit View Window Help SU GB O+ 8 © & WD ® F Q B® SatFeb15 12:24\n",
"\n",
"ee@ aman — aman@unicorn: ~/fihic_bias — ssh -L 9005:localhost:9005 aman@10.162.143.69 — 208x63\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 1.5 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 1 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 100000 -f @.2 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 1000000 -f @.2 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.0@.jar hiccups --cpu —-threads 16 -r 25000 -f @.2 -p 2 -i 12 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@0kb/\n",
"\n",
"-20.00.jar hiccups --cpu —-threads 16 -r 50000 -f @.2 -p 2 -i 12 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@0kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 25000 -f @.25 -p 2 -i 14 -d 25000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"-20.00.jar hiccups --cpu --threads 16 -r 50000 -f @.25 -p 2 -i 14 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_too -20.00.jar hiccups --cpu --threads 16 -r 5000,10000,25000 -f 0.30 -p 1.5 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
"java -jar ~/juicer/CPU/common/juicer_too -20.00.jar hiccups --cpu --threads 16 -r 5000, 25000 -f @.3@ -p 1 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 25000 -f @.3@ -p 1 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
"\n",
"nano /home/aman/hiccups_results/enriched_pixels_25000.bedpe\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000,20000,50000 /mnt/storage3/aman/hicpro2jui\n",
"cebox/data.allValidPairs.hic ~/hiccups_optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000, 25000,50000 /mnt/storage3/aman/hicpro2jui\n",
"cebox/data.allValidPairs.hic ~/hiccups_optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000,50000 /mnt/storage3/aman/hicpro2juicebox/\n",
"data.allValidPairs.hic ~/hiccups_optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -k KR -f .1 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hi\n",
"ccups_optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f .1 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_\n",
"optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f 2 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_o\n",
"ptimized_results/\n",
"\n",
"cd ~/hiccups_optimized_results/\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f @.2 -p 4 -i 7 -t 0.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccup\n",
"_optimized_results/\n",
"\n",
"~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups -h\n",
"\n",
"~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups\n",
"\n",
"cat ~/.bash_history | hiccups\n",
"\n",
"cat ~/.bash_history | grep hiccups\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_1@kb/\n",
"\n",
"mkdir hiccups2_1@kb\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
"\n",
"1s -lh ~/hiccups2_1@kb/\n",
"\n",
"we -l ~/hiccups2_10kb/fdr_thresholds_10000\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
"\n",
"1s ~/hiccups2_10kb/\n",
"\n",
"we -l1 ~/hiccups2_10kb/*\n",
"\n",
"cat ~/hiccups2_10kb/fdr_thresholds_5000\n",
"\n",
"1s -ltrh ~/hiccups2_10kb/\n",
"\n",
"(base) aman@unicorn:~/fihic_bias$ java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 1000@ -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccupsfinal_10kb/\n",
"\n",
"WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n",
"\n",
"WARN [2025-@2-15T11:23:46,503] [Globals.java:138] [main] Development mode is enabled\n",
"\n",
"Usage: juicer_tools hiccups [-m matrixSize] [-k normalization (NONE/VC/VC_SQRT/KR)] [-c chromosome(s)] [-r resolution(s)] [--restrict] [-f fdr] [-p peak width] [-i window] [-t thresholds] [-d centroid dista\n",
"neces] <hicFile> <outputDirectory> [specified_loop_list]\n",
"\n",
"(base) aman@unicorn:~/fihic_bias$ java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccupsfinal_10kb/\n",
"\n",
"WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n",
"\n",
"WARN [2025-@2-15T11:24:14,443] [Globals.java:138] [main] Development mode is enabled\n",
"\n",
"Reading file: /mnt/storage3/aman/data.allValidPairs.hic\n",
"\n",
"Using the following configurations for HiCCUPS:\n",
"\n",
"Config res: 10000 peak: 2 window: 5 fdr: 10% radius: 20000\n",
"\n",
"WARNING - You are using the CPU version of HiCCUPS.\n",
"\n",
"The GPU version of HiCCUPS is the official version and has been tested extensively.\n",
"\n",
"The CPU version only searches for loops within 8MB (by default) of the diagonal and is still experimental.\n",
"\n",
"Using 16 CPU thread(s) for primary task\n",
"\n",
"Warning Hi-C map may be too sparse to find many loops via HiCCUPS.\n",
"\n",
"Running HiCCUPS for resolution 10000\n",
"\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"java -jar ~/juicer/CPU/common/juicer_too\n",
"\n",
"DHOHDHHHHHHHD\n",
"NNNNNNNNNNN\n",
"\n",
"\n",
"Upregulated Downregulated\n",
"\n",
"H3K27me signal(cp) at upregulated genes HoKzrmed slgnalcotrt at uprepuaes genes nes\n",
"3x27 me3 signal HaK2763 signal\n",
"\n",
"ene astce (9) ge dace\n",
"\n",
"In [54]: import fanc\n",
"import fanc.peaks\n",
"import fanc.plotting as fancplot\n",
"\n",
"import logging\n",
"logging. basicConfig(level=logging. INFO, format=\"%(asctime)s %(levelname)s %(message)s\")\n",
"\n",
"hic_data = fanc. load('/mnt/storage3/aman/wdbasejuicer_new/aligned/inter_3@.hic')\n",
"loop_caller = fanc.RaoPeakCaller()\n",
"\n",
"/home/aman/. lLocal/lib/python3.10/site-packages/fanc/compatibility/juicer.py:330: UserWarning: No resolution chosen\n",
"for Juicer Hic - using 2500000bp. Specify a custom resolution using <.hic file>@<resolution>\n",
"warnings.warn(\"No resolution chosen for Juicer Hic - using {}bp. \"\n",
"/home/aman/. lLocal/lib/python3.10/site-packages/fanc/compatibility/juicer.py:353: UserWarning: Support for Juicer .h\n",
"ic v9 is still in beta. Please report any issues to https://github.com/vaquerizas lab/fanc/issues/92\n",
"warnings.warn(f\"Support for Juicer .hic v{self.version} is still in beta. \"\n",
"\n",
"ne...@broadinstitute.org Jan 18, 2019, 8:16:32PM y+ roN\n",
"to AS, 3D Genomics\n",
"\n",
"Hello,\n",
"You can just run HiCCUPS or Arrowhead on the hic file using the latest jar: https://github.com/aidenlab/juicer/wiki/Download\n",
"There is extensive documentation here: https://github.com/aidenlab/juicer/wiki/CPU-HiCCUPS\n",
"\n",
"Please note: 300 million reads is not enough to reliably call loops. We also do not recommend domain calling at this depth. The ENCODE standard for loop\n",
"calling is 2 billion reads.\n",
"\n",
"Best\n",
"Neva\n",
"\n",
"You received this message because you are subscribed to the Google Groups \"3D Genomics\" group.\n",
"\n",
"To unsubscribe from this group and stop receiving emails from it, send an email to 3d-genomics...@googlegroups.com.\n",
"To view this discussion on the web visit https://groups.google.com/d/msgid/3d-genomics/761 6da19-9387-4c46-99d6-\n",
"ef852e2b0170%40googlegroups.com.\n",
"\n",
"For more options, visit https://groups.google.com/d/optout.\n",
"\n",
"Neva Cherniavsky Durand, Ph.D.\n",
"Staff Scientist, Aiden Lab\n",
"www.aidenlab.org\n",
"\n",
"> ValidPairs file from HiC-Pro used\n",
"as pre-input. 78M entries. Format:\n",
"\n",
"chri start1l endl chr2 start2 end2 readID strand1 strand2\n",
"\n",
"> .bedpe format (input):\n",
"\n",
"chri start1l endl chr2 start2 end2\n",
"\n",
"> Output csv\n",
"format:\n",
"\n",
"chr sl el chr s2 e2 prob interacted\n",
"\n",
"> 50000 entry bedpe file - 11249\n",
"with interacted score 1\n",
"\n",
"\n",
"Hi-C Signal\n",
"\n",
"25\n",
"\n",
"N\n",
"°\n",
"\n",
"rR\n",
"uw\n",
"\n",
"10\n",
"\n",
"Interaction Decay\n",
"\n",
"—— Row Sum (interactions by position)\n",
"—— Column Sum (Interactions by position)\n",
"\n",
"2 3 4 5 6\n",
"Position Relative to Anchor\n",
"\n",
"EXPLORER\n",
"\n",
"\\ AMAN [SSH: SCC]\n",
"\n",
"> hic-pro-git\n",
"\n",
"> mustache-git\n",
"\n",
"chrom.sizes\n",
"\n",
"cool_balance.sh\n",
"GEO2457_5kb_mustache_loops.bedpe\n",
"GEO2457_5kb.cool\n",
"GEO2457_dots_5kb.bedpe\n",
"GEO2457_expected_1kb.tsv\n",
"GEO2457_expected_5kb.tsv\n",
"GEO2457_v2.mcool\n",
"\n",
"GEO2457.hic\n",
"GEO2459_5kb_mustache_loops.bedpe\n",
"GEO2459_5kb.cool\n",
"GEO2459_expected_5kb.tsv\n",
"GEO2459_v2_expected_cis.tsv\n",
"GEO2459_v2.mcool\n",
"\n",
"GEO2459.hic\n",
"\n",
"$ hic2cool_aman.sh\n",
"\n",
"$ test.sh\n",
"\n",
"mY 6 oO DB\n",
"\n",
"6]\n",
"\n",
"@\n",
"\n",
"hy «D “OUTLINE\n",
"\n",
"PP aman [SSH: SCC]\n",
"\n",
"Show All Commands\n",
"Go to File\n",
"\n",
"Find in Files\n",
"\n",
"Toggle Full Screen\n",
"\n",
"Show Settings\n",
"\n",
"Veeb dls MOODLE E-mail Help\n",
"\n",
"Tah, Catalogue Dashboard My courses Q Dp Aman Shamil Nalakath © aa\n",
"\n",
"Bioinformatics Il MOOC: View: Overview report\n",
"\n",
"Bioinformatics Il information 2024 Course Participants Grades\n",
"\n",
"General Introduction to Bioinformatics II - Ol...\n",
"\n",
"Introduction to the course\n",
"\n",
"Course info (link to study information syste... Overview report\n",
"\n",
"Teacher's announcements\n",
"\n",
"Course participant's forum (ask questions fr... Aman Shamil Nalakath\n",
"\n",
"Project Work 1 - Genome project plan\n",
"\n",
"Grade\n",
"\n",
"Bioinformatics group project example from ... Course name\n",
"\n",
"Week 1 Bioinformatics II MOOC 97.00\n",
"\n",
"Week 1 general discussion\n",
"\n",
"Lecture 1 A - Introduction\n",
"\n",
"Video: Lecture 1 A - Introduction\n",
"\n",
"Students introduction and aims (DL 12.09. 2...\n",
"\n",
"How to (seriously) read a scientific paper\n",
"\n",
"Article 1\n",
"\n",
"Lecture 1B - Setting up HPC Access\n",
"\n",
"Video: Lecture 1 B - Setting up HPC access TAL\n",
"\n",
"Meet and greet\n",
"\n",
"Get the mobile app\n",
"\n",
"Coursework 1 on Article 1: Aspects of geno... Policies\n",
"\n",
"\n",
"Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n",
"\n",
"face mar rapa mat\n",
"\n",
"eQTL-gene\n",
"links >20 kb\n",
"\n",
"shuffled pairs :\n",
"\n",
"\n",
"Sequencing\n",
"\n",
"Sequenced Reads: 547812856\n",
"\n",
"Duplication and Complexity (% Sequenced Reads)\n",
"\n",
"Analysis of Unique Reads (% Sequenced Reads / % Unique Reads)\n",
"\n",
"Intra-fragment Reads: 34,307,600\n",
"\n",
"Below MAPQ Threshold: 355,353,763 (64.87% / 73.27%)\n",
"\n",
"Hi-C Contacts: 95,311,495 (17.40% / 19.65%)\n",
"3' Bias (Long Range): 97% - 3%\n",
"\n",
"Pair Type % (L-I-O-R): 25% - 25% - 25% - 25%\n",
"\n",
"Analysis of Hi-C Contacts (% Sequenced Reads / % Unique Reads)\n",
"\n",
"Inter-chromosomal: 22,195,088 (4.05% / 4.58%)\n",
"Intra-chromosomal: 73,116,407 (13.35% / 15.08%)\n",
"Long Range (>20Kb): 35,425,148 (6.47% / 7.30%)\n",
"\n",
"Solving environment: ...working... INFO conda.cc\n",
"INFO conda.conda_libmamba_solver.solver:_solve_é\n",
"{\n",
"\n",
"\"INSTALL\": [\n",
"\n",
"\"hicexplorer\"\n",
"\n",
"]\n",
"}\n",
"info libmamba Parsing MatchSpec hicexplorer\n",
"info libmamba Parsing MatchSpec hicexplorer\n",
"info libmamba Adding job: hicexplorer\n",
"\n",
".\n",
"\n",
"@ MainWindow Mon Nov 4 21:17\n",
"\n",
"eee [Juicebox 2.17.00] Hi-C Map <9>: inter.hic\n",
"\n",
"File View Bookmarks Assembly Dev\n",
"Chromosomes\n",
"\n",
"All All Be\n",
"\n",
"Show\n",
"\n",
"Normalization (Obs | Ctrl) Color Range\n",
"2 I Tr\n",
"\n",
"3773\n",
"\n",
"Observed None None\n",
"\n",
"I I I I I I It\n",
"2.5MB 500 KB 100KB 25KB 5KB 1KB 200BP\n",
"\n",
"LayerO << oO\n",
"\n",
"Show Annotation Panel J\n",
"\n",
"\n",
"GSM3398051: HiC maize Leaf-HiC rep2; Zea mays; Hi-C\n",
"\n",
"1 ILLUMINA (NextSeq 500) run: 528.9M spots, 80.4G bases, 30.8Gb downloads\n",
"Accession: SRX4727418\n",
"\n",
"GSM3398050: HiC maize Leaf-HiC rep1; Zea mays; Hi-C\n",
"\n",
"1 ILLUMINA (NextSeq 500) run: 89.8M spots, 13.7G bases, 4.5Gb downloads\n",
"Accession: SRX4727417\n",
"\n",
"(mustache_aman) [papantonis1@gwdu1@1 aman]$ awk '$1 == $9 {print $1}' GE02457_dots_5kb.bedpe | sort | unig -c && wc -1 GE02457_dots_5kb.bedpe\n",
"842 chri\n",
"413 chr1e\n",
"465 chri1\n",
"442 chr12\n",
"244 chri3\n",
"254 chri4\n",
"234 chris\n",
"174 chri16\n",
"248 chr17\n",
"196 chri8\n",
"122 chri9\n",
"817 chr2\n",
"196 chr2e\n",
"\n",
"81 chr21\n",
"78 chr22\n",
"731 chr3\n",
"\n",
"594 chr4\n",
"609 chr5\n",
"631 chr6é\n",
"478 chr7\n",
"\n",
"505 chr8&\n",
"349 chr9\n",
"\n",
"184 chrx\n",
"\n",
"8888 GE02457_dots_5kb.bedpe\n",
"\n",
"@ Mainwindow @®@@6OeOr+ezek@ee =) FS Q SS MonDec30 1\n",
"\n",
"[ Rem ) [Juicebox 2.17.00] Hi-C Map <9>: data.allValidPairs.hic\n",
"\n",
"File View Bookmarks Assembly Dev\n",
"Chromosomes\n",
"\n",
"6 @ « Ge\n",
"\n",
"Normalization (Obs | Ctrl) Resolution (BP) Color Range\n",
"\n",
"6:113,950,001-114,000,000\n",
"\n",
"100 MB\n",
"\n",
"merge... <p> [> oO\n",
"\n",
"10000... <—@\n",
"LayerO <<\n",
"\n",
"Show Annotation Panel\n",
"\n",
"\n",
"% TADS\n",
"\n",
"[papantonis1@gwdu101 mustache_results]$\n",
"BIN1_CHR BIN1_START BIN1_END\n",
"chr 5510000 5515000 chr1 5610000\n",
"chr1 5505000 5510000 chr1 5745000\n",
"chr1 5635000 5640000 chr1 5745000\n",
"chr1 7665000 7670000 chr1 7750000\n",
"chr1 7985000 7990000 chr1 8325000\n",
"chri1 7990000 7995000 chr1 8105000\n",
"chr1 8020000 8025000 chr1 8310000\n",
"chri1 8020000 8025000 chr1 8240000\n",
"chr 8560000 8565000 chr1 8725000\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"papantonis1@gwdu101 mustache_results]$\n",
"12007 anchor_bed_try2/rbp1_anchors_final\n",
"papantonis1@gwdu101 mustache_results]$\n",
"13598 anchor_bed_try2/ctrl_anchors_final\n",
"\n",
"papantonis1@gwdu101 mustache_results]$\n",
"\n",
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/C!\n",
"> -u > anchor_bed_try2/rbp1_anchors_wi\n",
"\n",
"papantonis1@gwdu101 mustache_results]$\n",
"1153 anchor_bed_try2/rbp1_anchors_with_C\n",
"\n",
"papantonis1@gwdu101 mustache_results]$\n",
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/C.\n",
"> -u > anchor_bed_try2/ctrl_anchors_wi\n",
"\n",
"1767 anchor_bed_try2/ctrl_anchors_with_C\n",
"papantonis1@gwdu101 mustache_results]$\n",
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/Cl\n",
"> -w 500@ -u > anchor_bed_try2/rbp1_an\n",
"papantonis1@gwdu101 mustache_results]$\n",
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/C.\n",
"> -w 50@@ -u > anchor_bed_try2/ctrl_an\n",
"\n",
"1833 anchor_bed_try2/rbp1_anchors_near5k\n",
"\n",
"2689 anchor_bed_try2/ctrl_anchors_near5k\n",
"papantonis1@gwdu101 mustache_results]$\n",
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/Cl\n",
"> -w 10000 -u > anchor_bed_try2/rbp1_a\n",
"papantonis1@gwdu101 mustache_results]$\n",
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
"> -b ~/aman/microc_data/nadine_macro/C.\n",
"> -w 10000 -u > anchor_bed_try2/ctrl_a\n",
"\n",
"2046 anchor_bed_try2/rbp1_anchors_near1®\n",
"\n",
"3055 anchor_bed_try2/ctrl_anchors_near1@\n",
"\n",
"head rbp1_loops_5k.bedpe\n",
"BIN2_CHROMOSOME BIN2_START BIN2_END\n",
"5615000\n",
"5750000\n",
"5750000\n",
"7755000\n",
"8330000\n",
"8110000\n",
"8315000\n",
"8245000\n",
"8730000\n",
"tail -n +2 rbp1_loops_5k.bedpe | cut -f1-3 > anchor_bed_try2/rbp1_anchor1.bed\n",
"tail -n +2 rbp1_loops_5k.bedpe | cut -f4-6 > anchor_bed_try2/rbp1_anchor2.bed\n",
"cat anchor_bed_try2/rbp1_anchor1.bed anchor_bed_try2/rbp1_anchor2.bed | sort -k1,1 -k2,2n | uniq > anchor_bed_try2/rbp1_anchors_final_tab.bed\n",
"tail -n +2 ctrl_loops_5k.bedpe | cut -f1-3 > anchor_bed_try2/ctrl_anchor1.bed\n",
"tail -n +2 ctrl_loops_5k.bedpe | cut -f4-6 > anchor_bed_try2/ctrl_anchor2.bed\n",
"cat anchor_bed_try2/ctrl_anchor1.bed anchor_bed_try2/ctrl_anchor2.bed | sort -k1,1 -k2,2n | uniq > anchor_bed_try2/ctrl_anchors_final_tab.bed\n",
"we -l anchor_bed_try2/rbp1_anchors_final_tab.bed\n",
"\n",
"_tab.bed\n",
"\n",
"we -l anchor_bed_try2/ctrl_anchors_final_tab.bed\n",
"_tab.bed\n",
"\n",
"bedtools intersect \\\n",
"\n",
"l_tab.bed \\\n",
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
"th_CTCF.bed\n",
"\n",
"we -l anchor_bed_try2/rbp1_anchors_with_CTCF.bed\n",
"TCF. bed\n",
"\n",
"bedtools intersect \\\n",
"\n",
"l_tab.bed \\\n",
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
"th_CTCF.bed\n",
"\n",
"papantonis1@gwdu101 mustache_results]$ we -l anchor_bed_try2/ctrl_anchors_with_CTCF.bed\n",
"\n",
"TCF. bed\n",
"\n",
"bedtools window \\\n",
"\n",
"l_tab.bed \\\n",
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
"chors_near5kb_CTCF.bed\n",
"\n",
"bedtools window \\\n",
"\n",
"l_tab.bed \\\n",
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
"chors_near5kb_CTCF.bed\n",
"\n",
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/rbp1_anchors_near5kb_CTCF.bed\n",
"\n",
"b_CTCF.bed\n",
"\n",
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/ctrl_anchors_near5kb_CTCF.bed\n",
"\n",
"b_CTCF.bed\n",
"\n",
"bedtools window \\\n",
"\n",
"l_tab.bed \\\n",
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
"nchors_near1@kb_CTCF.bed\n",
"\n",
"bedtools window \\\n",
"\n",
"l_tab.bed \\\n",
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
"nchors_near1@kb_CTCF.bed\n",
"\n",
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/rbp1_anchors_near1@kb_CTCF.bed\n",
"\n",
"kb_CTCF.bed\n",
"\n",
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/ctrl_anchors_neari@kb_CTCF.bed\n",
"\n",
"kb_CTCF.bed\n",
"\n",
"ML classification model\n",
"\n",
"ML regression model\n",
"\n",
"ATAC QC\n",
"\n",
"ATAC peak detection\n",
"\n",
"7 ,\n",
"GitHub\n",
"8\n",
"Nextflow for ML\n",
"\n",
"\n",
"Extracting Hi-C contact matrix from.hic file\n",
"\n",
"The process obtains the hic contact matrix for each chromosome from the.hic file. It will output the\n",
"frequency_matrix file.\n",
"\n",
"Modify the path to the input and output files in the GetBigMatrix_Cells_KRobserved.sh file: The.jar file is the path\n",
"where the juicer tools resides, and run:\n",
"\n",
"bash GetBigMatrix_Cells_KRobserved.sh O\n",
"\n",
"Generating sub-matrix from Hi-C contact matrix\n",
"\n",
"The process cuts the hic contact matrix of each chromosome into multiple submatrices. Modify the path to the\n",
"input and output files in the Getnpymatrix_chr_all_sample.sh file, where the input file is the output file from the\n",
"previous step, DPATH is the root directory of the frequence_matrix file, and run:\n",
"\n",
"bash Getnpymatrix_chr_all_sample.sh oO\n",
"\n",
"i al\n",
"\n",
"Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n",
"\n",
"eQTL-gene\n",
"links >20 kb |\n",
"\n",
"shuffled pairs\n",
"\n",
"\n",
"A Sequenced ignment and\n",
"Hi-C Reads Chimera Handling\n",
"RI Re\n",
"a —————I\n",
"\n",
"Duplicate\n",
"Merge Sort removal\n",
"\n",
"Map creation\n",
"\n",
"_\n",
"\n",
"\n",
"Insights from the study\n",
"\n",
"> Total identified loops according to the study (long-range\n",
"loops 2 20 kb): 1,177;\n",
"\n",
"> Paper only analyzed chromatin loops 2 20 kb in length in\n",
"the Hi-C dataset\n",
"\n",
"> Resolutions and parameters not mentioned in paper and\n",
"Sl\n",
"\n",
"> Less no. of chromatin loops identified due to limited\n",
"sequencing depth.HenceHiChIP to detect more loops\n",
"\n",
"> H3K4me3-HiChIP dataset: 24,141 loops;\n",
"> H3K27me3-HiChIP dataset: 18,106 loops\n",
"\n",
"> FitHiC2 on resolution 20kb : 89000 loops\n",
"\n",
"PRC1 PRC2\n",
"\n",
"\n",
"What this suggests:\n",
"\n",
">\n",
"\n",
"High Mapping Percentage: The mapping quality is quite high (99.20%), which is good.\n",
"\n",
"However, the properly paired issue (0%) should be looked into further.\n",
"\n",
"Paired Read Alignment Issues: The @% for properly paired reads suggests that the\n",
"alignment tool or the pairing information may not be correct. This is a crucial issue for Hi-C\n",
"data since proper pairing indicates the correct relationship between paired-end reads. It's\n",
"worth verifying that the correct options are being used in the alignment step and whether the\n",
"\n",
"pairing information is retained properly.\n",
"\n",
"Inter-chromosomal Interactions: Given the significant number of reads with mates mapped\n",
"to different chromosomes, this aligns with your Hi-C analysis, which typically shows inter-\n",
"chromosomal interactions. However, excessive inter-chromosomal mapping could indicate a\n",
"\n",
"problem if the number is unusually high.\n",
"\n",
"A Sequenced Alignment and Duplicate Map creation\n",
"Hi-C Reads Chimera Handling Merge Sort removal\n",
"RI R2 RARA\n",
"i 1 — =~ —————— = .\n",
"SS .\n",
"; == Se = I\n",
"C t 1 ———— a =\"\n",
"ess I | =\n",
": p 1 }\n",
"+ —v ===. >} 1 7 , 1\n",
"\n",
"[2]:\n",
"\n",
"hicPlotDistVsCounts —-matrix /mnt/storage3/aman/data_mcool.h5 —-outFileName contact_decay.png\n",
"usage: hicPlotDistVsCounts --matrices MATRICES [MATRICES ...] --plotFile file\n",
"\n",
"name [--labels LABELS [LABELS ...]]\n",
"[--skipDiagonal] [--maxdepth INT bp] [--perchr]\n",
"\n",
"[--chromosomeExclude CHROMOSOMEEXCLUDE [CHROMOSOMEEXCLUDE ...]]\n",
"[--outFileData OUTFILEDATA]\n",
"[--plotsize PLOTSIZE PLOTSIZE] [--help] [--version]\n",
"\n",
"hicPlotDistVsCounts: error: the following arguments are required: —-matrices/—m, —-plotFile/-o\n",
"B 2\n",
"\n",
"icv ome v Tracks ¥ mple Info v Session v Share Bookmark Save Image Circular View v Help v\n",
"\n",
"IGV oxford_e...me.fasta tig00000002:1,752,510-1,825,110 Q 72 kb (Select Tracks ) (\"Crosshairs ) (Center Line ) (Track Labels) @ qumm@ +)\n",
"\n",
"C | D)\n",
"\n",
"1,760 kb 1,770 kb 1,780 kb 1,790 kb 1,800 kb 1,810 kb 1,820 kb\n",
"L 1 n L L 1 n\n",
"\n",
"11D 2 ee) ee es 2 ie\n",
"\n",
"IKAOHOFJ_01984yijE metFmetLrpmE cytR hsilU gipF gipX tpiA pfkAcpxR sodA_1 thaBrhaD IKAOHOFJ_02046fdhE_1 dtdcsqR yihTyihR yihQ yihP_1 ompL IKAOHOFJ_02079 glnA gl\n",
"\n",
"priA_2 menA_2 sbp_2 cpxA_1 rhaT_2rhaA_1 ysdC_2 fdoG_3 yinV yihP_2 GFM1\n",
"\n",
"Adapter Content [Zi\n",
"\n",
"ecamuteprctoe cout open tr lay wanna aap ce a oat\n",
"\n",
"FastQC: Adapter Content\n",
"\n",
"Status Checks\n",
"Sua ren Fc sc ston wn mite ere erm ey eg ys\n",
"FastQC: Status Checks\n",
"Software Versions\n",
"\n",
"ai 8 a nse\n",
"anes Py ar ot es le\n",
"\n",
"Siseqera\n",
"\n",
"Y cut_n_tag/nadine_cut_tag /nadine_cut_tag\n",
"> Aux_CPI_H3K27me3_results\n",
"> C_H3K27me3_results\n",
"> C_H3K27me3_Spi_results\n",
"> CPILH3K27me3_results\n",
"\n",
"\n",
"AIAG SH =m\n",
"\n",
"\n",
"@ Terminal Shell Edit View Window Help\n",
"\n",
"4) FS Q S Sun 29. Jun 12:54\n",
"\n",
"eee ~~ aman — a.nalakath@node08:~ — ssh -L 9006:localhost:9006 a.nalakath@10.152.154.1 — 208x61\n",
"\n",
"Last login: Sun Jun 29 11:01:36 on ttys@ee\n",
"\n",
"aman@Laptop-von-Aman ~ % tum_ngs\n",
"\n",
"DRA A AA AA A A RRA A RA AA RR RRA A RRR RRA A AR AR AR HAA RR A\n",
"* Welcome to PGEN cluster *\n",
"DRA AA A AR A AA A RRA A AA ARR A RA RR A RR RRA AR A RR AA A HRA RR A\n",
"\n",
"Please use this node only to submit your jobs.\n",
"Don't use it for calculations or CPU/RAM intensive tasks!!!\n",
"\n",
"DARA A AAA AA A A AA A RA AR A RR RRA A RRR RR ARR A HRA HAR A HAA RR A\n",
"(a.nalakath@10.152.154.1) Password:\n",
"\n",
"Kickstarted on 2018-12-07\n",
"\n",
"Last login: Wed Jun 25 08:01:17 2025 from 10.157.58.238\n",
"[a.nalakath@frontend ~]$ ssh node@s\n",
"\n",
"Password:\n",
"\n",
"Kickstarted on 2018-12-04\n",
"\n",
"Last login: Wed Jun 25 08:01:32 2025 from 10.152.154.1\n",
"[a.nalakath@node@8 ~]$ tmux ls\n",
"\n",
"@: 1 windows (created Sat Jun 21 08:01:35 2025)\n",
"[a.nalakath@nodees ~1$ ff\n",
"\n",
"\n",
"AN Tene enginevelry, 5 Py weeds\n",
"- eZ Anal biota Beat dp\n",
"ate - Tyce Gear bei Oo\n",
"46, Trtwesip * Feashig UP ES\n",
"\n",
"yor\n",
"\n",
"SONGS [ab 2 S welts wort\n",
"\n",
"Coup peggy\n",
"\n",
"Repars — PDO)\n",
"Brcacvnud,\n",
"Summer school\n",
"\n",
"Reding Dalukinnoyy gprs\n",
"L¥ Pap & Stiles\n",
"Chater — Pronses\n",
"Saf & Stee Duatle fo Ui?\n",
"Anping fe Hos. Haig HG\n",
"\n",
"Mar\n",
"\n",
"elp\n",
"\n",
"@ Vivaldi File Edit View Bookmarks Mail Tools Wine\n",
"\n",
"New merch store now open, including a limited edition metal keycap! monkeytype.store\n",
"\n",
"monkeytype\n",
"\n",
"70\n",
"97%\n",
"\n",
"time 15\n",
"english\n",
"\n",
"19% 15s\n",
"\n",
"77 88/1/8/8\n",
"\n",
"jectives a hivinter | Etherpad\n",
"\n",
"0@0B6\n",
"\n",
"[1]:\n",
"\n",
"import h5py\n",
"\n",
"# Open the HDF5 file\n",
"\n",
"with h5py.File('cool_pileup_combined', 'r') as f:\n",
"# Inspect the structure\n",
"print(\"Keys:\", list(f.keys()))\n",
"\n",
"# Check the 'data' dataset\n",
"\n",
"data = f['data'][:]\n",
"\n",
"print(f\"'data' dataset shape: {data.shape}\")\n",
"print(f\"'data' dataset contents:\\n{data}\")\n",
"\n",
"Keys: ['annotation', attrs', data']\n",
"data' dataset shape: (16488, 3)\n",
"data' dataset contents:\n",
"[[1.1873085 1.2874519 1.4797186]\n",
"[1. 7349982 2.228282 3.1729212]\n",
"[1.5040904 1.3009566 1.1008095]\n",
"[1.9000989 2.8981235 1.9658103]\n",
"[2.9235291 4.7604017 2.8729181]\n",
"[1.9822323 2.930699 1.9129672]]\n",
"\n",
"\n",
"0@°8@\n",
"W PICO 5.09\n",
"\n",
"Docus_tag\n",
"KBOCNLJJ_00001\n",
"KBOCNLIJJ_00002\n",
"KBOCNLIJJ_00003\n",
"KBOCNLJJ_00004\n",
"KBOCNLJJ_00005\n",
"KBOCNLIIJ_00006\n",
"KBOCNLJJ_00007\n",
"KBOCNLJJ_00008\n",
"KBOCNLJJ_00009\n",
"KBOCNLJJ_00010\n",
"CRISPR\n",
"KBOCNLJJ_00011\n",
"KBOCNLJJ_00012\n",
"KBOCNLIJJ_00013\n",
"KBOCNLIJJ_00014\n",
"KBOCNLIJJ_00015\n",
"KBOCNLIJJ_00016\n",
"KBOCNLIJJ_00017\n",
"KBOCNLJJ_00018\n",
"KBOCNLIJJ_00019\n",
"KBOCNLJJ_00020\n",
"KBOCNLIJJ_00021\n",
"KBOCNLIJ_00022\n",
"KBOCNLIJJ_00023\n",
"KBOCNLIJJ_00024\n",
"KBOCNLIJJ_00025\n",
"KBOCNLIJJ_00026\n",
"KBOCNLJJ_00027\n",
"KBOCNLJJ_00028\n",
"KBOCNLIJJ_00029\n",
"KBOCNLIJJ_00030\n",
"KBOCNLIJJ_00031\n",
"KBOCNLIJJ_00032\n",
"KBOCNLIJJ_00033\n",
"KBOCNLIJIJ_00034\n",
"KBOCNLIJJ_00035\n",
"KBOCNLIJIJ_00036\n",
"KBOCNLIJJ_00037\n",
"KBOCNLJJ_00038\n",
"KBOCNLIJ_00039\n",
"KBOCNLIJJ_00040\n",
"KBOCNLIJJ_00041\n",
"KBOCNLIJ_00042\n",
"KBOCNLJJ_00043\n",
"KBOCNLIJ_00044\n",
"KBOCNLJJ_00045\n",
"KBOCNLIIJ_00046\n",
"KBOCNLIJJ_00047\n",
"KBOCNLIJJ_00048\n",
"KBOCNLIJJ_00049\n",
"KBOCNLJJ_00050\n",
"KBOCNLIJJ_00051\n",
"KBOCNLIIJ_00052\n",
"KBOCNLIJJ_00053\n",
"KBOCNLIJJ_00054\n",
"KBOCNLJJ_00055\n",
"KBOCNLIIJ_00056\n",
"\n",
"Wie) Get Help\n",
"Wed Exit\n",
"\n",
"ftype\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"763\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"CDS\n",
"cDS\n",
"cDS\n",
"CDS\n",
"CDS\n",
"\n",
"length_bp\n",
"\n",
"1545 cysI_1\n",
"735 cysH_1\n",
"2667 ygcB_1\n",
"1509 casA_1\n",
"483 casB_1\n",
"1092 casC_1\n",
"675 casD_1\n",
"600 casE_1\n",
"918 ygbT_1\n",
"285 ygbF_1\n",
"1038\n",
"\n",
"909 cysD_1\n",
"1428 cysN\n",
"606 cysC\n",
"324 ygbE\n",
"312 ftsB\n",
"711 ispD\n",
"480 ispF\n",
"1050 truD\n",
"762 surE\n",
"627 pem\n",
"1140 nlpD_1\n",
"993 rpoS\n",
"1365 ygbN\n",
"777 otni\n",
"639 otnc\n",
"372 otnK_1\n",
"834 otnK_2\n",
"909 1tnD\n",
"768 glcR\n",
"657 pphB\n",
"2562 mutS\n",
"135\n",
"\n",
"354\n",
"\n",
"2079 fhlA\n",
"1011 hypE\n",
"1122 hypD\n",
"273 hypC\n",
"873 hypB\n",
"351 hypA\n",
"462 hycA\n",
"612 hyfA_1\n",
"1827 ndhB_1\n",
"924 hycD\n",
"1710 hycE\n",
"543 ndhI_1\n",
"768 hycG_1\n",
"411\n",
"\n",
"471 hycI\n",
"1425 bglH_1\n",
"1458 bglF_1\n",
"1014 ascG\n",
"528 hyfA_2\n",
"2253 hypF\n",
"1134 norw\n",
"1440 norv\n",
"\n",
"n\n",
"\n",
"8.1.2 COG@155\n",
"8.4.8 COG@175\n",
"tbo=\n",
"\n",
"ge\n",
"1.\n",
"1.\n",
"3. -- C0G1203\n",
"\n",
"3.1.-.-\n",
"- c0G1518\n",
"\n",
"2.7.7.4 COG0175\n",
"2.7.7.4 CO0G2895\n",
"2.7.1.2\n",
"\n",
"c0G2919\n",
"\n",
"7\n",
"COGQ496\n",
"\n",
"C0G0739\n",
"COG@568\n",
"C0G2610\n",
"\n",
"C0G1349\n",
"3.1.3.16\n",
"C0GE249\n",
"\n",
"CO0G3604\n",
"4.2.1.- C0G0@309\n",
"COGe409\n",
"C0G0298\n",
"C0G378\n",
"C0G@375\n",
"\n",
"1.-.-.- C0G1142\n",
"7.1.1.-\n",
"\n",
"COGe65e\n",
"\n",
"C0G3261\n",
"7.1.1.-\n",
"\n",
"C0G3260\n",
"3.4.23.51\n",
"3.2.1.86\n",
"\n",
"C0G1263\n",
"CO0G1609\n",
"1.-.-.- C0G1142\n",
"6.2.-.- C0G0068\n",
"\n",
"1.18.1.-\n",
"COGQ426\n",
"\n",
"We) WriteOut\n",
"We) Justify\n",
"\n",
"EC_number CoG\n",
"\n",
"aman — nano ./Downloads/assignment/Ecoli_hifi/Ecoli_hifi_genome.tsv — 208x63\n",
"\n",
"/Downloads/as\n",
"\n",
"product\n",
"\n",
"Sulfite reductase [NADPH] hemoprotein beta-component\n",
"Phosphoadenosine phosphosulfate reductase\n",
"CRISPR-associated endonuclease/helicase Cas3\n",
"CRISPR system Cascade subunit CasA\n",
"\n",
"CRISPR system Cascade subunit CasB\n",
"\n",
"CRISPR system Cascade subunit CasC\n",
"\n",
"CRISPR system Cascade subunit CasD\n",
"\n",
"CRISPR system Cascade subunit CasE\n",
"CRISPR-associated endonuclease Cas1\n",
"CRISPR-associated endoribonuclease Cas2\n",
"\n",
"hypothetical protein\n",
"Sulfate adenylyltransferase subunit 2\n",
"Sulfate adenylyltransferase subunit 1\n",
"C0G@529 Adenylyl-sulfate kinase\n",
"Inner membrane protein YgbE\n",
"Cell division protein FtsB\n",
"C0G1211 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase\n",
"C0G@245 2-C-methyl-D-erythritol 2,4-cyclodiphosphate synthase\n",
"COG@585 tRNA pseudouridine synthase D\n",
"5'/3'-nucleotidase SurE\n",
"C0G2518 Protein-L-isoaspartate O-methyltransferase\n",
"Murein hydrolase activator NlpD\n",
"RNA polymerase sigma factor RpoS\n",
"Inner membrane permease YgbN\n",
"C0G3622 2-oxo-tetronate isomerase\n",
"\n",
"3-oxo-tetronate 4-phosphate decarboxylase\n",
"C0G3395 3-oxo-tetronate kinase\n",
"C0G3395 3-oxo-tetronate kinase\n",
"\n",
"L-threonate dehydrogenase\n",
"HTH-type transcriptional repressor GlcR\n",
"C0G@639 Serine/threonine-protein phosphatase 2\n",
"DNA mismatch repair protein MutS\n",
"hypothetical protein\n",
"hypothetical protein\n",
"Formate hydrogenlyase transcriptional activator FhlA\n",
"Carbamoyl dehydratase HypE\n",
"Hydrogenase maturation factor HypD\n",
"Hydrogenase maturation factor HypC\n",
"Hydrogenase maturation factor HypB\n",
"Hydrogenase maturation factor HypA\n",
"Formate hydrogenlyase regulatory protein HycA\n",
"Hydrogenase-4 component A\n",
"NAD(P)H-quinone oxidoreductase subunit 2, chloroplastic\n",
"Formate hydrogenlyase subunit 4\n",
"Formate hydrogenlyase subunit 5\n",
"NAD(P)H-quinone oxidoreductase subunit I, chloroplastic\n",
"Formate hydrogenlyase subunit 7\n",
"hypothetical protein\n",
"COG@68@ Hydrogenase 3 maturation protease\n",
"C0G2723 Aryl-phospho-beta—D-glucosidase BglH\n",
"PTS system beta-glucoside-specific EIIBCA component\n",
"HTH-type transcriptional regulator AscG\n",
"Hydrogenase-4 component A\n",
"Carbamoyltransferase HypF\n",
"C0G1251 Nitric oxide reductase F1Rd-NAD(+) reductase\n",
"Anaerobic nitric oxide reductase flavorubredoxin\n",
"\n",
"Ws) Read File Way Prev Pg\n",
"Wi] Where is WAY Next Pg\n",
"\n",
"AKI\n",
"AU\n",
"\n",
"nment/Ecoli_hifi/Ecoli_hifi_genome.tsv\n",
"\n",
"Cut Text\n",
"UnCut Text\n",
"\n",
"me Cur Pos\n",
"Way To Spell\n",
"\n",
"OMB\n",
"\n",
"100 MB\n",
"\n",
"200 MB\n",
"\n",
"Chromosomes Show Normalization (Obs | Ctrl) Resolution (BP)\n",
"“aw “aw a a a — y,\n",
"2 Bp Observed Bala... None © Pivrb ttre teins\n",
"2.5MB 500KB 100KB 25KB 5KB 1KB 200BP\n",
"OMB 100 MB 200 MB 300 MB\n",
"\n",
"\n",
"Fragment\n",
"\n",
"= ————\n",
"\n",
"=. —,\n",
"> sequencing *——— a\n",
"\n",
"\n",
"@ Mainwindow\n",
"\n",
"Omeoerork oe oO =\n",
"\n",
"[Juicebox 2.17.00] Hi-C Map <9>: inter.hic\n",
"\n",
"View Bookmarks\n",
"Chromosomes\n",
"\n",
"Assembly Dev\n",
"\n",
"Normalization (Obs | Ctrl) Resolution (BP) Color Range\n",
"\n",
"F Q SBS MonNov4 20:49\n",
"\n",
"2.5MB 500KB 100KB 25KB 5KB\n",
"\n",
"156,000 KB 155,000 KB 154,000 KB 153,000 KB 152,000 KB 151,000 KB\n",
"\n",
"157,000 KB\n",
"\n",
"1:159,230,001-159,240,000\n",
"1:153,390,001-153,400,000\n",
"observed value (O) = 0.0\n",
"lexpected value (E) = 0.032\n",
"O/E =0\n",
"\n",
"LayerO <> | & |\n",
"\n",
"Show Annotation Panel\n",
"\n",
"\n",
"(mustache_aman) [papantonis1@gwdu101 aman]$ awk '$1 == $4 {print $1}' GEO2457_5kb_mustache_loops.bedpe | sort | unig -c && wc -1 GE02457_5kb_mustache_loops.bedpe\n",
"88@ chri\n",
"457 chr1e\n",
"536 chri1\n",
"542 chri2\n",
"297 chr13\n",
"306 chri4\n",
"278 chri5\n",
"173 chr16\n",
"253 chr17\n",
"244 chri8\n",
"\n",
"92 chri9\n",
"942 chr2\n",
"216 chr2e\n",
"\n",
"88 chr21\n",
"\n",
"65 chr22\n",
"804 chr3\n",
"686 chr4\n",
"663 chrd\n",
"731 chré\n",
"552 chr7\n",
"574 chr8&\n",
"402 chr9\n",
"205 chrx\n",
"\n",
"9987 GEO2457_5kb_mustache_loops.bedpe\n",
"\n",
"(mustache_aman) [papantonis1@gwdu1@1 aman]$ awk '$1 == $4 {print $1}' GE02459_5kb_mustache_loops.bedpe | sort | unig -c && wc -1 GE02459_5kb_mustache_loops.bedpe\n",
"673 chri\n",
"341 chr1e\n",
"394 chri1\n",
"433 chri2\n",
"233 chr13\n",
"254 chri4\n",
"207 chri5\n",
"108 chr16\n",
"147 chr1i7\n",
"234 chri8\n",
"\n",
"29 chri9\n",
"626 chr2\n",
"173 chr2e\n",
"\n",
"83 chr21\n",
"\n",
"30 chr22\n",
"60@ chr3\n",
"534 chr4\n",
"484 chrd5\n",
"536 chré\n",
"425 chr7\n",
"481 chr8\n",
"286 chr9\n",
"158 chrx\n",
"\n",
"7478 GEO2459_5kb_mustache_loops.bedpe\n",
"\n",
"Contact Matrices:\n",
"\n",
"Fig x: Visualization in Juicebox for two HiC datasets\n",
"\n",
"The 10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files\n",
"locally. The left panel shows the matrix from the cis-regulatory elements in Maize study and the one\n",
"on the right is from (7).The right panel is chromosome one at resolution 500 kb. The 10*10\n",
"chromosomes full contact matrix was visualized in Juicebox GUI app by importing files locally. The\n",
"10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files locally.\n",
"\n",
"Ice ot\n",
"earn ere ta rao pen 2 prema ne oe [eeremsne [seen]\n",
"\n",
"FastQC: Per Sequence GC Content\n",
"Pea Samp\n",
"\n",
"Per Base N Content [aim\n",
"\n",
"epocenapecttancastcan poten ren an asa\n",
"\n",
"FastQC: Per Base N Content\n",
"\n",
"Sequence Length Distribution [a\n",
"\n",
"Mimosa equa ci ng)\n",
"\n",
"Sequence Duplication Levels SE (ome)\n",
"eae ge yer\n",
"[eeewwres [cere]\n",
"\n",
"FastQC: Sequence Duplication Levels,\n",
"\n",
"Overrepresented sequences by sample SKIN\n",
"\n",
"Pett arr ctonnpeericsminceanh eaten.\n",
"\n",
"Top overrepresented sequences\n",
"\n",
"ie onmmteeseince sr ssarde The soe 2 trent ser cern aye noosa yr\n",
"\n",
"Adapter Content [ZI [ome]\n",
"\n",
"Peamusiepenep cathe sana yay te asa en aspen enon\n",
"[eeremsne [seen]\n",
"\n",
"FastQC: Adapter Content\n",
"\n",
"\n",
"© aman — nano ./Downloads/assignment/Ecoli_|\n",
"\n",
"fi/Ecoli_hifi_genome.gff — 208x63\n",
"\n",
"nment/Ecoli_hifi/Ecoli_hifi_genome.gff\n",
"\n",
"ile: ./Downloads/as\n",
"\n",
"Ww PICO 5.09\n",
"\n",
"i#gff-version 3\n",
"\n",
"##sequence-region tig@0000001 1\n",
"\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tige0eee0e1\n",
"tigeeeee0e1\n",
"\n",
"Wie) Get Help\n",
"Wed Exit\n",
"\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"minced:@.2.0\n",
"\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"Prodigal: 002006\n",
"\n",
"465\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"CRI\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"cDS\n",
"\n",
"7533\n",
"\n",
"99\n",
"1718\n",
"2811\n",
"5892\n",
"7393\n",
"7888\n",
"8982\n",
"9643\n",
"10258\n",
"11177\n",
"11567\n",
"12412\n",
"13701\n",
"14611\n",
"16038\n",
"16693\n",
"17210\n",
"17540\n",
"18250\n",
"18726\n",
"19756\n",
"20511\n",
"21277\n",
"22479\n",
"23565\n",
"25018\n",
"25799\n",
"26529\n",
"26863\n",
"27693\n",
"28797\n",
"29615\n",
"30377\n",
"33014\n",
"33225\n",
"33615\n",
"35767\n",
"36774\n",
"37895\n",
"38158\n",
"39034\n",
"39596\n",
"40182\n",
"40790\n",
"42619\n",
"43560\n",
"45279\n",
"45821\n",
"46585\n",
"46988\n",
"47617\n",
"49050\n",
"50764\n",
"51926\n",
"52606\n",
"54986\n",
"\n",
"SPR\n",
"\n",
"We) WriteOut\n",
"We) Justify\n",
"\n",
"1643\n",
"\n",
"2452\n",
"\n",
"5477\n",
"\n",
"7400\n",
"\n",
"7875\n",
"\n",
"8979\n",
"\n",
"9656\n",
"\n",
"10242\n",
"11175\n",
"11461\n",
"12329\n",
"13449\n",
"14609\n",
"16038\n",
"16643\n",
"17016\n",
"17521\n",
"18250\n",
"18729\n",
"19775\n",
"20517\n",
"21137\n",
"22416\n",
"23471\n",
"24929\n",
"25794\n",
"26437\n",
"26900\n",
"27696\n",
"28601\n",
"29564\n",
"30271\n",
"32938\n",
"33148\n",
"33578\n",
"35693\n",
"36777\n",
"37895\n",
"38167\n",
"39030\n",
"39384\n",
"40057\n",
"40793\n",
"42616\n",
"43542\n",
"45269\n",
"45821\n",
"46588\n",
"46995\n",
"47458\n",
"49041\n",
"50507\n",
"51777\n",
"52453\n",
"54858\n",
"56119\n",
"\n",
"tet et eteetse\n",
"\n",
"tet et etetetsei\n",
"\n",
"i\n",
"\n",
"tet etetesti\n",
"\n",
"++H1\n",
"\n",
"F\n",
"\n",
"SPBV2VVDVVVOVVO\n",
"\n",
"PBYWDVDDWDD WDD VDD DD VDD VV VDD VDDD DVD VDDVDDVDDVDVDVDVDVDVDVDVDVDVVVVVVOVOQ:\n",
"\n",
"Wii Read File\n",
"Wil) Where is\n",
"\n",
"ID=KBOCNLJJ_00001; eC_number=1.8.1.2;Name=cysI_1;db_xref=COG:C0G0155; gene=cysI_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00002; eC_number=1.8.4.8;Name=cysH_1;db_xref=COG:C0G0175; gene=cysH_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00003; eC_number=3.1.-.-—;Name=ygcB_1;db_xref=COG:C0G1203; gene=ygcB_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00004;Name=casA_1;gene=casA_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4690$\n",
"ID=KBOCNLJJ_@0005 ; Name=casB_1;gene=casB_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P7663$\n",
"ID=KBOCNLJJ_00006;Name=casC_1;gene=casC_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n",
"ID=KBOCNLJJ_@0007 ; Name=casD_1;gene=casD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n",
"ID=KBOCNLJJ_00008; eC_number=3.1. j;Name=casE_1;gene=casE_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n",
"ID=KBOCNLIJJ_00009; eC_number=3.1. j;Name=ygbT_1;db_xref=COG:C0G1518; gene=ygbT_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00010; eC_number=3.1.-.-—;Name=ygbF_1;gene=ygbF_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n",
"note=CRISPR with 13 repeat units;rpt_family=CRISPR;rpt_type=direct\n",
"\n",
"ID=KBOCNLJJ_00011;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00011;product=hypothetical protein\n",
"ID=KBOCNLJJ_00012; eC_number=2.7.7.4;Name=cysD_1;db_xref=COG:C0G0175; gene=cysD_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00013; eC_number=2.7.7.4;Name=cysN; db_xref=COG:C0G2895; gene=cysN;inference=ab initio prediction:Prodigal: 002006, simi$\n",
"ID=KBOCNLJJ_00014; eC_number=2.7.1.25;Name=cysC; db_xref=COG:C0G@529; gene=cysC;inference=ab initio prediction:Prodigal: 002006, sim$\n",
"ID=KBOCNLJJ_00015 ; Name=ygbE; gene=ygbE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P46141;1lo$\n",
"ID=KBOCNLJJ_00016;Name=ftsB; db_xref=COG:C0G2919; gene=ftsB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00017; eC_number=2.7.7.60;Name=ispD; db_xref=COG:C0G1211; gene=ispD;inference=ab initio prediction:Prodigal: 002006, sim$\n",
"ID=KBOCNLJJ_00018; eC_number=4.6.1.12;Name=ispF; db_xref=COG:C0G@245; gene=ispF;inference=ab initio prediction:Prodigal: 002006, sim$\n",
"ID=KBOCNLJJ_00019; eC_number=5.4.99.27;Name=truD; db_xref=COG:C0G0585; gene=truD; inference=ab initio prediction:Prodigal:002006,si$\n",
"ID=KBOCNLJJ_00020; eC_number=3.1.3.5;Name=surE; db_xref=COG:C0G0496; gene=surE;inference=ab initio prediction:Prodigal: 002006, simi$\n",
"ID=KBOCNLJJ_00021; eC_number=2.1.1.77;Name=pcm; db_xref=COG:C0G2518; gene=pcm; inference=ab initio prediction:Prodigal: 002006, simil$\n",
"ID=KBOCNLJJ_00022;Name=n1pD_1; db_xref=COG:C0G@739; gene=nlpD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
"ID=KBOCNLJJ_00023;Name=rpoS; db_xref=COG:C0G0568; gene=rpoS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00024;Name=ygbN; db_xref=COG:C0G2610;gene=ygbN; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00025; eC_number=5.3.1.35;Name=otnI; db_xref=COG:C0G3622; gene=otnI;inference=ab initio prediction:Prodigal: 002006, sim$\n",
"ID=KBOCNLJJ_00026; eC_number=4.1.1.104;Name=otnC;gene=otnC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_0@0027; eC_number=2.7.1.217;Name=otnK_1;db_xref=COG:C0G3395; gene=otnK_1;inference=ab initio prediction:Prodigal:00200$\n",
"ID=KBOCNLJJ_00028; eC_number=2.7.1.217;Name=otnK_2;db_xref=COG:C0G3395; gene=otnK_2;inference=ab initio prediction:Prodigal:00200$\n",
"ID=KBOCNLJJ_00029; eC_number=1.1.1.411;Name=1tnD; gene=1tnD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00030;Name=g1lcR;db_xref=COG:C0G1349; gene=glcR;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00031; eC_number=3.1.3.16;Name=pphB; db_xref=COG:C0G@639; gene=pphB; inference=ab initio prediction:Prodigal: 002006, sim$\n",
"ID=KBOCNLJJ_00032;Name=mutS;db_xref=COG:C0G0249; gene=mutS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00033;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00033;product=hypothetical protein\n",
"ID=KBOCNLJJ_00034;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00034;product=hypothetical protein\n",
"ID=KBOCNLJJ_00035 ; Name=fh1A; db_xref=COG:C0G3604;gene=fhlA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLIJJ_00036; eC_number=4.2.1.—;Name=hypE; db_xref=COG:C0G@309; gene=hypE;inference=ab initio prediction:Prodigal: 002006, simi$\n",
"ID=KBOCNLJJ_00037 ; Name=hypD; db_xref=COG:C0G0409; gene=hypD; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00038; Name=hypC; db_xref=COG:C0G0298; gene=hypC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLIJJ_00039 ; Name=hypB; db_xref=COG:C0G0378; gene=hypB; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_@0040 ; Name=hypA; db_xref=COG:C0G0375; gene=hypA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_@0041;Name=hycA;gene=hycA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P@AEV4; 1lo$\n",
"ID=KBOCNLJJ_00042; eC_number=1.-. j;Name=hyfA_1; db_xref=COG:C0G1142; gene=hyfA_1;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00043; eC_number=7.1.1.—;Name=ndhB_1;gene=ndhB_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n",
"ID=KBOCNLJJ_00044;Name=hycD; db_xref=COG:C0G0650;gene=hycD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_@0045 ; Name=hycE; db_xref=COG:C0G3261; gene=hycE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00046; eC_number=7.1.1.—;Name=ndhI_1;gene=ndhI_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n",
"ID=KBOCNLJJ_00047 ; Name=hycG_1; db_xref=COG:C0G3260; gene=hycG_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
"ID=KBOCNLJJ_00048;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00048;product=hypothetical protein\n",
"ID=KBOCNLJJ_0@0049; eC_number=3.4.23.51;Name=hycI ;db_xref=COG:C0G0680;gene=hycI;inference=ab initio prediction:Prodigal:002006,si$\n",
"ID=KBOCNLJJ_@0050; eC_number=3.2.1.86;Name=bg1H_1;db_xref=COG:C0G2723; gene=bg1H_1;inference=ab initio prediction:Prodigal:002006$\n",
"ID=KBOCNLJJ_00051; Name=bg1F_1;db_xref=COG:C0G1263; gene=bg1F_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
"ID=KBOCNLJJ_00052;Name=ascG; db_xref=COG:C0G1609; gene=ascG; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
"ID=KBOCNLJJ_00053; eC_number=1.-.-.-—;Name=hyfA_2;db_xref=COG:C0G1142;gene=hyfA_2;inference=ab initio prediction:Prodigal:002006,$\n",
"ID=KBOCNLJJ_00054; eC_number=6.2.-—.—;Name=hypF; db_xref=COG:C0G@068; gene=hypF;inference=ab initio prediction:Prodigal: 002006, simi$\n",
"ID=KBOCNLJJ_00055; eC_number=1.18.1.-—;Name=norw; db_xref=COG:C0G1251; gene=norW; inference=ab initio prediction:Prodigal: 002006, sim$\n",
"\n",
"bad Prev Pg Wag Cut Text wie Cur Pos\n",
"WA) Next Pg wig) UnCut Text Way To Spell\n",
"\n",
"\n",
"About Library _ Statistics\n",
"\n",
"Sequencing\n",
"\n",
"Sequenced Reads: 547812856\n",
"\n",
"Duplication and Complexity (% Sequenced Reads)\n",
"\n",
"Analysis of Unique Reads (% Sequenced Reads / % Unique Reads)\n",
"\n",
"Intra-fragment Reads: 34,307,613\n",
"\n",
"Below MAPQ Threshold: 355,354,506 (64.87% / 73.27%)\n",
"\n",
"Hi-C Contacts: 95,311,375 (17.40% / 19.65%)\n",
"3' Bias (Long Range): 97% - 3%\n",
"\n",
"Pair Type % (L-I-O-R): 25% - 25% - 25% - 25%\n",
"\n",
"Analysis of Hi-C Contacts (% Sequenced Reads / % Unique Reads)\n",
"\n",
"Inter-chromosomal: 22,194,956 (4.05% / 4.58%)\n",
"Intra-chromosomal: 73,116,419 (13.35% / 15.08%)\n",
"Long Range (>20Kb): 35,425,178 (6.47% / 7.30%)\n",
"\n",
"RUN wget https://github.com/samtools/htslib/releases/download/1.18/htslib-1.18.tar.bz2 && \\\n",
"tar -xvf htslib-1.18.tar.bz2 && \\\n",
"cd htslib-1.18 && \\\n",
"./configure --enable-libcurl && \\\n",
"make -j$(nproc) && \\\n",
"make install && \\\n",
"cd .. && rm -rf htslib-1.18*\n",
"\n",
"# User addition\n",
"\n",
"RUN useradd -m -u 1001 aman && echo 'aman:123' | chpasswd\n",
"RUN usermod —aG sudo aman\n",
"\n",
"RUN usermod -aG rstudio aman\n",
"\n",
"# persistent volumes. Use flag -v\n",
"RUN mkdir -p /home/rstudio/data\n",
"\n",
"RUN chown -R aman:aman /home/rstudio\n",
"VOLUME [\"/home/rstudio/data\"]\n",
"\n",
"[1]\n",
"\n",
"(4)\n",
"\n",
"tv)\n",
"\n",
"(4)\n",
"\n",
"print(hic.getGenomeID())\n",
"print(hic.getResolutions())\n",
"\n",
"hg19\n",
"[2500000, 1000000, 500000, 250000, 100000, 50000, 25000, 10000, 5000, 1000]\n",
"\n",
"now print out the chromosomes in this file.\n",
"\n",
"for chrom in hic.getChromosomes():\n",
"print(chrom.name, chrom. length)\n",
"\n",
"All 3098789\n",
"249250621\n",
"243199373\n",
"198022430\n",
"191154276\n",
"180915260\n",
"171115067\n",
"159138663\n",
"146364022\n",
"141213431\n",
"10 135534747\n",
"11 135006516\n",
"12 133851895\n",
"13 115169878\n",
"14 107349540\n",
"15 102531392\n",
"16 90354753\n",
"17 81195210\n",
"18 78077248\n",
"19 59128983\n",
"20 63025520\n",
"21 48129895\n",
"22 51304566\n",
"X 155270560\n",
"Y 59373566\n",
"MT 16569\n",
"\n",
"COIYAHAWNE\n",
"\n",
"@ Zed File Edit Selection View Go Window Help\n",
"\n",
"ma BmeOorrtktoewwwn F-<ase\n",
"\n",
"Tue Dec 17 16:52\n",
"\n",
"@ © @ = multiqc_datajson\n",
"\n",
"report_data_sources <>\n",
"FastQc\n",
"all_sections multiqc_data.json\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"GLDS-251_rna-seq_13JUN2017H\n",
"report_general_stats_data\n",
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
"percent_gc\n",
"avg_sequence_length\n",
"median_sequence_length\n",
"total_sequences\n",
"percent_duplicates\n",
"percent_fails\n",
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
"percent_gc\n",
"avg_sequence_length\n",
"median_sequence_length\n",
"total_sequences\n",
"percent_duplicates\n",
"percent_fails\n",
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
"percent_gc\n",
"avg_sequence_length\n",
"median_sequence_length\n",
"total_sequences\n",
"\n",
"percent_duplicates\n",
"Filter... z\n",
"\n",
"st & v\n",
"\n",
"Click to restart and update Zed\n",
"\n",
"multiqc_data.json\n",
"\n",
"PELCSNL_LaLLsS 2 7.UFUFTUTUIVUIVIVIZ\n",
"\n",
"Bo\n",
"\n",
"Sign in\n",
"\n",
"+\n",
"\n",
"oO\n",
"\n",
"Q*® I\n",
"\n",
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_235_239_UMISS_Hoeksema_GTTTCG_L0@3_R1_001_1M\": {\n",
"\n",
"\"percent_gc\": 46.0,\n",
"\"avg_sequence_length\": 125.0,\n",
"\"median_sequence_length\": 125,\n",
"\"total_sequences\": 1000000.0,\n",
"\"percent_duplicates\": 23.347216247708587,\n",
"\"percent_fails\": 9.090909090909092\n",
"\n",
"Bo\n",
"\n",
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_120_UMISS_Hoeksema_TGACCA_L001_R1_001_1M\":\n",
"\n",
"\"percent_gc\": 49.0,\n",
"\"avg_sequence_length\": 125.0,\n",
"\"median_sequence_length\": 125,\n",
"\"total_sequences\": 1000000.0,\n",
"\"percent_duplicates\": 52.07411329479328,\n",
"\"percent_fails\": 18.181818181818183\n",
"\n",
"Bo\n",
"\n",
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_175_UMISS_Hoeksema_AGTTCC_L00Q2_R1_001_1M\":\n",
"\n",
"\"percent_gc\": 47.0,\n",
"\"avg_sequence_length\": 125.0,\n",
"\"median_sequence_length\": 125,\n",
"\"total_sequences\": 1000000.0,\n",
"\"percent_duplicates\": 30.77778969527732,\n",
"\"percent_fails\": 9.090909090909092\n",
"\n",
"Bo\n",
"\n",
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_179_UMISS_Hoeksema_CCGTCC_LO0Q3_R1_001_1M\":\n",
"\n",
"\"percent_gc\": 45.0,\n",
"\"avg_sequence_length\": 125.0,\n",
"\"median_sequence_length\": 125,\n",
"\n",
"Nt ata enniianene \". ANNNANAAN A\n",
"\n",
"Updated to Zed 0.163.2\n",
"View the release notes\n",
"\n",
"algal JSON\n",
"\n",
"v\n",
"\n",
"aman@Laptop-von-Aman juicer_hpro % docker build -t juicer_hicpro .\n",
"\n",
"[+] Building 2.3s (16/18)\n",
"\n",
"=> [internal] load build definition from Dockerfile\n",
"\n",
"=> transferring dockerfile: 2.07kB\n",
"\n",
"[internal] load metadata for docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04\n",
"\n",
"[auth] nvidia/cuda:pull token for registry-1.docker.io\n",
"\n",
"[internal] load .dockerignore\n",
"\n",
"=> transferring context: 2B\n",
"\n",
"CANCELED [ 1/13] FROM docker.io/nvidia/cuda:11.7.1-devel—ubuntu22.04@sha256 : 18aade8cf@2eede9d4db5d8a8a73d4505bb2322e91cd54e4c601e5ae100ed691\n",
"=> resolve docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04@sha256: 18aade8c f02eede9d4db5d8a8a73d4505bb2322e91cd54e4c601e5ae100ed691\n",
"[internal] load build context\n",
"\n",
"=> transferring context: 2B\n",
"\n",
"CACHED [ 3/13] RUN locale-gen en_US.UTF-8\n",
"\n",
"CACHED [ 4/13] RUN wget https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh -O /tmp/miniconda.sh && bash /tmp/miniconda.sh -b -p /usr/local/anaconda &&\n",
"ERROR [ 5/13] COPY environment.yml /\n",
"\n",
"CACHED [ 6/13] RUN conda env create -f /environment.yml && conda clean -a\n",
"\n",
"CACHED [ 7/13] RUN cd /opt && wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && unzip hicpro_latest.zip && cd HiC-Pro-master &&\n",
"\n",
"> CACHED [ 8/13] WORKDIR /opt\n",
"\n",
"ERROR [ 9/13] COPY install-dependencies.sh /opt/install-dependencies.sh\n",
"\n",
"> ERROR [10/13] COPY download-and-run-demo.sh /aidenlab/\n",
"\n",
"> ERROR [11/13] COPY download-demo.txt /aidenlab/\n",
"\n",
"v\n",
"\n",
"COPY install-dependencies.sh /opt/install-dependencies.sh:\n",
"\n",
"COPY download-and-run-demo.sh /aidenlab/:\n",
"\n",
"COPY download-demo.txt /aidenlab/:\n",
"\n",
"COPY install-dependencies.sh /opt/install-dependencies.sh\n",
"\n",
"COPY download-and-run-demo.sh /aidenlab/\n",
"\n",
"COPY download-demo.txt /aidenlab/\n",
"\n",
"RUN chmod +x /opt/install-dependencies.sh && /opt/install-dependencies.sh && \\\n",
"chmod +x /aidenlab/download-and-run-demo.sh && \\\n",
"\n",
"ERROR: failed to solve: failed to compute cache key: failed to calculate checksum of ref mh9tt@9a7urz4xt51386tebzw: :xdsz6f9f1g9z1t18j4ipud5@bf: \"/download-demo.txt\": not found\n",
"\n",
"View build details: docker-desktop://dashboard/build/desktop-—linux/desktop—linux/4aiwsé6vrixqnjrre@férxiuzt4\n",
"aman@Laptop-von-Aman juicer_hpro % I\n",
"\n",
"docker:desktop-—linux\n",
"\n",
"CACHED [ 2/13] RUN apt-get update && apt-get install -y build-essential wget unzip bzip2 gcc gt+ openjdk-11-jdk git curl make ca-certificates vim\n",
"\n",
"rm /tmp/minicon\n",
"\n",
"make configure pref\n",
"\n",
"Q.\n",
"-@s\n",
"-1s\n",
"-@s\n",
"-@s\n",
"-@s\n",
"«1s\n",
"-@s\n",
"-1s\n",
"-@s\n",
"-@s\n",
"-@s\n",
"-@s\n",
"-Os\n",
"-@s\n",
"-@s\n",
"-@s\n",
"-Os\n",
"-Os\n",
"-Os\n",
"\n",
"PBVVVWWWVVVVVVVGTGVONO\n",
"\n",
"Qs\n",
"\n",
"Last login: Wed Sep 18 15:46:07 on ttys@0ee\n",
"aman@Laptop-von-Aman ~ % ssh amnala@base.hpc.taltech.ee\n",
"amnala@base.hpc.taltech.ee's password:\n",
"\n",
"Last login: Wed Sep 18 16:49:35 2024 from 193.40.250.119\n",
"\n",
"Welcome to base.hpc.taltech.ee.\n",
"It has been freshly upgraded to Rocky 8!\n",
"\n",
"This is HPC Centre's main batch cluster.\n",
"If you run into any trouble, let us know in Teams 'HPC Support Chat' or write to us: hpcsupport@taltech.ee\n",
"\n",
"User guides: https://hpc.pages.taltech.ee/user-guides\n",
"\n",
"NEW MODULES:\n",
"\n",
"module load rocky8/all\n",
"\n",
"module load rocky8-spack/master\n",
"\n",
"module load openmpi/4.1.1-gcc-10.3.0-r8\n",
"\n",
"URGENT ==\n",
"\n",
". The module system has changed so your job submission scripts need to be changed\n",
"\n",
"-— amp*, green* and gray* modules have been replaced by rocky8* modules.\n",
"\n",
"-— most of the module names have changed, use module avail to see the available ones\n",
"\n",
"- Infiniband is not available currently, for MPI jobs use the openmpi/4.1.1-gcc-10.3.@-r8-tcp module\n",
"\n",
"2. We are missing some software currently, it will become available in the coming weeks\n",
"\n",
"3. The user-guide will be updated in the coming weeks and the example scripts and modules do not yet reflect the current module structure/naming\n",
"4. user-guides have been moved to https://docs.hpc.taltech.ee\n",
"\n",
"If you run into any trouble, let us know in Teams 'HPC Support Chat' or e-mail us: hpcsupport@taltech.ee\n",
"\n",
"[amnala@base ~]$ ls\n",
"\n",
"fruitsalad.txt fruitsalad_cleaned.txt history_aman.txt\n",
"\n",
"[amnala@base ~]$ cat history_aman.txt\n",
"14 cd smbgroup/bioinf-students/\n",
"\n",
"15 s -ltr\n",
"16 clear\n",
"17 s -ltr\n",
"\n",
"18 echo $HOME\n",
"\n",
"19 cp fruitsalad.txt $HOME\n",
"\n",
"20 cd $HOME\n",
"\n",
"21 s\n",
"\n",
"22 cat fruitsalad.txt\n",
"\n",
"23 uniq fruitsalad.txt\n",
"\n",
"24 cat fruitsalad.txt | sort | uniq -u\n",
"25 s\n",
"26 cat fruitsalad.txt\n",
"\n",
"27 cat fruitsalad.txt | sort | uniq -u > fruitsalad_cleaned.txt\n",
"\n",
"28 s\n",
"29 cat fruitsalad_cleaned.txt\n",
"3@ we -h\n",
"\n",
"31 we --help\n",
"\n",
"32 we -l1 fruitsalad_cleaned.txt\n",
"\n",
"33 cat fruitsalad_cleaned.txt\n",
"\n",
"34 history | less\n",
"\n",
"35 history | tail\n",
"\n",
"36 history\n",
"\n",
"37 history | tail -n +14 > history_aman.txt\n",
"[amnala@base ~]$\n",
"\n",
"\n",
"Genome vv Tracks ¥ Sample Info v Session v Share Bookmark Save Image Circular View v Help v\n",
"\n",
"IGV oxford_e...me.fasta tig00000002:1,989,819-1,993,234 Q 3,416 bp (Select Tracks ) (Crosshairs )(_Center Line )(TrackLabels) @ +)\n",
"1,990 kb j 1,991 kb j 1,992 kb j 1,993 kb\n",
"AQ 0 EA A MY TAY A AY a\n",
"|= SS SS en |\n",
"tnaB tnaA mnmE_1\n",
"\n",
"INSTITUTE\n",
"\n",
"Heng igv.org UCSan Diego fe BROAD\n",
"\n",
"\n",
"@FastQC Report\n",
"\n",
"Summary\n",
"\n",
"Qeasic Statistics\n",
"Ore base sequence quality\n",
"\n",
"Ober sequence quality scores\n",
"\n",
"Ober base sequence content\n",
"Qeer sequence GC content\n",
"Oeer base N content\n",
"\n",
"Q sequence Length Distribution\n",
"Qseauence Duplication Levels\n",
"Qoverrepresented sequences\n",
"Qadapter Content\n",
"\n",
"Qrxmmer Content\n",
"\n",
"Qbasic Statistics\n",
"\n",
"a\n",
"\n",
"Filename\n",
"\n",
"File type\n",
"\n",
"Encoding\n",
"\n",
"Total Sequences\n",
"\n",
"Sequences flagged as poor quality\n",
"Sequence length\n",
"\n",
"%GC\n",
"\n",
"wood_sample_3_forward_paired. fq.gz\n",
"Conventional base calls\n",
"\n",
"Sanger / Illumina 1.9\n",
"\n",
"185642\n",
"\n",
")\n",
"\n",
"30-150\n",
"\n",
"36\n",
"\n",
"@per base sequence quality\n",
"\n",
"Quality scores across all bases (Sanger / Illumina 1.9 encoding)\n",
"\n",
"40\n",
"\n",
"16\n",
"\n",
"14\n",
"12\n",
"10\n",
"\n",
"oN B&O\n",
"\n",
"12345 67 8 9 1519\n",
"\n",
"30-34 45-49 60-64 75-79 90-94 105-109 120-124 135-139 150\n",
"\n",
"@FastQC Report\n",
"\n",
"Summary\n",
"\n",
"Qbasic Statistics\n",
"\n",
"Ober base sequence quality\n",
"\n",
"Ober sequence quality scores\n",
"Ober base sequence content\n",
"OQer sequence GC content\n",
"Ober base N content\n",
"Osequence Length Distribution\n",
"Osequence Duplication Levels\n",
"Q overrepresented sequences\n",
"Qoaaapter Content\n",
"\n",
"Tue 8 Oct 2024\n",
"\n",
"5_merged_2_paired.fastq\n",
"\n",
"Oper base sequence content\n",
"\n",
"100\n",
"\n",
"90\n",
"\n",
"80\n",
"\n",
"70\n",
"\n",
"60\n",
"\n",
"50\n",
"\n",
"40\n",
"\n",
"30\n",
"\n",
"20\n",
"\n",
"10\n",
"\n",
"123456789\n",
"\n",
"Sequence content across all bases\n",
"\n",
"11 $13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 65\n",
"Position in read (bp)\n",
"\n",
"%T\n",
"%C\n",
"\n",
"%G\n",
"\n",
"eoo <— > OQ VD G monkeytype.com Ws Search SEARXNG-NALAKATH eave @ @ ~™@®\n",
"ay & a Ab New merch store now open, including a limited edition metal keycap! monkeytype.store x\n",
"\n",
"monkeytype\n",
"\n",
"73\n",
"96%\n",
"\n",
"cautich 76 182/3/1/0 84% 30s\n",
"\n",
"GCO@Qe2® ag Avnud9g HSBTOC BD\n",
"\n",
"english\n",
"&\n",
"S Workspaces v (mi) Monkeytype | A minimalisti + Vv\n",
"0&8 S CI CQ Reset Om 100% 11:12\n",
"\n",
"Labs/Group Leaders that interest\n",
"you (up to 5)\n",
"\n",
"Labs/Group Leaders that interest\n",
"you (up to 5)\n",
"\n",
"Labs/Group Leaders that interest\n",
"you (up to 5)\n",
"\n",
"Labs/Group Leaders that interest\n",
"you (up to 5)\n",
"\n",
"Christa Buecker\n",
"\n",
"Daniel Gerlich\n",
"\n",
"Marco Hein\n",
"\n",
"Yan Ma\n",
"\n",
"@FastQC Report\n",
"\n",
"Summary\n",
"\n",
"Qeasic Statistics\n",
"Ore base sequence quality\n",
"\n",
"Ober sequence quality scores\n",
"\n",
"Ober base sequence content\n",
"Qeer sequence GC content\n",
"Oeer base N content\n",
"\n",
"Q sequence Length Distribution\n",
"Qseauence Duplication Levels\n",
"Qoverrepresented sequences\n",
"Qadapter Content\n",
"\n",
"Okmmer Content\n",
"\n",
"Qbasic Statistics\n",
"\n",
"a\n",
"\n",
"Filename\n",
"\n",
"File type\n",
"\n",
"Encoding\n",
"\n",
"Total Sequences\n",
"\n",
"Sequences flagged as poor quality\n",
"Sequence length\n",
"\n",
"%GC\n",
"\n",
"wood_sample_5_forward_paired. fq.gz\n",
"Conventional base calls\n",
"\n",
"Sanger / Illumina 1.9\n",
"\n",
"179506\n",
"\n",
")\n",
"\n",
"30-150\n",
"\n",
"37\n",
"\n",
"@per base sequence quality\n",
"\n",
"Quality scores across all bases (Sanger / Illumina 1.9 encoding)\n",
"\n",
"40\n",
"\n",
"16\n",
"\n",
"14\n",
"12\n",
"10\n",
"\n",
"oN B&O\n",
"\n",
"12345 67 8 9 1519\n",
"\n",
"30-34 45-49 60-64 75-79 90-94 105-109 120-124 135-139 150\n",
"\n",
"In [36]:\n",
"\n",
"%%sbash\n",
"\n",
"head /mnt/storage3/aman/wdbasejuicer_new/hiccups_output/postprocessed_pixels_10000.bedpe\n",
"\n",
"#chr1— x1 x2 chr2\n",
"expectedDonut expectedH\n",
"centroid2 radius\n",
"\n",
"# juicer_tools version 2.20.00\n",
"10 6090000 6100000 10\n",
"6.738838 8.369542\n",
"\n",
"6098333 6208333 7454\n",
"\n",
"10 139920000 13993000\n",
"55,255 62.0 8.725843\n",
"455184E-15 9.31793E-40\n",
"\n",
"10 76000000 76010000\n",
"55,255 57.0 9.344456\n",
"203114E-17 2.29482E-25\n",
"\n",
"10 149390000 14940000\n",
"55,255 56.0 5.521386\n",
"702141E-16 2.387457E-16\n",
"\n",
"10 136480000 13649000\n",
"55,255 56.0 5 8624353\n",
"23398E-20 1.2297154E-24\n",
"\n",
"10 148200000 14821000\n",
"55,255 55.0 78222165\n",
"\n",
"19 8.71397@5E-12 2\n",
"\n",
"10 145390000 14540000\n",
"55,255 52.0 9.858375\n",
"\n",
"17 1.6487045E-21 2\n",
"\n",
"10 143300000 14331000\n",
"55,255 48.0 7.270913\n",
"923472E-15 8.827955E-12\n",
"\n",
"yl y2 name score strand1 strand2 color observed expectedBL\n",
"expectedV fdrBL fdrDonut fdrH fdrv numCollapsed centroid1\n",
"6200000 6210000 . . : : @,255,255 69.0 7.9115663\n",
"13.515236 1.45373255E-30 5.202941E-36 3.1267008E-30 1.2960435E-19 3\n",
"0 10 139980000 139990000 : : : . 0,2\n",
"7.795326 15.521655 4.7749968 5.0803407E-25 6.842732E-30 1.4\n",
"3 139925000 139985000 10000\n",
"\n",
"10 76080000 76090000 : : : . 0,2\n",
"8.861963 11.599155 7.0608373 2.5698042E-21 1.734446E-21 4.3\n",
"6 76006666 76076666 14337\n",
"0 10 149450000 149460000 : : : . 0,2\n",
"7.006336 10.389031 11.967166 2.12049@5E-29 6.3991415E-25 1.6\n",
"4 149390000 149450000 7071\n",
"0 10 136880000 136890000 : : : . 0,2\n",
"4.0235314 9.664011 6.9882493 2.1204905E-29 8.194439E-34 2.8\n",
"7 136483571 136879285 16659\n",
"0 10 148260000 148270000 : : : . 0,2\n",
"9.238162 9.26983 14.654494 6.932115E-24 3.9216012E-20 1.4314703E-\n",
"148205000 148260000 5000\n",
"0 10 145440000 145450000 : : : . 0,2\n",
"6.957423 8.590018 6.5711 5.5672264E-14 6.6677316E-22 1.1138844E-\n",
"145395000 145450000 5000\n",
"0 10 143360000 143370000 : : : . 0,2\n",
"55802155 8.395383 12.302593 1.2983397E-18 1.498726E-22 4.4\n",
"2 143310000 143365000 5000\n",
"\n"
]
}
],
"source": [
"# End-to-end run: collect screenshots -> OCR -> build the index -> query it.\n",
"# The folder is resolved from the current user's home directory instead of a\n",
"# hard-coded absolute path, so the notebook also runs under other accounts.\n",
"screenshot_dir = Path.home() / \"Pictures\"\n",
"screenshots = get_screenshots(screenshot_dir)\n",
"texts = extract_text(screenshots)\n",
"embeddings = create_and_index(texts)\n",
"results = query_embedding(embeddings, \"hic\")\n",
"for r in results:\n",
"    print(r)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a613a361",
"metadata": {},
"outputs": [],
"source": [
"# # create embeddings\n",
"# embeddings = Embeddings({\n",
"# \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n",
"# \"content\": True,\n",
"# \"graph\": True,\n",
"# \"hybrid\": True, \n",
"# \"scoring\": True\n",
"# })\n",
"\n",
"# # do indexing\n",
"# embeddings.index(txt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f94de70",
"metadata": {},
"outputs": [],
"source": [
"# Query the index: print a two-column header and a rule, then the text of\n",
"# every hit (up to 100 per query).\n",
"print(\"Query\".ljust(20), \"Best Match\")\n",
"print(50 * \"-\")\n",
"\n",
"for query in (\"genome\",):\n",
"    results = embeddings.search(query, 100)\n",
"    for match in results:\n",
"        print(match[\"text\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10c81e27",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
"\n",
"# SECURITY: never hard-code API keys in a notebook - whatever is saved here ends\n",
"# up in version control. A key was previously committed in this cell; treat it\n",
"# as leaked and revoke/rotate it with the provider.\n",
"# Use the key already exported in the environment; otherwise prompt for it\n",
"# (getpass does not echo the value and nothing is written into the notebook).\n",
"if not os.environ.get(\"OPENROUTER_API_KEY\"):\n",
"    os.environ[\"OPENROUTER_API_KEY\"] = getpass(\"OpenRouter API key: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e9519cf2",
"metadata": {},
"outputs": [],
"source": [
"from txtai import LLM"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58bce2ae",
"metadata": {},
"outputs": [],
"source": [
"# OpenRouter settings taken from the environment.\n",
"# The key is None when OPENROUTER_API_KEY is unset; the base URL falls back to\n",
"# the public endpoint when OPENROUTER_API_BASE is unset.\n",
"OPENROUTER_API_KEY = os.environ.get(\"OPENROUTER_API_KEY\")\n",
"OPENROUTER_BASE_URL = os.environ.get(\n",
"    \"OPENROUTER_API_BASE\",\n",
"    \"https://openrouter.ai/api/v1\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e20bf7e",
"metadata": {},
"outputs": [],
"source": [
"messages = \"What is Hi-C and how does it work?\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41f0f066",
"metadata": {},
"outputs": [],
"source": [
"import litellm\n",
"\n",
"# Baseline request: the question is sent WITHOUT any retrieved context.\n",
"# The wording matches the question used by the context-grounded cell at the end\n",
"# of the notebook (the stray double question mark was a typo), so the two\n",
"# answers can be compared like for like.\n",
"baseline_question = \"How do population size fluctuations affect effective population size?\"\n",
"\n",
"# NOTE(review): litellm is expected to pick up OPENROUTER_API_KEY from the\n",
"# environment for \"openrouter/...\" models - confirm against the litellm docs.\n",
"response = litellm.completion(\n",
"    model=\"openrouter/minimax/minimax-m2.5:free\",\n",
"    messages=[\n",
"        {\"role\": \"user\", \"content\": baseline_question}\n",
"    ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "879c7011",
"metadata": {},
"outputs": [],
"source": [
"# Show only the assistant's reply text from the first choice.\n",
"print(f\"Answer: {response.choices[0].message.content}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2f7af13",
"metadata": {},
"outputs": [],
"source": [
"# The reasoning/thinking\n",
"# Not every model/provider returns a reasoning trace, and the attribute may be\n",
"# missing from the message in that case. Read it defensively so the cell prints\n",
"# None instead of raising AttributeError.\n",
"reasoning = getattr(response.choices[0].message, \"reasoning_content\", None)\n",
"print(\"Reasoning:\", reasoning)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e29bc4c",
"metadata": {},
"outputs": [],
"source": [
"# Total token count reported in the response's usage block.\n",
"print(f\"Tokens used: {response.usage.total_tokens}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c5ca3c7",
"metadata": {},
"outputs": [],
"source": [
"# Context-grounded answer: fetch the 3 closest indexed passages, show them,\n",
"# then ask the model to answer strictly from those passages.\n",
"question = \"How do population size fluctuations affect effective population size?\"\n",
"results = embeddings.search(question, 3)\n",
"\n",
"# The retrieved passages, separated by blank lines, become the prompt context.\n",
"context = \"\\n\\n\".join(hit[\"text\"] for hit in results)\n",
"\n",
"# Print score and the first 150 characters of each hit before calling the LLM.\n",
"print(\"Retrieved from docs\")\n",
"for hit in results:\n",
"    score, preview = hit[\"score\"], hit[\"text\"][:150]\n",
"    print(f\"[Score: {score:.3f}] {preview}...\")\n",
"    print()\n",
"\n",
"# The system message restricts the model to the context; the user message\n",
"# carries the context followed by the question.\n",
"system_message = {\n",
"    \"role\": \"system\",\n",
"    \"content\": \"Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'.\",\n",
"}\n",
"user_message = {\n",
"    \"role\": \"user\",\n",
"    \"content\": f\"Context from my documents:\\n{context}\\n\\nQuestion: {question}\",\n",
"}\n",
"response = litellm.completion(\n",
"    model=\"openrouter/minimax/minimax-m2.5:free\",\n",
"    messages=[system_message, user_message],\n",
")\n",
"\n",
"print(\"\\nllm ans\")\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}