3618 lines
113 KiB
Plaintext
3618 lines
113 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "e58ed372",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import platform\n",
|
||
"from pathlib import Path\n",
|
||
"import pytesseract\n",
|
||
"from PIL import Image\n",
|
||
"from txtai.embeddings import Embeddings\n",
|
||
"from txtai import LLM\n",
|
||
"import litellm\n",
|
||
"from dotenv import load_dotenv\n",
|
||
"import os"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "10a7eff9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Default screenshot directory for each OS, keyed by the value of platform.system()\n",
|
||
"def_paths = dict(\n",
|
||
"    Darwin=Path.home().joinpath(\"Desktop\"),\n",
|
||
"    Windows=Path.home().joinpath(\"Pictures\", \"Screenshots\"),\n",
|
||
"    Linux=Path.home().joinpath(\"Pictures\"),\n",
|
||
")\n",
|
||
"\n",
|
||
"# Glob patterns matching each OS's default screenshot filenames\n",
|
||
"sc_pathpatterns = dict(\n",
|
||
"    Darwin=[\"SCR*.png\", \"Screenshot*.png\"],\n",
|
||
"    Windows=[\"Screenshot*.png\"],\n",
|
||
"    Linux=[\"Screenshot*.png\", \"scrot*.png\", \"screenshot*.png\"],\n",
|
||
")\n",
|
||
"\n",
|
||
"# functions\n",
|
||
"\n",
|
||
"def get_screenshots(path: str | Path | None = None) -> list[str]:\n",
|
||
"    \"\"\"Find screenshot files for the current OS.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        path: Directory to scan. When None, the def_paths entry for\n",
|
||
"            platform.system() is used, or the home directory if the OS\n",
|
||
"            has no entry.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        Sorted, de-duplicated absolute paths (as strings) of the files in\n",
|
||
"        the directory that match the current OS's sc_pathpatterns entry\n",
|
||
"        (``SCR*.png`` if the OS has no entry).\n",
|
||
"    \"\"\"\n",
|
||
"    system = platform.system()\n",
|
||
"    if path is None:\n",
|
||
"        path = def_paths.get(system, Path.home())\n",
|
||
"    folder = Path(path)\n",
|
||
"    patterns = sc_pathpatterns.get(system, [\"SCR*.png\"])\n",
|
||
"    # The set drops any path that was matched by more than one pattern\n",
|
||
"    found = {str(f.absolute()) for pattern in patterns for f in folder.glob(pattern)}\n",
|
||
"    return sorted(found)\n",
|
||
"\n",
|
||
"def extract_text(images: list[str]) -> list[str]:\n",
|
||
"    \"\"\"OCR a list of image paths into text.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        images: Paths of the image files to read.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        One pytesseract.image_to_string result per input path, in input order.\n",
|
||
"    \"\"\"\n",
|
||
"    texts = []\n",
|
||
"    for img in images:\n",
|
||
"        # Image.open leaves the underlying file open; the context manager\n",
|
||
"        # closes it as soon as OCR for this image is done\n",
|
||
"        with Image.open(img) as image:\n",
|
||
"            texts.append(pytesseract.image_to_string(image))\n",
|
||
"    return texts\n",
|
||
"\n",
|
||
"def create_and_index(data: list[str], model=\"sentence-transformers/all-MiniLM-L6-v2\") -> Embeddings:\n",
|
||
"    \"\"\"Build an Embeddings index over the given texts.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        data: Texts to index.\n",
|
||
"        model: Value passed as the ``path`` setting of the Embeddings config.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        The Embeddings instance after ``index(data)`` has been called on it.\n",
|
||
"    \"\"\"\n",
|
||
"    config = {\n",
|
||
"        \"path\": model,\n",
|
||
"        \"content\": True,\n",
|
||
"        # \"graph\": True,\n",
|
||
"        \"hybrid\": True,\n",
|
||
"        \"scoring\": \"bm25\",\n",
|
||
"    }\n",
|
||
"    index = Embeddings(config)\n",
|
||
"    index.index(data)\n",
|
||
"    return index\n",
|
||
"\n",
|
||
"def query_embedding(embeddings: Embeddings, query: str, limit: int = 100) -> list[str]:\n",
|
||
"    \"\"\"Run a search and keep only the text of each hit.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        embeddings: Object whose ``search(query, limit)`` is called.\n",
|
||
"        query: Search query.\n",
|
||
"        limit: Passed to ``search`` as the result limit.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        The ``\"text\"`` value of every search result, in result order.\n",
|
||
"    \"\"\"\n",
|
||
"    return [hit[\"text\"] for hit in embeddings.search(query, limit)]\n",
|
||
"\n",
|
||
"def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]:\n",
|
||
"    \"\"\"Run a search and hand back the results unmodified.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        embeddings: Object whose ``search(query, limit)`` is called.\n",
|
||
"        query: Search query.\n",
|
||
"        limit: Passed to ``search`` as the result limit.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        Whatever ``embeddings.search(query, limit)`` returns.\n",
|
||
"    \"\"\"\n",
|
||
"    hits = embeddings.search(query, limit)\n",
|
||
"    return hits\n",
|
||
"\n",
|
||
"def ask_wllm(embeddings: Embeddings, question: str, model: str = \"openrouter/minimax/minimax-m2.5:free\", limit: int = 3) -> str:\n",
|
||
"    \"\"\"RAG helper: fetch matching passages, then have an LLM answer from them.\n",
|
||
"\n",
|
||
"    Args:\n",
|
||
"        embeddings: Index searched for context via retrieve().\n",
|
||
"        question: Used both as the search query and as the question sent to the model.\n",
|
||
"        model: Model identifier passed to litellm.completion.\n",
|
||
"        limit: Passed to retrieve() as the result limit.\n",
|
||
"\n",
|
||
"    Returns:\n",
|
||
"        The message content of the first choice in the completion response.\n",
|
||
"    \"\"\"\n",
|
||
"    passages = [hit[\"text\"] for hit in retrieve(embeddings, question, limit)]\n",
|
||
"    context = \"\\n\\n\".join(passages)\n",
|
||
"\n",
|
||
"    system_message = {\n",
|
||
"        \"role\": \"system\",\n",
|
||
"        \"content\": \"Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'.\"\n",
|
||
"    }\n",
|
||
"    user_message = {\n",
|
||
"        \"role\": \"user\",\n",
|
||
"        \"content\": f\"Context from my documents:\\n{context}\\n\\nQuestion: {question}\"\n",
|
||
"    }\n",
|
||
"\n",
|
||
"    response = litellm.completion(model=model, messages=[system_message, user_message])\n",
|
||
"    return response.choices[0].message.content"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "e73d6386",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "41d3c1089d7346229f5c9ff7b31068a1",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"Loading weights: 0%| | 0/103 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"\u001b[1mBertModel LOAD REPORT\u001b[0m from: sentence-transformers/all-MiniLM-L6-v2\n",
|
||
"Key | Status | | \n",
|
||
"------------------------+------------+--+-\n",
|
||
"embeddings.position_ids | UNEXPECTED | | \n",
|
||
"\n",
|
||
"\u001b[3mNotes:\n",
|
||
"- UNEXPECTED\u001b[3m\t:can be ignored when loading from different task/architecture; not ok if you expect identical arch.\u001b[0m\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"SPRINGER NATURE Link\n",
|
||
"\n",
|
||
"Find ajournal Publishwithus Track your research Q Search\n",
|
||
"\n",
|
||
"Home > Genome Biology > Article\n",
|
||
"\n",
|
||
"HiC-Pro: an optimized and flexible pipeline\n",
|
||
"for Hi-C data processing\n",
|
||
"\n",
|
||
"Software | Openaccess | Published: 01 December 2015\n",
|
||
"Volume 16, articlenumber 259, (2015) Cite this article\n",
|
||
"\n",
|
||
"Download PDF @ You have full access to this open access article\n",
|
||
"\n",
|
||
"Nicolas Servant 4, Nelle Varoquaux, Bryan R. Lajoie, Eric Viara, Chong-Jian Chen, Jean-Philippe Vert,\n",
|
||
"Edith Heard, Job Dekker & Emmanuel Barillot\n",
|
||
"\n",
|
||
"S) 65k Accesses f) 1404 Citations & 19 Altmetric & 3 Mentions Exploreall metrics >\n",
|
||
"\n",
|
||
"Abstract\n",
|
||
"\n",
|
||
"\n",
|
||
"o-Yof = AinTiating: H1iv—-rro-master/scripts/srce/cutsite_trimming.cpp\n",
|
||
"5.037 creating: HiC-Pro-master/test-op/\n",
|
||
"\n",
|
||
"5.038 inflating: HiC-Pro-master/test-op/config_test_as.txt\n",
|
||
"\n",
|
||
"5.038 inflating: HiC-Pro-master/test-op/config_test_cap.txt\n",
|
||
"\n",
|
||
"5.038 inflating: HiC-Pro-master/test-op/config_test_dnase.txt\n",
|
||
"\n",
|
||
"5.038 inflating: HiC-Pro-master/test-op/config_test_latest.txt\n",
|
||
"5.038 inflating: HiC-Pro-master/test-op/run-test-op.sh\n",
|
||
"\n",
|
||
"5.038 finishing deferred symbolic links:\n",
|
||
"\n",
|
||
"5.038 HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
|
||
"5.095 Make sure internet connection works for your shell prompt under current user's privilege ...\n",
|
||
"5.096 Starting HiC-Pro installation !\n",
|
||
"\n",
|
||
"5.122 Exit - Error : Configuration file not found\n",
|
||
"\n",
|
||
"41 # Install HiC-Pro\n",
|
||
"\n",
|
||
"42 | >>> RUN cd /opt && \\\n",
|
||
"\n",
|
||
"43 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
|
||
"\n",
|
||
"44 | >>> unzip hicpro_latest.zip && \\\n",
|
||
"\n",
|
||
"45 | >>> cd HiC-Pro-master/scripts/install && \\\n",
|
||
"\n",
|
||
"46 | >>> bash install_dependencies.sh -c config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.@ -q && \\\n",
|
||
"47 | >>> cd /opt/HiC-Pro-master && \\\n",
|
||
"\n",
|
||
"48 | >>> make install && \\\n",
|
||
"\n",
|
||
"49 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
|
||
"\n",
|
||
"5@ | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"\n",
|
||
"corrected Hi-C counts\n",
|
||
"\n",
|
||
"10!\n",
|
||
"\n",
|
||
"10°\n",
|
||
"\n",
|
||
"107?\n",
|
||
"\n",
|
||
"10°\n",
|
||
"genomic distance\n",
|
||
"\n",
|
||
"—— data_mcool.h5\n",
|
||
"\n",
|
||
"> Decay curve\n",
|
||
"\n",
|
||
"> First converted into .h5\n",
|
||
"format\n",
|
||
"\n",
|
||
"> HiCExplorer—-\n",
|
||
"\n",
|
||
"hicPlotDistVsCounts()\n",
|
||
"> Data quality and\n",
|
||
"\n",
|
||
"comparison\n",
|
||
"\n",
|
||
"4.524 HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
|
||
"\n",
|
||
"4.575 Make sure internet connection works for your shell prompt under current user's privilege ...\n",
|
||
"\n",
|
||
"4.575 Starting HiC-Pro installation !\n",
|
||
"\n",
|
||
"4.976 Checking dependencies\n",
|
||
"\n",
|
||
"4.976 - Python libraries ...0K\n",
|
||
"\n",
|
||
"6.765 — R installation ...0K\n",
|
||
"\n",
|
||
"9.515 - Bowtie2 installation ...0K\n",
|
||
"\n",
|
||
"9.531 - Samtools installation ...0K\n",
|
||
"\n",
|
||
"9.590\n",
|
||
"\n",
|
||
"9.598 Checking HiC-Pro configuration\n",
|
||
"\n",
|
||
"9.758 - Configuration for TORQUE/PBS system ...0K\n",
|
||
"\n",
|
||
"9.758\n",
|
||
"\n",
|
||
"9.758 done !\n",
|
||
"\n",
|
||
"9.844 (g++ -Wall -02 -std=c++@x -o build_matrix /opt/HiC-Pro-master/scripts/src/build_matrix.cpp; mv build_matrix /opt/HiC-Pro-master/scripts)\n",
|
||
"16.47 (g++ -Wall -02 -std=c++@x -o cutsite_trimming /opt/HiC-Pro-master/scripts/src/cutsite_trimming.cpp; mv cutsite_trimming /opt/HiC-Pro-master/scripts)\n",
|
||
"19.24 realpath: /opt/hicpro/HiC-Pro_3.1.@: No such file or directory\n",
|
||
"\n",
|
||
"19.25 cp -Ri /opt/HiC-Pro-master /opt/hicpro/HiC-Pro_3.1.0\n",
|
||
"\n",
|
||
"19.26 cp: cannot create directory '/opt/hicpro/HiC-Pro_3.1.@': No such file or directory\n",
|
||
"\n",
|
||
"19.27 make: *** [Makefile:78: cp] Error 1\n",
|
||
"\n",
|
||
"Dockerfile:42\n",
|
||
"\n",
|
||
"# Install HiC-Pro\n",
|
||
"\n",
|
||
"41 |\n",
|
||
"42 | >>> RUN cd /opt && \\\n",
|
||
"43 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
|
||
"44 | >>> unzip hicpro_latest.zip && \\\n",
|
||
"45 | >>> cd HiC-Pro-master && \\\n",
|
||
"46 | >>> bash scripts/install/install_dependencies.sh -c config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.0 -q && \\\n",
|
||
"47 | >>> make install && \\\n",
|
||
"48 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
|
||
"49 | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
|
||
"|\n",
|
||
"\n",
|
||
"5@\n",
|
||
"\n",
|
||
"\n",
|
||
"Visualization: HiGlass\n",
|
||
"\n",
|
||
"HICCUPs juicer_tools:\n",
|
||
"\n",
|
||
"bedpe file\n",
|
||
"\n",
|
||
"¥\n",
|
||
"\n",
|
||
"Enrichmnet: Juicer\n",
|
||
"APA,\n",
|
||
"TADs: Arrowhead\n",
|
||
"\n",
|
||
"Juicer\n",
|
||
"\n",
|
||
"v\n",
|
||
"\n",
|
||
"Visualization: JuiceBox\n",
|
||
"Analysis: HiC Straw\n",
|
||
"\n",
|
||
"Trimmomatic, FostQC\n",
|
||
"\n",
|
||
"HIC-Pro\n",
|
||
"(Current)\n",
|
||
"\n",
|
||
"validpairs file\n",
|
||
"\n",
|
||
"¥\n",
|
||
"\n",
|
||
"Analysis: Cooler\n",
|
||
"library python\n",
|
||
"\n",
|
||
">\n",
|
||
"\n",
|
||
"FitHiC2 loop caller\n",
|
||
"\n",
|
||
"Enrichment:\n",
|
||
"coolpup.py\n",
|
||
"\n",
|
||
"HiC - Pro Juicer\n",
|
||
"\n",
|
||
"Parailel Computing Hi-C Fragment\n",
|
||
"A Sequenced Alignment and Duplicate Map creation\n",
|
||
"Hi-C Reads Chimera Handling Merge Sort removal\n",
|
||
"a on\n",
|
||
"==\" a —— RI R2\n",
|
||
"Sequencing © ———— SSS EES ESS\n",
|
||
"Ey SSS SSS . > .\n",
|
||
"\n",
|
||
"ae a ee : -.\n",
|
||
"\n",
|
||
"\n",
|
||
"Visualization: HiGlass,\n",
|
||
"JuicaBox\n",
|
||
"\n",
|
||
"HICCUPS juicer_tools:\n",
|
||
"\n",
|
||
"-bedpe file\n",
|
||
"\n",
|
||
"Enrichmnet Juicer\n",
|
||
"\n",
|
||
"APA,\n",
|
||
"TADS: Arrowhead\n",
|
||
"\n",
|
||
"Juicer\n",
|
||
"\n",
|
||
"‘Timmomatic, FastQC\n",
|
||
"\n",
|
||
"Hic-Pro,\n",
|
||
"\n",
|
||
"tbedpe ~—————>_GenomicLinks\n",
|
||
"\n",
|
||
"Visualization: JuiceBox\n",
|
||
"Anolysis: Hic Straw\n",
|
||
"\n",
|
||
"Juicer\n",
|
||
"dump\n",
|
||
"\n",
|
||
"Hic-Pro -\n",
|
||
"build_matrix\n",
|
||
"\n",
|
||
"Individual Matrices <——\n",
|
||
"\n",
|
||
"Analysis: Cooler\n",
|
||
"liorary python,\n",
|
||
"\n",
|
||
"> FitHiC2 loop caller\n",
|
||
"\n",
|
||
"Enrichment:\n",
|
||
"‘coolpup.ey\n",
|
||
"\n",
|
||
"Visualization: HiGloss\n",
|
||
"\n",
|
||
"Visualization: HiGlass\n",
|
||
"\n",
|
||
"HICCUPS juicer_tools:\n",
|
||
"\n",
|
||
"bbedpe file\n",
|
||
"\n",
|
||
"Enrichmnet: Juicer\n",
|
||
"APA,\n",
|
||
"‘TADs: Arrowhead\n",
|
||
"\n",
|
||
"Juicer\n",
|
||
"\n",
|
||
"Visualization: JuiceBox\n",
|
||
"Analysis: HIC Straw\n",
|
||
"\n",
|
||
"Tiimmomatic, FastQc\n",
|
||
"\n",
|
||
"Hic-Pro\n",
|
||
"(Current)\n",
|
||
"\n",
|
||
"validpairs file\n",
|
||
"\n",
|
||
"Analysis: Cooler\n",
|
||
"library python\n",
|
||
"\n",
|
||
">\n",
|
||
"\n",
|
||
"FithiC2 loop caller\n",
|
||
"\n",
|
||
"Enrichment:\n",
|
||
"coolpup.py\n",
|
||
"\n",
|
||
"corrected Hi-C counts\n",
|
||
"\n",
|
||
"10!\n",
|
||
"\n",
|
||
"10°\n",
|
||
"\n",
|
||
"107}\n",
|
||
"\n",
|
||
"104\n",
|
||
"\n",
|
||
"10°\n",
|
||
"genomic distance\n",
|
||
"\n",
|
||
"10®\n",
|
||
"\n",
|
||
"—— data_mcool.h5\n",
|
||
"\n",
|
||
"\n",
|
||
"Aman\n",
|
||
"_——\n",
|
||
"\n",
|
||
"Parallel Computing Hi-C Fragment\n",
|
||
"\n",
|
||
"——\n",
|
||
"\n",
|
||
"—_—_—\n",
|
||
"—\n",
|
||
"—4 Sequencing ——\n",
|
||
"\n",
|
||
"Singleton\n",
|
||
"Low MAPQ\n",
|
||
"\n",
|
||
"Dumped Pairs\n",
|
||
"\n",
|
||
"\n",
|
||
"706883\n",
|
||
"706884\n",
|
||
"706886\n",
|
||
"706885\n",
|
||
"706887\n",
|
||
"706888\n",
|
||
"706890\n",
|
||
"706891\n",
|
||
"706892\n",
|
||
"706889\n",
|
||
"706875\n",
|
||
"706873\n",
|
||
"706876\n",
|
||
"706874\n",
|
||
"1\n",
|
||
"\n",
|
||
"1321\n",
|
||
"\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"root\n",
|
||
"\n",
|
||
"messagebu\n",
|
||
"\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"20\n",
|
||
"\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"2866M\n",
|
||
"23440\n",
|
||
"23448\n",
|
||
"19992\n",
|
||
"19992\n",
|
||
"\n",
|
||
"164M\n",
|
||
"\n",
|
||
"9456\n",
|
||
"\n",
|
||
"2391M\n",
|
||
"2393M\n",
|
||
"2391M\n",
|
||
"2393M\n",
|
||
"2393M\n",
|
||
"2391M\n",
|
||
"2391M\n",
|
||
"2393M\n",
|
||
"2391M\n",
|
||
"2393M\n",
|
||
"7344\n",
|
||
"7344\n",
|
||
"2252\n",
|
||
"2136\n",
|
||
"11788\n",
|
||
"3364\n",
|
||
"\n",
|
||
"2412\n",
|
||
"2324\n",
|
||
"2412\n",
|
||
"2324\n",
|
||
"2324\n",
|
||
"2412\n",
|
||
"2412\n",
|
||
"2324\n",
|
||
"2412\n",
|
||
"2324\n",
|
||
"2372\n",
|
||
"2372\n",
|
||
"1720\n",
|
||
"1604\n",
|
||
"6216\n",
|
||
"1908\n",
|
||
"\n",
|
||
"NANNNNHNDDDDDDDDANN\n",
|
||
"\n",
|
||
"400.\n",
|
||
"400.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"10@.\n",
|
||
"\n",
|
||
"DOAAD\n",
|
||
"\n",
|
||
"PLCTCTDPSORPRRPRRPRREBRBR\n",
|
||
"POSCTCT®VVDDVD0D0090 0\n",
|
||
"\n",
|
||
"VPVTVTAAAD\n",
|
||
"\n",
|
||
"45h22:\n",
|
||
"45h22:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"11h20:\n",
|
||
"Q4.\n",
|
||
"13.\n",
|
||
"40.\n",
|
||
"44.\n",
|
||
":1@.\n",
|
||
"18.\n",
|
||
"\n",
|
||
"51:\n",
|
||
"50:\n",
|
||
"39:\n",
|
||
"39:\n",
|
||
"\n",
|
||
"10:\n",
|
||
"\n",
|
||
"46\n",
|
||
"32\n",
|
||
"32\n",
|
||
"32\n",
|
||
"32\n",
|
||
"32\n",
|
||
"31\n",
|
||
"31\n",
|
||
"32\n",
|
||
"81\n",
|
||
"12\n",
|
||
"59\n",
|
||
"96\n",
|
||
"44\n",
|
||
"96\n",
|
||
"\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.\n",
|
||
"perl /usr/local/anaconda/envs/HiC-Pro_\n",
|
||
"\n",
|
||
"VPV®VVVVVVOVO\n",
|
||
"\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"-0/\n",
|
||
"\n",
|
||
"v3.\n",
|
||
"\n",
|
||
"perl /usr/local/anaconda/envs/HiC-Pro_v3.\n",
|
||
"/anaconda/envs/HiC-Pro_v3.0.0/\n",
|
||
"/anaconda/envs/HiC-Pro_v3.0.0/\n",
|
||
"/lib/systemd/systemd --system --deserialize 33\n",
|
||
"@dbus—daemon --system —-address=systemd:\n",
|
||
"\n",
|
||
"/usr/\n",
|
||
"/usr/\n",
|
||
"\n",
|
||
"oca\n",
|
||
"oca\n",
|
||
"\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"bin/bowtie2-align-s --wrapper\n",
|
||
"\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"basic-®\n",
|
||
"\n",
|
||
"--very-sensitive\n",
|
||
"—-very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"--very-sensitive\n",
|
||
"—-very-sensitive\n",
|
||
"—-very-sensitive\n",
|
||
"\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"30\n",
|
||
"\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"--score-min\n",
|
||
"—-score-min\n",
|
||
"\n",
|
||
"DAARBAAADH\n",
|
||
"NNNNNNNNN\n",
|
||
"\n",
|
||
"Pere rere\n",
|
||
"SoooKoKOOOO\n",
|
||
"\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"—-end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"--end-to-end\n",
|
||
"\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"—-reo\n",
|
||
"\n",
|
||
"@.0/bin/bowtie2 --very-sensitive -L 3@ --score-min L,-@.6,-@.2 --end-to- end --reorder --un bowtie_resu\n",
|
||
"@.0/bin/bowtie2 --very-sensitive -L 3@ --score-min L,-@.6,-@.2 --end-to-end --reorder --un bowtie_resu\n",
|
||
"\n",
|
||
"bin/samtools view -F 4 -bS —\n",
|
||
"bin/samtools view -F 4 -bS —\n",
|
||
"\n",
|
||
"--nofork --nopidfile --systemd-activation --syslog-only\n",
|
||
"\n",
|
||
"Fig x: Visualization in Juicebox for two HiC datasets\n",
|
||
"\n",
|
||
"The 10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files\n",
|
||
"locally. The left panel shows the matrix from the cis-regulatory elements in Maize study and the one\n",
|
||
"on the right is from (7). Even though the raw hic sequencing data was trimmed correctly the second\n",
|
||
"dataset showed poor quality as is evident from the figure. The noise was high and HiCCUPs couldn't\n",
|
||
"find loops correctly.\n",
|
||
"\n",
|
||
"-hic & .cooV.mcool:; Binary formats for Hi-C data\n",
|
||
"> Compressed contact matrices at multiple resolutions\n",
|
||
"Genomic intervals for binned data\n",
|
||
"\n",
|
||
">\n",
|
||
"> Interaction frequencies between loci\n",
|
||
"> Supports multiple bin sizes & corrections in one file\n",
|
||
"\n",
|
||
"\n",
|
||
"GQAAAGP RPP PPP PPP PRP PPP PPP PP RS\n",
|
||
"\n",
|
||
"«/16\n",
|
||
"-717\n",
|
||
"-717\n",
|
||
"-717\n",
|
||
"-718\n",
|
||
"-718\n",
|
||
"-718\n",
|
||
"-718\n",
|
||
"-718\n",
|
||
"-719\n",
|
||
"-719\n",
|
||
"-719\n",
|
||
"-719\n",
|
||
"-720\n",
|
||
"-720\n",
|
||
"-720\n",
|
||
"-720\n",
|
||
"+721\n",
|
||
"- 786\n",
|
||
"-814\n",
|
||
"+917\n",
|
||
"-969\n",
|
||
"-969\n",
|
||
"-340\n",
|
||
"-341\n",
|
||
"-342\n",
|
||
"-343\n",
|
||
"-346\n",
|
||
"\n",
|
||
"inflating: hiC-Pro-master/scripts/onlarget.py\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/plot_hic_contacts.R\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/plot_hic_fragment.R\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/plot_mapping_portion.R\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/plot_pairing_portion.R\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/split_valid_interactions.py\n",
|
||
"\n",
|
||
"creating: HiC-Pro-master/scripts/src/\n",
|
||
"\n",
|
||
"extracting: HiC-Pro-master/scripts/src/README\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/src/build_matrix.cpp\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/scripts/src/cutsite_trimming.cpp\n",
|
||
"\n",
|
||
"creating: HiC-Pro-master/test-op/\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/test-op/config_test_as.txt\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/test-op/config_test_cap.txt\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/test-op/config_test_dnase.txt\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/test-op/config_test_latest.txt\n",
|
||
"\n",
|
||
"inflating: HiC-Pro-master/test-op/run-test-op.sh\n",
|
||
"finishing deferred symbolic links:\n",
|
||
"\n",
|
||
"HiC-Pro-master/doc/themes/paris/logos -> ../../_static/logos/\n",
|
||
"make -f ./scripts/install/Makefile CONFIG_SYS=./config-install.txt prefix=/opt/hicpro\n",
|
||
"make[1]: Entering directory '/opt/HiC-Pro-master'\n",
|
||
"./scripts/install/install_dependencies.sh -c ./config-install.txt -p /opt/hicpro -o /opt/hicpro/HiC-Pro_3.1.@ -q\n",
|
||
"Make sure internet connection works for your shell prompt under current user's privilege ...\n",
|
||
"Starting HiC-Pro installation !\n",
|
||
"Directory /opt/hicpro does not exist!\n",
|
||
"Exit - Error - unable to install/check dependancies !\n",
|
||
"make[1]: **x* [scripts/install/Makefile:41: configure] Error 1\n",
|
||
"make[1]: Leaving directory '/opt/HiC-Pro-master'\n",
|
||
"make: **x* [Makefile:38: configure] Error 2\n",
|
||
"\n",
|
||
"40 | # Install HiC-Pro\n",
|
||
"\n",
|
||
"41 | >>> RUN cd /opt && \\\n",
|
||
"\n",
|
||
"42 | >>> wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && \\\n",
|
||
"\n",
|
||
"43 | >>> unzip hicpro_latest.zip && \\\n",
|
||
"\n",
|
||
"44 | >>> cd HiC-Pro-master && \\\n",
|
||
"\n",
|
||
"45 | >>> make configure prefix=/opt/hicpro && \\\n",
|
||
"\n",
|
||
"46 | >>> make install && \\\n",
|
||
"\n",
|
||
"47 | >>> 1n -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && \\\n",
|
||
"\n",
|
||
"48 | >>> rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\n",
|
||
"\n",
|
||
"49 |\n",
|
||
"ERROR: failed to solve: process \"/bin/sh -c cd /opt && wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && unzip hicpro_latest.zip && cd HiC-Pro-master && make\n",
|
||
"configure prefix=/opt/hicpro && make install && ln -s /opt/hicpro/bin/HiC-Pro /usr/local/bin/HiC-Pro && rm -rf /opt/hicpro_latest.zip /opt/HiC-Pro-master\" did not complete successfully: exit code:\n",
|
||
"\n",
|
||
"2\n",
|
||
"\n",
|
||
"A Sequenced\n",
|
||
"Hi-C Reads\n",
|
||
"\n",
|
||
"Alignment and\n",
|
||
"Chimera Handling Merge Sort\n",
|
||
"\n",
|
||
"SS Sass = SS\n",
|
||
"—_ oo i\n",
|
||
"—\n",
|
||
"SSS oo\n",
|
||
"a\n",
|
||
"\n",
|
||
"oe OT\n",
|
||
"\n",
|
||
"Duplicate\n",
|
||
"removal\n",
|
||
"\n",
|
||
"Map creation\n",
|
||
"\n",
|
||
"i\n",
|
||
"—————\n",
|
||
"\n",
|
||
"\n",
|
||
"-hic & .cool/.mcool: Binary formats for Hi-C data\n",
|
||
"Compressed contact matrices at multiple resolutions\n",
|
||
"\n",
|
||
"Genomic intervals for binned data\n",
|
||
"Interaction frequencies between loci\n",
|
||
"Supports multiple bin sizes & corrections in one file\n",
|
||
"\n",
|
||
"\n",
|
||
"Overall Interpretation\n",
|
||
"\n",
|
||
"e The data show a good proportion of valid Hi-C contacts (17.40%), but a large number of reads\n",
|
||
"(64.87%) are excluded due to low quality (MAPQ). This could be due to sequence complexity,\n",
|
||
"\n",
|
||
"genome alignment issues, or technical problems during sequencing.\n",
|
||
"\n",
|
||
"e The balance in pair types and dominance of intra-chromosomal contacts indicate proper\n",
|
||
"\n",
|
||
"library preparation and plausible results for downstream analysis.\n",
|
||
"\n",
|
||
"e Long-range contacts provide meaningful insights into chromatin organization and can be\n",
|
||
"\n",
|
||
"used for modeling chromosomal structure.\n",
|
||
"\n",
|
||
"workflow_aman\n",
|
||
"\n",
|
||
"a i\n",
|
||
"a\n",
|
||
"ToDo\n",
|
||
"\n",
|
||
"hic hic2cool cool\n",
|
||
"\n",
|
||
"plot\n",
|
||
"\n",
|
||
"matrix (exported from\n",
|
||
"\n",
|
||
"juicerbon) Python script Plot\n",
|
||
"\n",
|
||
"\n",
|
||
"QW 6B github.com/kuikui-C/DconnLoop W © Search Startpage\n",
|
||
"\n",
|
||
"(1) README o\n",
|
||
"\n",
|
||
"pip install matplotlib\n",
|
||
"conda install hicexplorer\n",
|
||
"conda activate DconnLoop\n",
|
||
"\n",
|
||
"Usage\n",
|
||
"\n",
|
||
"The input data used can be downloaded in the supplementary materials of the paper. The input contact maps use\n",
|
||
"the cool file format, which, if needed, can be converted and normalized using the HiCExplorer's hicConvertFormat\n",
|
||
"command.\n",
|
||
"\n",
|
||
"HiC to cool\n",
|
||
"\n",
|
||
"hicConvertFormat -m ./ENCFFQ97SKJ.hic --inputFormat hic --outputFormat cool -o ./ENCFF@97SKJ.c oO\n",
|
||
"hicConvertFormat -m ./ENCFFQ97SKJ_10000.cool --inputFormat cool —-outputFormat cool -o ./ENCFF\n",
|
||
"\n",
|
||
"Generate positive and negative samples\n",
|
||
"\n",
|
||
"python PosNeg_Samp_Gen.py -p ./input/gm12878/Ra02014—GM12878-MboI-allreps—filtered.1@kb.cool — oO\n",
|
||
"\n",
|
||
"Training\n",
|
||
"\n",
|
||
"python leave_one_train.py -d ./PosNeg_samp/ -g 1,2,3 —b 256 -lr @.001 -e 3@ -w 0.0005 -c ./mod oO\n",
|
||
"\n",
|
||
"Testing\n",
|
||
"\n",
|
||
"python leave_one_test.py -d ./PosNeg_samp/ -g 1,2,3 -c ./model/ -f ./model/chri5-record_test. oO\n",
|
||
"\n",
|
||
"Score\n",
|
||
"\n",
|
||
"python score_chromosome.py -p ./input/gm12878/Ra02014—GM12878-MboI-allreps—filtered.1@kb.cool oO\n",
|
||
"\n",
|
||
"Cluster\n",
|
||
"\n",
|
||
"python cluster.py -d 5 -i ./scores/chr15.bed -r 10000 -m 0.97 -p 75 -e 10 -o ./cluster/chr15-L oO\n",
|
||
"\n",
|
||
"@ Terminal Shell Edit View Window Help SU GB O+ 8 © & WD ® F Q B® SatFeb15 12:24\n",
|
||
"\n",
|
||
"ee@ aman — aman@unicorn: ~/fihic_bias — ssh -L 9005:localhost:9005 aman@10.162.143.69 — 208x63\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 1.5 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 500000 -f @.15 -p 1 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 100000 -f @.2 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 1000000 -f @.2 -p 2 -i 12 -d 250000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.0@.jar hiccups --cpu —-threads 16 -r 25000 -f @.2 -p 2 -i 12 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@0kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu —-threads 16 -r 50000 -f @.2 -p 2 -i 12 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@0kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 25000 -f @.25 -p 2 -i 14 -d 25000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"-20.00.jar hiccups --cpu --threads 16 -r 50000 -f @.25 -p 2 -i 14 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_5@@kb/\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too -20.00.jar hiccups --cpu --threads 16 -r 5000,10000,25000 -f 0.30 -p 1.5 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too -20.00.jar hiccups --cpu --threads 16 -r 5000, 25000 -f @.3@ -p 1 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 25000 -f @.3@ -p 1 -i 10 -d 50000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_results/\n",
|
||
"\n",
|
||
"nano /home/aman/hiccups_results/enriched_pixels_25000.bedpe\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000,20000,50000 /mnt/storage3/aman/hicpro2jui\n",
|
||
"cebox/data.allValidPairs.hic ~/hiccups_optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000, 25000,50000 /mnt/storage3/aman/hicpro2jui\n",
|
||
"cebox/data.allValidPairs.hic ~/hiccups_optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -m 512 -c all -r 5000,10000 -k KR -f .1,.1 -p 4,2 -i 7,5 -t @.02,1.5,1.75,2 -d 20000,50000 /mnt/storage3/aman/hicpro2juicebox/\n",
|
||
"data.allValidPairs.hic ~/hiccups_optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -k KR -f .1 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hi\n",
|
||
"ccups_optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f .1 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_\n",
|
||
"optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f 2 -p 4 -i 7 -t @.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccups_o\n",
|
||
"ptimized_results/\n",
|
||
"\n",
|
||
"cd ~/hiccups_optimized_results/\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 5000,10000 -f @.2 -p 4 -i 7 -t 0.02,1.5,1.75,2 -d 20000 /mnt/storage3/aman/hicpro2juicebox/data.allValidPairs.hic ~/hiccup\n",
|
||
"_optimized_results/\n",
|
||
"\n",
|
||
"~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups -h\n",
|
||
"\n",
|
||
"~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups\n",
|
||
"\n",
|
||
"cat ~/.bash_history | hiccups\n",
|
||
"\n",
|
||
"cat ~/.bash_history | grep hiccups\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_1@kb/\n",
|
||
"\n",
|
||
"mkdir hiccups2_1@kb\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"1s -lh ~/hiccups2_1@kb/\n",
|
||
"\n",
|
||
"we -l ~/hiccups2_10kb/fdr_thresholds_10000\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"1s ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"we -l1 ~/hiccups2_10kb/*\n",
|
||
"\n",
|
||
"cat ~/hiccups2_10kb/fdr_thresholds_5000\n",
|
||
"\n",
|
||
"1s -ltrh ~/hiccups2_10kb/\n",
|
||
"\n",
|
||
"(base) aman@unicorn:~/fihic_bias$ java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 1000@ -i /mnt/storage3/aman/data.allValidPairs.hic ~/hiccupsfinal_10kb/\n",
|
||
"\n",
|
||
"WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n",
|
||
"\n",
|
||
"WARN [2025-@2-15T11:23:46,503] [Globals.java:138] [main] Development mode is enabled\n",
|
||
"\n",
|
||
"Usage: juicer_tools hiccups [-m matrixSize] [-k normalization (NONE/VC/VC_SQRT/KR)] [-c chromosome(s)] [-r resolution(s)] [--restrict] [-f fdr] [-p peak width] [-i window] [-t thresholds] [-d centroid dista\n",
|
||
"neces] <hicFile> <outputDirectory> [specified_loop_list]\n",
|
||
"\n",
|
||
"(base) aman@unicorn:~/fihic_bias$ java -jar ~/juicer/CPU/common/juicer_tools.2.20.0@.jar hiccups --cpu --threads 16 -r 10000 /mnt/storage3/aman/data.allValidPairs.hic ~/hiccupsfinal_10kb/\n",
|
||
"\n",
|
||
"WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n",
|
||
"\n",
|
||
"WARN [2025-@2-15T11:24:14,443] [Globals.java:138] [main] Development mode is enabled\n",
|
||
"\n",
|
||
"Reading file: /mnt/storage3/aman/data.allValidPairs.hic\n",
|
||
"\n",
|
||
"Using the following configurations for HiCCUPS:\n",
|
||
"\n",
|
||
"Config res: 10000 peak: 2 window: 5 fdr: 10% radius: 20000\n",
|
||
"\n",
|
||
"WARNING - You are using the CPU version of HiCCUPS.\n",
|
||
"\n",
|
||
"The GPU version of HiCCUPS is the official version and has been tested extensively.\n",
|
||
"\n",
|
||
"The CPU version only searches for loops within 8MB (by default) of the diagonal and is still experimental.\n",
|
||
"\n",
|
||
"Using 16 CPU thread(s) for primary task\n",
|
||
"\n",
|
||
"Warning Hi-C map may be too sparse to find many loops via HiCCUPS.\n",
|
||
"\n",
|
||
"Running HiCCUPS for resolution 10000\n",
|
||
"\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"java -jar ~/juicer/CPU/common/juicer_too\n",
|
||
"\n",
|
||
"DHOHDHHHHHHHD\n",
|
||
"NNNNNNNNNNN\n",
|
||
"\n",
|
||
"\n",
|
||
"Upregulated Downregulated\n",
|
||
"\n",
|
||
"H3K27me’ signal(cp) at upregulated genes HoKzrmed slgnalcotrt at uprepuaes genes nes\n",
|
||
"3x27 me3 signal HaK2763 signal\n",
|
||
"\n",
|
||
"‘ene astce (9) ‘ge dace\n",
|
||
"\n",
|
||
"In [54]: import fanc\n",
|
||
"import fanc.peaks\n",
|
||
"import fanc.plotting as fancplot\n",
|
||
"\n",
|
||
"import logging\n",
|
||
"logging. basicConfig(level=logging. INFO, format=\"%(asctime)s %(levelname)s %(message)s\")\n",
|
||
"\n",
|
||
"hic_data = fanc. load('/mnt/storage3/aman/wdbasejuicer_new/aligned/inter_3@.hic')\n",
|
||
"loop_caller = fanc.RaoPeakCaller()\n",
|
||
"\n",
|
||
"/home/aman/. lLocal/lib/python3.10/site-packages/fanc/compatibility/juicer.py:330: UserWarning: No resolution chosen\n",
|
||
"for Juicer Hic - using 2500000bp. Specify a custom resolution using <.hic file>@<resolution>\n",
|
||
"warnings.warn(\"No resolution chosen for Juicer Hic - using {}bp. \"\n",
|
||
"/home/aman/. lLocal/lib/python3.10/site-packages/fanc/compatibility/juicer.py:353: UserWarning: Support for Juicer .h\n",
|
||
"ic v9 is still in beta. Please report any issues to https://github.com/vaquerizas lab/fanc/issues/92\n",
|
||
"warnings.warn(f\"Support for Juicer .hic v{self.version} is still in beta. \"\n",
|
||
"\n",
|
||
"ne...@broadinstitute.org Jan 18, 2019, 8:16:32PM y+ roN\n",
|
||
"to AS, 3D Genomics\n",
|
||
"\n",
|
||
"Hello,\n",
|
||
"You can just run HiCCUPS or Arrowhead on the hic file using the latest jar: https://github.com/aidenlab/juicer/wiki/Download\n",
|
||
"There is extensive documentation here: https://github.com/aidenlab/juicer/wiki/CPU-HiCCUPS\n",
|
||
"\n",
|
||
"Please note: 300 million reads is not enough to reliably call loops. We also do not recommend domain calling at this depth. The ENCODE standard for loop\n",
|
||
"calling is 2 billion reads.\n",
|
||
"\n",
|
||
"Best\n",
|
||
"Neva\n",
|
||
"\n",
|
||
"You received this message because you are subscribed to the Google Groups \"3D Genomics\" group.\n",
|
||
"\n",
|
||
"To unsubscribe from this group and stop receiving emails from it, send an email to 3d-genomics...@googlegroups.com.\n",
|
||
"To view this discussion on the web visit https://groups.google.com/d/msgid/3d-genomics/761 6da19-9387-4c46-99d6-\n",
|
||
"ef852e2b0170%40googlegroups.com.\n",
|
||
"\n",
|
||
"For more options, visit https://groups.google.com/d/optout.\n",
|
||
"\n",
|
||
"Neva Cherniavsky Durand, Ph.D.\n",
|
||
"Staff Scientist, Aiden Lab\n",
|
||
"www.aidenlab.org\n",
|
||
"\n",
|
||
"> ValidPairs file from HiC-Pro used\n",
|
||
"as pre-input. 78M entries. Format:\n",
|
||
"\n",
|
||
"chri start1l endl chr2 start2 end2 readID strand1 strand2\n",
|
||
"\n",
|
||
"> .bedpe format (input):\n",
|
||
"\n",
|
||
"chri start1l endl chr2 start2 end2\n",
|
||
"\n",
|
||
"> Output csv\n",
|
||
"format:\n",
|
||
"\n",
|
||
"chr sl el chr s2 e2 prob interacted\n",
|
||
"\n",
|
||
"> 50000 entry bedpe file - 11249\n",
|
||
"with interacted score 1\n",
|
||
"\n",
|
||
"\n",
|
||
"Hi-C Signal\n",
|
||
"\n",
|
||
"25\n",
|
||
"\n",
|
||
"N\n",
|
||
"°\n",
|
||
"\n",
|
||
"rR\n",
|
||
"uw\n",
|
||
"\n",
|
||
"10\n",
|
||
"\n",
|
||
"Interaction Decay\n",
|
||
"\n",
|
||
"—— Row Sum (interactions by position)\n",
|
||
"—— Column Sum (Interactions by position)\n",
|
||
"\n",
|
||
"2 3 4 5 6\n",
|
||
"Position Relative to Anchor\n",
|
||
"\n",
|
||
"EXPLORER\n",
|
||
"\n",
|
||
"\\ AMAN [SSH: SCC]\n",
|
||
"\n",
|
||
"> hic-pro-git\n",
|
||
"\n",
|
||
"> mustache-git\n",
|
||
"\n",
|
||
"chrom.sizes\n",
|
||
"\n",
|
||
"cool_balance.sh\n",
|
||
"GEO2457_5kb_mustache_loops.bedpe\n",
|
||
"GEO2457_5kb.cool\n",
|
||
"GEO2457_dots_5kb.bedpe\n",
|
||
"GEO2457_expected_1kb.tsv\n",
|
||
"GEO2457_expected_5kb.tsv\n",
|
||
"GEO2457_v2.mcool\n",
|
||
"\n",
|
||
"GEO2457.hic\n",
|
||
"GEO2459_5kb_mustache_loops.bedpe\n",
|
||
"GEO2459_5kb.cool\n",
|
||
"GEO2459_expected_5kb.tsv\n",
|
||
"GEO2459_v2_expected_cis.tsv\n",
|
||
"GEO2459_v2.mcool\n",
|
||
"\n",
|
||
"GEO2459.hic\n",
|
||
"\n",
|
||
"$ hic2cool_aman.sh\n",
|
||
"\n",
|
||
"$ test.sh\n",
|
||
"\n",
|
||
"mY 6 oO DB\n",
|
||
"\n",
|
||
"6]\n",
|
||
"\n",
|
||
"@\n",
|
||
"\n",
|
||
"hy «D “OUTLINE\n",
|
||
"\n",
|
||
"PP aman [SSH: SCC]\n",
|
||
"\n",
|
||
"Show All Commands\n",
|
||
"Go to File\n",
|
||
"\n",
|
||
"Find in Files\n",
|
||
"\n",
|
||
"Toggle Full Screen\n",
|
||
"\n",
|
||
"Show Settings\n",
|
||
"\n",
|
||
"Veeb dls MOODLE E-mail Help\n",
|
||
"\n",
|
||
"Tah, Catalogue Dashboard My courses Q Dp Aman Shamil Nalakath © aa\n",
|
||
"\n",
|
||
"Bioinformatics Il MOOC: View: Overview report\n",
|
||
"\n",
|
||
"Bioinformatics Il information 2024 Course Participants Grades\n",
|
||
"\n",
|
||
"General Introduction to Bioinformatics II - Ol...\n",
|
||
"\n",
|
||
"Introduction to the course\n",
|
||
"\n",
|
||
"Course info (link to study information syste... Overview report\n",
|
||
"\n",
|
||
"Teacher's announcements\n",
|
||
"\n",
|
||
"Course participant's forum (ask questions fr... Aman Shamil Nalakath\n",
|
||
"\n",
|
||
"Project Work 1 - Genome project plan\n",
|
||
"\n",
|
||
"Grade\n",
|
||
"\n",
|
||
"Bioinformatics group project example from ... Course name\n",
|
||
"\n",
|
||
"Week 1 Bioinformatics II MOOC 97.00\n",
|
||
"\n",
|
||
"Week 1 general discussion\n",
|
||
"\n",
|
||
"Lecture 1 A - Introduction\n",
|
||
"\n",
|
||
"Video: Lecture 1 A - Introduction\n",
|
||
"\n",
|
||
"Students introduction and aims (DL 12.09. 2...\n",
|
||
"\n",
|
||
"How to (seriously) read a scientific paper\n",
|
||
"\n",
|
||
"Article 1\n",
|
||
"\n",
|
||
"Lecture 1B - Setting up HPC Access\n",
|
||
"\n",
|
||
"Video: Lecture 1 B - Setting up HPC access TAL\n",
|
||
"\n",
|
||
"Meet and greet\n",
|
||
"\n",
|
||
"Get the mobile app\n",
|
||
"\n",
|
||
"Coursework 1 on Article 1: Aspects of geno... Policies\n",
|
||
"\n",
|
||
"\n",
|
||
"Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n",
|
||
"\n",
|
||
"face mar rapa mat\n",
|
||
"\n",
|
||
"eQTL-gene\n",
|
||
"links >20 kb\n",
|
||
"\n",
|
||
"shuffled pairs :\n",
|
||
"\n",
|
||
"\n",
|
||
"Sequencing\n",
|
||
"\n",
|
||
"Sequenced Reads: 547812856\n",
|
||
"\n",
|
||
"Duplication and Complexity (% Sequenced Reads)\n",
|
||
"\n",
|
||
"Analysis of Unique Reads (% Sequenced Reads / % Unique Reads)\n",
|
||
"\n",
|
||
"Intra-fragment Reads: 34,307,600\n",
|
||
"\n",
|
||
"Below MAPQ Threshold: 355,353,763 (64.87% / 73.27%)\n",
|
||
"\n",
|
||
"Hi-C Contacts: 95,311,495 (17.40% / 19.65%)\n",
|
||
"3' Bias (Long Range): 97% - 3%\n",
|
||
"\n",
|
||
"Pair Type % (L-I-O-R): 25% - 25% - 25% - 25%\n",
|
||
"\n",
|
||
"Analysis of Hi-C Contacts (% Sequenced Reads / % Unique Reads)\n",
|
||
"\n",
|
||
"Inter-chromosomal: 22,195,088 (4.05% / 4.58%)\n",
|
||
"Intra-chromosomal: 73,116,407 (13.35% / 15.08%)\n",
|
||
"Long Range (>20Kb): 35,425,148 (6.47% / 7.30%)\n",
|
||
"\n",
|
||
"Solving environment: ...working... INFO conda.cc\n",
|
||
"INFO conda.conda_libmamba_solver.solver:_solve_é\n",
|
||
"{\n",
|
||
"\n",
|
||
"\"INSTALL\": [\n",
|
||
"\n",
|
||
"\"hicexplorer\"\n",
|
||
"\n",
|
||
"]\n",
|
||
"}\n",
|
||
"info libmamba Parsing MatchSpec hicexplorer\n",
|
||
"info libmamba Parsing MatchSpec hicexplorer\n",
|
||
"info libmamba Adding job: hicexplorer\n",
|
||
"\n",
|
||
".\n",
|
||
"\n",
|
||
"@ MainWindow Mon Nov 4 21:17\n",
|
||
"\n",
|
||
"eee [Juicebox 2.17.00] Hi-C Map <9>: inter.hic\n",
|
||
"\n",
|
||
"File View Bookmarks Assembly Dev\n",
|
||
"Chromosomes\n",
|
||
"\n",
|
||
"All All Be\n",
|
||
"\n",
|
||
"Show\n",
|
||
"\n",
|
||
"Normalization (Obs | Ctrl) Color Range\n",
|
||
"2 I Tr\n",
|
||
"\n",
|
||
"3773\n",
|
||
"\n",
|
||
"Observed None None\n",
|
||
"\n",
|
||
"I I I I I I It\n",
|
||
"2.5MB 500 KB 100KB 25KB 5KB 1KB 200BP\n",
|
||
"\n",
|
||
"LayerO << oO\n",
|
||
"\n",
|
||
"Show Annotation Panel J\n",
|
||
"\n",
|
||
"\n",
|
||
"GSM3398051: HiC maize Leaf-HiC rep2; Zea mays; Hi-C\n",
|
||
"\n",
|
||
"1 ILLUMINA (NextSeq 500) run: 528.9M spots, 80.4G bases, 30.8Gb downloads\n",
|
||
"Accession: SRX4727418\n",
|
||
"\n",
|
||
"GSM3398050: HiC maize Leaf-HiC rep1; Zea mays; Hi-C\n",
|
||
"\n",
|
||
"1 ILLUMINA (NextSeq 500) run: 89.8M spots, 13.7G bases, 4.5Gb downloads\n",
|
||
"Accession: SRX4727417\n",
|
||
"\n",
|
||
"(mustache_aman) [papantonis1@gwdu1@1 aman]$ awk '$1 == $9 {print $1}' GE02457_dots_5kb.bedpe | sort | unig -c && wc -1 GE02457_dots_5kb.bedpe\n",
|
||
"842 chri\n",
|
||
"413 chr1e\n",
|
||
"465 chri1\n",
|
||
"442 chr12\n",
|
||
"244 chri3\n",
|
||
"254 chri4\n",
|
||
"234 chris\n",
|
||
"174 chri16\n",
|
||
"248 chr17\n",
|
||
"196 chri8\n",
|
||
"122 chri9\n",
|
||
"817 chr2\n",
|
||
"196 chr2e\n",
|
||
"\n",
|
||
"81 chr21\n",
|
||
"78 chr22\n",
|
||
"731 chr3\n",
|
||
"\n",
|
||
"594 chr4\n",
|
||
"609 chr5\n",
|
||
"631 chr6é\n",
|
||
"478 chr7\n",
|
||
"\n",
|
||
"505 chr8&\n",
|
||
"349 chr9\n",
|
||
"\n",
|
||
"184 chrx\n",
|
||
"\n",
|
||
"8888 GE02457_dots_5kb.bedpe\n",
|
||
"\n",
|
||
"@ Mainwindow @®@@6OeOr+ezek@ee =) FS Q SS MonDec30 1\n",
|
||
"\n",
|
||
"[ Rem ) [Juicebox 2.17.00] Hi-C Map <9>: data.allValidPairs.hic\n",
|
||
"\n",
|
||
"File View Bookmarks Assembly Dev\n",
|
||
"Chromosomes\n",
|
||
"\n",
|
||
"6 @ « Ge\n",
|
||
"\n",
|
||
"Normalization (Obs | Ctrl) Resolution (BP) Color Range\n",
|
||
"\n",
|
||
"6:113,950,001-114,000,000\n",
|
||
"\n",
|
||
"100 MB\n",
|
||
"\n",
|
||
"merge... <p> [> oO\n",
|
||
"\n",
|
||
"10000... <—@\n",
|
||
"LayerO <<\n",
|
||
"\n",
|
||
"Show Annotation Panel\n",
|
||
"\n",
|
||
"\n",
|
||
"% TADS\n",
|
||
"\n",
|
||
"[papantonis1@gwdu101 mustache_results]$\n",
|
||
"BIN1_CHR BIN1_START BIN1_END\n",
|
||
"chr 5510000 5515000 chr1 5610000\n",
|
||
"chr1 5505000 5510000 chr1 5745000\n",
|
||
"chr1 5635000 5640000 chr1 5745000\n",
|
||
"chr1 7665000 7670000 chr1 7750000\n",
|
||
"chr1 7985000 7990000 chr1 8325000\n",
|
||
"chri1 7990000 7995000 chr1 8105000\n",
|
||
"chr1 8020000 8025000 chr1 8310000\n",
|
||
"chri1 8020000 8025000 chr1 8240000\n",
|
||
"chr 8560000 8565000 chr1 8725000\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"12007 anchor_bed_try2/rbp1_anchors_final\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"13598 anchor_bed_try2/ctrl_anchors_final\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"\n",
|
||
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/C!\n",
|
||
"> -u > anchor_bed_try2/rbp1_anchors_wi\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"1153 anchor_bed_try2/rbp1_anchors_with_C\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/C.\n",
|
||
"> -u > anchor_bed_try2/ctrl_anchors_wi\n",
|
||
"\n",
|
||
"1767 anchor_bed_try2/ctrl_anchors_with_C\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/Cl\n",
|
||
"> -w 500@ -u > anchor_bed_try2/rbp1_an\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/C.\n",
|
||
"> -w 50@@ -u > anchor_bed_try2/ctrl_an\n",
|
||
"\n",
|
||
"1833 anchor_bed_try2/rbp1_anchors_near5k\n",
|
||
"\n",
|
||
"2689 anchor_bed_try2/ctrl_anchors_near5k\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"> -a anchor_bed_try2/rbp1_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/Cl\n",
|
||
"> -w 10000 -u > anchor_bed_try2/rbp1_a\n",
|
||
"papantonis1@gwdu101 mustache_results]$\n",
|
||
"> -a anchor_bed_try2/ctrl_anchors_fina\n",
|
||
"> -b ~/aman/microc_data/nadine_macro/C.\n",
|
||
"> -w 10000 -u > anchor_bed_try2/ctrl_a\n",
|
||
"\n",
|
||
"2046 anchor_bed_try2/rbp1_anchors_near1®\n",
|
||
"\n",
|
||
"3055 anchor_bed_try2/ctrl_anchors_near1@\n",
|
||
"\n",
|
||
"head rbp1_loops_5k.bedpe\n",
|
||
"BIN2_CHROMOSOME BIN2_START BIN2_END\n",
|
||
"5615000\n",
|
||
"5750000\n",
|
||
"5750000\n",
|
||
"7755000\n",
|
||
"8330000\n",
|
||
"8110000\n",
|
||
"8315000\n",
|
||
"8245000\n",
|
||
"8730000\n",
|
||
"tail -n +2 rbp1_loops_5k.bedpe | cut -f1-3 > anchor_bed_try2/rbp1_anchor1.bed\n",
|
||
"tail -n +2 rbp1_loops_5k.bedpe | cut -f4-6 > anchor_bed_try2/rbp1_anchor2.bed\n",
|
||
"cat anchor_bed_try2/rbp1_anchor1.bed anchor_bed_try2/rbp1_anchor2.bed | sort -k1,1 -k2,2n | uniq > anchor_bed_try2/rbp1_anchors_final_tab.bed\n",
|
||
"tail -n +2 ctrl_loops_5k.bedpe | cut -f1-3 > anchor_bed_try2/ctrl_anchor1.bed\n",
|
||
"tail -n +2 ctrl_loops_5k.bedpe | cut -f4-6 > anchor_bed_try2/ctrl_anchor2.bed\n",
|
||
"cat anchor_bed_try2/ctrl_anchor1.bed anchor_bed_try2/ctrl_anchor2.bed | sort -k1,1 -k2,2n | uniq > anchor_bed_try2/ctrl_anchors_final_tab.bed\n",
|
||
"we -l anchor_bed_try2/rbp1_anchors_final_tab.bed\n",
|
||
"\n",
|
||
"_tab.bed\n",
|
||
"\n",
|
||
"we -l anchor_bed_try2/ctrl_anchors_final_tab.bed\n",
|
||
"_tab.bed\n",
|
||
"\n",
|
||
"bedtools intersect \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
|
||
"th_CTCF.bed\n",
|
||
"\n",
|
||
"we -l anchor_bed_try2/rbp1_anchors_with_CTCF.bed\n",
|
||
"TCF. bed\n",
|
||
"\n",
|
||
"bedtools intersect \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
|
||
"th_CTCF.bed\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$ we -l anchor_bed_try2/ctrl_anchors_with_CTCF.bed\n",
|
||
"\n",
|
||
"TCF. bed\n",
|
||
"\n",
|
||
"bedtools window \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
|
||
"chors_near5kb_CTCF.bed\n",
|
||
"\n",
|
||
"bedtools window \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
|
||
"chors_near5kb_CTCF.bed\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/rbp1_anchors_near5kb_CTCF.bed\n",
|
||
"\n",
|
||
"b_CTCF.bed\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/ctrl_anchors_near5kb_CTCF.bed\n",
|
||
"\n",
|
||
"b_CTCF.bed\n",
|
||
"\n",
|
||
"bedtools window \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"PI_CTCF_seacr_top@.@1.peaks.stringent.bed \\\n",
|
||
"nchors_near1@kb_CTCF.bed\n",
|
||
"\n",
|
||
"bedtools window \\\n",
|
||
"\n",
|
||
"l_tab.bed \\\n",
|
||
"_CTCF_seacr_top@.01.peaks.stringent.bed \\\n",
|
||
"nchors_near1@kb_CTCF.bed\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/rbp1_anchors_near1@kb_CTCF.bed\n",
|
||
"\n",
|
||
"kb_CTCF.bed\n",
|
||
"\n",
|
||
"papantonis1@gwdu101 mustache_results]$ wc -1 anchor_bed_try2/ctrl_anchors_neari@kb_CTCF.bed\n",
|
||
"\n",
|
||
"kb_CTCF.bed\n",
|
||
"\n",
|
||
"ML classification model\n",
|
||
"\n",
|
||
"ML regression model\n",
|
||
"\n",
|
||
"ATAC QC\n",
|
||
"\n",
|
||
"ATAC peak detection\n",
|
||
"\n",
|
||
"7 ,\n",
|
||
"GitHub\n",
|
||
"8\n",
|
||
"Nextflow for ML\n",
|
||
"\n",
|
||
"\n",
|
||
"Extracting Hi-C contact matrix from.hic file\n",
|
||
"\n",
|
||
"The process obtains the hic contact matrix for each chromosome from the.hic file. It will output the\n",
|
||
"frequency_matrix file.\n",
|
||
"\n",
|
||
"Modify the path to the input and output files in the GetBigMatrix_Cells_KRobserved.sh file: The.jar file is the path\n",
|
||
"where the juicer tools resides, and run:\n",
|
||
"\n",
|
||
"bash GetBigMatrix_Cells_KRobserved.sh O\n",
|
||
"\n",
|
||
"Generating sub-matrix from Hi-C contact matrix\n",
|
||
"\n",
|
||
"The process cuts the hic contact matrix of each chromosome into multiple submatrices. Modify the path to the\n",
|
||
"input and output files in the Getnpymatrix_chr_all_sample.sh file, where the input file is the output file from the\n",
|
||
"previous step, DPATH is the root directory of the frequence_matrix file, and run:\n",
|
||
"\n",
|
||
"bash Getnpymatrix_chr_all_sample.sh oO\n",
|
||
"\n",
|
||
"i al\n",
|
||
"\n",
|
||
"Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n",
|
||
"\n",
|
||
"eQTL-gene\n",
|
||
"links >20 kb |\n",
|
||
"\n",
|
||
"shuffled pairs\n",
|
||
"\n",
|
||
"\n",
|
||
"A Sequenced ignment and\n",
|
||
"Hi-C Reads Chimera Handling\n",
|
||
"RI Re\n",
|
||
"a —————I\n",
|
||
"\n",
|
||
"Duplicate\n",
|
||
"Merge Sort removal\n",
|
||
"\n",
|
||
"Map creation\n",
|
||
"\n",
|
||
"_\n",
|
||
"\n",
|
||
"\n",
|
||
"Insights from the study\n",
|
||
"\n",
|
||
"> Total identified loops according to the study (long-range\n",
|
||
"loops 2 20 kb): 1,177;\n",
|
||
"\n",
|
||
"> Paper only analyzed chromatin loops 2 20 kb in length in\n",
|
||
"the Hi-C dataset\n",
|
||
"\n",
|
||
"> Resolutions and parameters not mentioned in paper and\n",
|
||
"Sl\n",
|
||
"\n",
|
||
"> Less no. of chromatin loops identified due to limited\n",
|
||
"sequencing depth.HenceHiChIP to detect more loops\n",
|
||
"\n",
|
||
"> H3K4me3-HiChIP dataset: 24,141 loops;\n",
|
||
"> H3K27me3-HiChIP dataset: 18,106 loops\n",
|
||
"\n",
|
||
"> FitHiC2 on resolution 20kb : 89000 loops\n",
|
||
"\n",
|
||
"PRC1 PRC2\n",
|
||
"\n",
|
||
"\n",
|
||
"What this suggests:\n",
|
||
"\n",
|
||
">\n",
|
||
"\n",
|
||
"High Mapping Percentage: The mapping quality is quite high (99.20%), which is good.\n",
|
||
"\n",
|
||
"However, the properly paired issue (0%) should be looked into further.\n",
|
||
"\n",
|
||
"Paired Read Alignment Issues: The @% for properly paired reads suggests that the\n",
|
||
"alignment tool or the pairing information may not be correct. This is a crucial issue for Hi-C\n",
|
||
"data since proper pairing indicates the correct relationship between paired-end reads. It's\n",
|
||
"worth verifying that the correct options are being used in the alignment step and whether the\n",
|
||
"\n",
|
||
"pairing information is retained properly.\n",
|
||
"\n",
|
||
"Inter-chromosomal Interactions: Given the significant number of reads with mates mapped\n",
|
||
"to different chromosomes, this aligns with your Hi-C analysis, which typically shows inter-\n",
|
||
"chromosomal interactions. However, excessive inter-chromosomal mapping could indicate a\n",
|
||
"\n",
|
||
"problem if the number is unusually high.\n",
|
||
"\n",
|
||
"A Sequenced Alignment and Duplicate Map creation\n",
|
||
"Hi-C Reads Chimera Handling Merge Sort removal\n",
|
||
"RI R2 RARA\n",
|
||
"i 1 — =~ —————— = .\n",
|
||
"SS .\n",
|
||
"; == Se = I\n",
|
||
"C ‘t 1 ———— a =\"\n",
|
||
"ess I | =\n",
|
||
": p 1 }\n",
|
||
"+ —v ===. >} 1 7 , 1\n",
|
||
"\n",
|
||
"[2]:\n",
|
||
"\n",
|
||
"hicPlotDistVsCounts —-matrix /mnt/storage3/aman/data_mcool.h5 —-outFileName contact_decay.png\n",
|
||
"usage: hicPlotDistVsCounts --matrices MATRICES [MATRICES ...] --plotFile file\n",
|
||
"\n",
|
||
"name [--labels LABELS [LABELS ...]]\n",
|
||
"[--skipDiagonal] [--maxdepth INT bp] [--perchr]\n",
|
||
"\n",
|
||
"[--chromosomeExclude CHROMOSOMEEXCLUDE [CHROMOSOMEEXCLUDE ...]]\n",
|
||
"[--outFileData OUTFILEDATA]\n",
|
||
"[--plotsize PLOTSIZE PLOTSIZE] [--help] [--version]\n",
|
||
"\n",
|
||
"hicPlotDistVsCounts: error: the following arguments are required: —-matrices/—m, —-plotFile/-o\n",
|
||
"B 2\n",
|
||
"\n",
|
||
"icv ome v Tracks ¥ mple Info v Session v Share Bookmark Save Image Circular View v Help v\n",
|
||
"\n",
|
||
"IGV oxford_e...me.fasta tig00000002:1,752,510-1,825,110 Q 72 kb (Select Tracks ) (\"Crosshairs ) (Center Line ) (Track Labels) @ qumm@ +)\n",
|
||
"\n",
|
||
"C | D)\n",
|
||
"\n",
|
||
"1,760 kb 1,770 kb 1,780 kb 1,790 kb 1,800 kb 1,810 kb 1,820 kb\n",
|
||
"L 1 n L L 1 n\n",
|
||
"\n",
|
||
"11D 2 ee) ee es 2 ie\n",
|
||
"\n",
|
||
"IKAOHOFJ_01984yijE metFmetLrpmE cytR hsilU gipF gipX tpiA pfkAcpxR sodA_1 thaBrhaD IKAOHOFJ_02046fdhE_1 dtdcsqR yihTyihR yihQ yihP_1 ompL IKAOHOFJ_02079 glnA gl\n",
|
||
"\n",
|
||
"priA_2 menA_2 sbp_2 cpxA_1 rhaT_2rhaA_1 ysdC_2 fdoG_3 yinV yihP_2 GFM1\n",
|
||
"\n",
|
||
"Adapter Content [Zi\n",
|
||
"\n",
|
||
"‘ecamuteprctoe cout open tr lay wanna aap ce a oat\n",
|
||
"\n",
|
||
"FastQC: Adapter Content\n",
|
||
"\n",
|
||
"Status Checks\n",
|
||
"Sua ren Fc sc ston wn mite ere erm ey eg ys\n",
|
||
"FastQC: Status Checks\n",
|
||
"Software Versions\n",
|
||
"\n",
|
||
"ai 8 a nse\n",
|
||
"‘anes Py ar ot es le\n",
|
||
"\n",
|
||
"Siseqera\n",
|
||
"\n",
|
||
"Y cut_n_tag/nadine_cut_tag /nadine_cut_tag\n",
|
||
"> Aux_CPI_H3K27me3_results\n",
|
||
"> C_H3K27me3_results\n",
|
||
"> C_H3K27me3_Spi_results\n",
|
||
"> CPILH3K27me3_results\n",
|
||
"\n",
|
||
"\n",
|
||
"AIAG SH =m\n",
|
||
"\n",
|
||
"\n",
|
||
"@ Terminal Shell Edit View Window Help\n",
|
||
"\n",
|
||
"4) FS Q S Sun 29. Jun 12:54\n",
|
||
"\n",
|
||
"eee ~~ aman — a.nalakath@node08:~ — ssh -L 9006:localhost:9006 a.nalakath@10.152.154.1 — 208x61\n",
|
||
"\n",
|
||
"Last login: Sun Jun 29 11:01:36 on ttys@ee\n",
|
||
"\n",
|
||
"aman@Laptop-von-Aman ~ % tum_ngs\n",
|
||
"\n",
|
||
"DRA A AA AA A A RRA A RA AA RR RRA A RRR RRA A AR AR AR HAA RR A\n",
|
||
"* Welcome to PGEN cluster *\n",
|
||
"DRA AA A AR A AA A RRA A AA ARR A RA RR A RR RRA AR A RR AA A HRA RR A\n",
|
||
"\n",
|
||
"Please use this node only to submit your jobs.\n",
|
||
"Don't use it for calculations or CPU/RAM intensive tasks!!!\n",
|
||
"\n",
|
||
"DARA A AAA AA A A AA A RA AR A RR RRA A RRR RR ARR A HRA HAR A HAA RR A\n",
|
||
"(a.nalakath@10.152.154.1) Password:\n",
|
||
"\n",
|
||
"Kickstarted on 2018-12-07\n",
|
||
"\n",
|
||
"Last login: Wed Jun 25 08:01:17 2025 from 10.157.58.238\n",
|
||
"[a.nalakath@frontend ~]$ ssh node@s\n",
|
||
"\n",
|
||
"Password:\n",
|
||
"\n",
|
||
"Kickstarted on 2018-12-04\n",
|
||
"\n",
|
||
"Last login: Wed Jun 25 08:01:32 2025 from 10.152.154.1\n",
|
||
"[a.nalakath@node@8 ~]$ tmux ls\n",
|
||
"\n",
|
||
"@: 1 windows (created Sat Jun 21 08:01:35 2025)\n",
|
||
"[a.nalakath@nodees ~1$ ff\n",
|
||
"\n",
|
||
"\n",
|
||
"AN Tene enginevelry, 5 Py weeds\n",
|
||
"- eZ Anal biota Beat dp\n",
|
||
"ate - Tyce Gear bei Oo\n",
|
||
"46, Trtwesip * Feashig UP ES\n",
|
||
"\n",
|
||
"yor\n",
|
||
"\n",
|
||
"SONGS [ab 2 S welts wort\n",
|
||
"\n",
|
||
"Coup peggy\n",
|
||
"\n",
|
||
"Repars — PDO)\n",
|
||
"Brcacvnud,\n",
|
||
"Summer school\n",
|
||
"\n",
|
||
"Reding Dalukinnoyy gprs\n",
|
||
"L¥ Pap & Stiles\n",
|
||
"Chater — Pronses\n",
|
||
"Saf & Stee Duatle fo Ui?\n",
|
||
"Anping fe Hos. Haig HG\n",
|
||
"\n",
|
||
"Mar\n",
|
||
"\n",
|
||
"elp\n",
|
||
"\n",
|
||
"@ Vivaldi File Edit View Bookmarks Mail Tools Wine\n",
|
||
"\n",
|
||
"New merch store now open, including a limited edition metal keycap! monkeytype.store\n",
|
||
"\n",
|
||
"monkeytype\n",
|
||
"\n",
|
||
"70\n",
|
||
"97%\n",
|
||
"\n",
|
||
"time 15\n",
|
||
"english\n",
|
||
"\n",
|
||
"19% 15s\n",
|
||
"\n",
|
||
"77 88/1/8/8\n",
|
||
"\n",
|
||
"jectives a’ hivinter | Etherpad\n",
|
||
"\n",
|
||
"0@0B6\n",
|
||
"\n",
|
||
"[1]:\n",
|
||
"\n",
|
||
"import h5py\n",
|
||
"\n",
|
||
"# Open the HDF5 file\n",
|
||
"\n",
|
||
"with h5py.File('cool_pileup_combined', 'r') as f:\n",
|
||
"# Inspect the structure\n",
|
||
"print(\"Keys:\", list(f.keys()))\n",
|
||
"\n",
|
||
"# Check the 'data' dataset\n",
|
||
"\n",
|
||
"data = f['data'][:]\n",
|
||
"\n",
|
||
"print(f\"'data' dataset shape: {data.shape}\")\n",
|
||
"print(f\"'data' dataset contents:\\n{data}\")\n",
|
||
"\n",
|
||
"Keys: ['annotation', ‘attrs', ‘data']\n",
|
||
"‘data' dataset shape: (16488, 3)\n",
|
||
"‘data' dataset contents:\n",
|
||
"[[1.1873085 1.2874519 1.4797186]\n",
|
||
"[1. 7349982 2.228282 3.1729212]\n",
|
||
"[1.5040904 1.3009566 1.1008095]\n",
|
||
"[1.9000989 2.8981235 1.9658103]\n",
|
||
"[2.9235291 4.7604017 2.8729181]\n",
|
||
"[1.9822323 2.930699 1.9129672]]\n",
|
||
"\n",
|
||
"\n",
|
||
"0@°8@\n",
|
||
"W PICO 5.09\n",
|
||
"\n",
|
||
"Docus_tag\n",
|
||
"KBOCNLJJ_00001\n",
|
||
"KBOCNLIJJ_00002\n",
|
||
"KBOCNLIJJ_00003\n",
|
||
"KBOCNLJJ_00004\n",
|
||
"KBOCNLJJ_00005\n",
|
||
"KBOCNLIIJ_00006\n",
|
||
"KBOCNLJJ_00007\n",
|
||
"KBOCNLJJ_00008\n",
|
||
"KBOCNLJJ_00009\n",
|
||
"KBOCNLJJ_00010\n",
|
||
"CRISPR\n",
|
||
"KBOCNLJJ_00011\n",
|
||
"KBOCNLJJ_00012\n",
|
||
"KBOCNLIJJ_00013\n",
|
||
"KBOCNLIJJ_00014\n",
|
||
"KBOCNLIJJ_00015\n",
|
||
"KBOCNLIJJ_00016\n",
|
||
"KBOCNLIJJ_00017\n",
|
||
"KBOCNLJJ_00018\n",
|
||
"KBOCNLIJJ_00019\n",
|
||
"KBOCNLJJ_00020\n",
|
||
"KBOCNLIJJ_00021\n",
|
||
"KBOCNLIJ_00022\n",
|
||
"KBOCNLIJJ_00023\n",
|
||
"KBOCNLIJJ_00024\n",
|
||
"KBOCNLIJJ_00025\n",
|
||
"KBOCNLIJJ_00026\n",
|
||
"KBOCNLJJ_00027\n",
|
||
"KBOCNLJJ_00028\n",
|
||
"KBOCNLIJJ_00029\n",
|
||
"KBOCNLIJJ_00030\n",
|
||
"KBOCNLIJJ_00031\n",
|
||
"KBOCNLIJJ_00032\n",
|
||
"KBOCNLIJJ_00033\n",
|
||
"KBOCNLIJIJ_00034\n",
|
||
"KBOCNLIJJ_00035\n",
|
||
"KBOCNLIJIJ_00036\n",
|
||
"KBOCNLIJJ_00037\n",
|
||
"KBOCNLJJ_00038\n",
|
||
"KBOCNLIJ_00039\n",
|
||
"KBOCNLIJJ_00040\n",
|
||
"KBOCNLIJJ_00041\n",
|
||
"KBOCNLIJ_00042\n",
|
||
"KBOCNLJJ_00043\n",
|
||
"KBOCNLIJ_00044\n",
|
||
"KBOCNLJJ_00045\n",
|
||
"KBOCNLIIJ_00046\n",
|
||
"KBOCNLIJJ_00047\n",
|
||
"KBOCNLIJJ_00048\n",
|
||
"KBOCNLIJJ_00049\n",
|
||
"KBOCNLJJ_00050\n",
|
||
"KBOCNLIJJ_00051\n",
|
||
"KBOCNLIIJ_00052\n",
|
||
"KBOCNLIJJ_00053\n",
|
||
"KBOCNLIJJ_00054\n",
|
||
"KBOCNLJJ_00055\n",
|
||
"KBOCNLIIJ_00056\n",
|
||
"\n",
|
||
"Wie) Get Help\n",
|
||
"Wed Exit\n",
|
||
"\n",
|
||
"ftype\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"763\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CDS\n",
|
||
"CDS\n",
|
||
"\n",
|
||
"length_bp\n",
|
||
"\n",
|
||
"1545 cysI_1\n",
|
||
"735 cysH_1\n",
|
||
"2667 ygcB_1\n",
|
||
"1509 casA_1\n",
|
||
"483 casB_1\n",
|
||
"1092 casC_1\n",
|
||
"675 casD_1\n",
|
||
"600 casE_1\n",
|
||
"918 ygbT_1\n",
|
||
"285 ygbF_1\n",
|
||
"1038\n",
|
||
"\n",
|
||
"909 cysD_1\n",
|
||
"1428 cysN\n",
|
||
"606 cysC\n",
|
||
"324 ygbE\n",
|
||
"312 ftsB\n",
|
||
"711 ispD\n",
|
||
"480 ispF\n",
|
||
"1050 truD\n",
|
||
"762 surE\n",
|
||
"627 pem\n",
|
||
"1140 nlpD_1\n",
|
||
"993 rpoS\n",
|
||
"1365 ygbN\n",
|
||
"777 otni\n",
|
||
"639 otnc\n",
|
||
"372 otnK_1\n",
|
||
"834 otnK_2\n",
|
||
"909 1tnD\n",
|
||
"768 glcR\n",
|
||
"657 pphB\n",
|
||
"2562 mutS\n",
|
||
"135\n",
|
||
"\n",
|
||
"354\n",
|
||
"\n",
|
||
"2079 fhlA\n",
|
||
"1011 hypE\n",
|
||
"1122 hypD\n",
|
||
"273 hypC\n",
|
||
"873 hypB\n",
|
||
"351 hypA\n",
|
||
"462 hycA\n",
|
||
"612 hyfA_1\n",
|
||
"1827 ndhB_1\n",
|
||
"924 hycD\n",
|
||
"1710 hycE\n",
|
||
"543 ndhI_1\n",
|
||
"768 hycG_1\n",
|
||
"411\n",
|
||
"\n",
|
||
"471 hycI\n",
|
||
"1425 bglH_1\n",
|
||
"1458 bglF_1\n",
|
||
"1014 ascG\n",
|
||
"528 hyfA_2\n",
|
||
"2253 hypF\n",
|
||
"1134 norw\n",
|
||
"1440 norv\n",
|
||
"\n",
|
||
"n\n",
|
||
"\n",
|
||
"8.1.2 COG@155\n",
|
||
"8.4.8 COG@175\n",
|
||
"tbo=\n",
|
||
"\n",
|
||
"ge\n",
|
||
"1.\n",
|
||
"1.\n",
|
||
"3. -- C0G1203\n",
|
||
"\n",
|
||
"3.1.-.-\n",
|
||
"- c0G1518\n",
|
||
"\n",
|
||
"2.7.7.4 COG0175\n",
|
||
"2.7.7.4 CO0G2895\n",
|
||
"2.7.1.2\n",
|
||
"\n",
|
||
"c0G2919\n",
|
||
"\n",
|
||
"7\n",
|
||
"COGQ496\n",
|
||
"\n",
|
||
"C0G0739\n",
|
||
"COG@568\n",
|
||
"C0G2610\n",
|
||
"\n",
|
||
"C0G1349\n",
|
||
"3.1.3.16\n",
|
||
"C0GE249\n",
|
||
"\n",
|
||
"CO0G3604\n",
|
||
"4.2.1.- C0G0@309\n",
|
||
"COGe409\n",
|
||
"C0G0298\n",
|
||
"C0G378\n",
|
||
"C0G@375\n",
|
||
"\n",
|
||
"1.-.-.- C0G1142\n",
|
||
"7.1.1.-\n",
|
||
"\n",
|
||
"COGe65e\n",
|
||
"\n",
|
||
"C0G3261\n",
|
||
"7.1.1.-\n",
|
||
"\n",
|
||
"C0G3260\n",
|
||
"3.4.23.51\n",
|
||
"3.2.1.86\n",
|
||
"\n",
|
||
"C0G1263\n",
|
||
"CO0G1609\n",
|
||
"1.-.-.- C0G1142\n",
|
||
"6.2.-.- C0G0068\n",
|
||
"\n",
|
||
"1.18.1.-\n",
|
||
"COGQ426\n",
|
||
"\n",
|
||
"We) WriteOut\n",
|
||
"We) Justify\n",
|
||
"\n",
|
||
"EC_number CoG\n",
|
||
"\n",
|
||
"aman — nano ./Downloads/assignment/Ecoli_hifi/Ecoli_hifi_genome.tsv — 208x63\n",
|
||
"\n",
|
||
"/Downloads/as\n",
|
||
"\n",
|
||
"product\n",
|
||
"\n",
|
||
"Sulfite reductase [NADPH] hemoprotein beta-component\n",
|
||
"Phosphoadenosine phosphosulfate reductase\n",
|
||
"CRISPR-associated endonuclease/helicase Cas3\n",
|
||
"CRISPR system Cascade subunit CasA\n",
|
||
"\n",
|
||
"CRISPR system Cascade subunit CasB\n",
|
||
"\n",
|
||
"CRISPR system Cascade subunit CasC\n",
|
||
"\n",
|
||
"CRISPR system Cascade subunit CasD\n",
|
||
"\n",
|
||
"CRISPR system Cascade subunit CasE\n",
|
||
"CRISPR-associated endonuclease Cas1\n",
|
||
"CRISPR-associated endoribonuclease Cas2\n",
|
||
"\n",
|
||
"hypothetical protein\n",
|
||
"Sulfate adenylyltransferase subunit 2\n",
|
||
"Sulfate adenylyltransferase subunit 1\n",
|
||
"C0G@529 Adenylyl-sulfate kinase\n",
|
||
"Inner membrane protein YgbE\n",
|
||
"Cell division protein FtsB\n",
|
||
"C0G1211 2-C-methyl-D-erythritol 4-phosphate cytidylyltransferase\n",
|
||
"C0G@245 2-C-methyl-D-erythritol 2,4-cyclodiphosphate synthase\n",
|
||
"COG@585 tRNA pseudouridine synthase D\n",
|
||
"5'/3'-nucleotidase SurE\n",
|
||
"C0G2518 Protein-L-isoaspartate O-methyltransferase\n",
|
||
"Murein hydrolase activator NlpD\n",
|
||
"RNA polymerase sigma factor RpoS\n",
|
||
"Inner membrane permease YgbN\n",
|
||
"C0G3622 2-oxo-tetronate isomerase\n",
|
||
"\n",
|
||
"3-oxo-tetronate 4-phosphate decarboxylase\n",
|
||
"C0G3395 3-oxo-tetronate kinase\n",
|
||
"C0G3395 3-oxo-tetronate kinase\n",
|
||
"\n",
|
||
"L-threonate dehydrogenase\n",
|
||
"HTH-type transcriptional repressor GlcR\n",
|
||
"C0G@639 Serine/threonine-protein phosphatase 2\n",
|
||
"DNA mismatch repair protein MutS\n",
|
||
"hypothetical protein\n",
|
||
"hypothetical protein\n",
|
||
"Formate hydrogenlyase transcriptional activator FhlA\n",
|
||
"Carbamoyl dehydratase HypE\n",
|
||
"Hydrogenase maturation factor HypD\n",
|
||
"Hydrogenase maturation factor HypC\n",
|
||
"Hydrogenase maturation factor HypB\n",
|
||
"Hydrogenase maturation factor HypA\n",
|
||
"Formate hydrogenlyase regulatory protein HycA\n",
|
||
"Hydrogenase-4 component A\n",
|
||
"NAD(P)H-quinone oxidoreductase subunit 2, chloroplastic\n",
|
||
"Formate hydrogenlyase subunit 4\n",
|
||
"Formate hydrogenlyase subunit 5\n",
|
||
"NAD(P)H-quinone oxidoreductase subunit I, chloroplastic\n",
|
||
"Formate hydrogenlyase subunit 7\n",
|
||
"hypothetical protein\n",
|
||
"COG@68@ Hydrogenase 3 maturation protease\n",
|
||
"C0G2723 Aryl-phospho-beta—D-glucosidase BglH\n",
|
||
"PTS system beta-glucoside-specific EIIBCA component\n",
|
||
"HTH-type transcriptional regulator AscG\n",
|
||
"Hydrogenase-4 component A\n",
|
||
"Carbamoyltransferase HypF\n",
|
||
"C0G1251 Nitric oxide reductase F1Rd-NAD(+) reductase\n",
|
||
"Anaerobic nitric oxide reductase flavorubredoxin\n",
|
||
"\n",
|
||
"Ws) Read File Way Prev Pg\n",
|
||
"Wi] Where is WAY Next Pg\n",
|
||
"\n",
|
||
"AKI\n",
|
||
"AU\n",
|
||
"\n",
|
||
"nment/Ecoli_hifi/Ecoli_hifi_genome.tsv\n",
|
||
"\n",
|
||
"Cut Text\n",
|
||
"UnCut Text\n",
|
||
"\n",
|
||
"me Cur Pos\n",
|
||
"Way To Spell\n",
|
||
"\n",
|
||
"OMB\n",
|
||
"\n",
|
||
"100 MB\n",
|
||
"\n",
|
||
"200 MB\n",
|
||
"\n",
|
||
"Chromosomes Show Normalization (Obs | Ctrl) Resolution (BP)\n",
|
||
"“aw “aw a a a — y,\n",
|
||
"2 Bp Observed Bala... None © Pivrb ttre teins\n",
|
||
"2.5MB 500KB 100KB 25KB 5KB 1KB 200BP\n",
|
||
"OMB 100 MB 200 MB 300 MB\n",
|
||
"\n",
|
||
"\n",
|
||
"Fragment\n",
|
||
"\n",
|
||
"= ————\n",
|
||
"\n",
|
||
"=. —,\n",
|
||
"> sequencing *——— a\n",
|
||
"\n",
|
||
"\n",
|
||
"@ Mainwindow\n",
|
||
"\n",
|
||
"Omeoerork oe oO =\n",
|
||
"\n",
|
||
"[Juicebox 2.17.00] Hi-C Map <9>: inter.hic\n",
|
||
"\n",
|
||
"View Bookmarks\n",
|
||
"Chromosomes\n",
|
||
"\n",
|
||
"Assembly Dev\n",
|
||
"\n",
|
||
"Normalization (Obs | Ctrl) Resolution (BP) Color Range\n",
|
||
"\n",
|
||
"F Q SBS MonNov4 20:49\n",
|
||
"\n",
|
||
"2.5MB 500KB 100KB 25KB 5KB\n",
|
||
"\n",
|
||
"156,000 KB 155,000 KB 154,000 KB 153,000 KB 152,000 KB 151,000 KB\n",
|
||
"\n",
|
||
"157,000 KB\n",
|
||
"\n",
|
||
"1:159,230,001-159,240,000\n",
|
||
"1:153,390,001-153,400,000\n",
|
||
"observed value (O) = 0.0\n",
|
||
"lexpected value (E) = 0.032\n",
|
||
"O/E =0\n",
|
||
"\n",
|
||
"LayerO <> | & |\n",
|
||
"\n",
|
||
"Show Annotation Panel\n",
|
||
"\n",
|
||
"\n",
|
||
"(mustache_aman) [papantonis1@gwdu101 aman]$ awk '$1 == $4 {print $1}' GEO2457_5kb_mustache_loops.bedpe | sort | unig -c && wc -1 GE02457_5kb_mustache_loops.bedpe\n",
|
||
"88@ chri\n",
|
||
"457 chr1e\n",
|
||
"536 chri1\n",
|
||
"542 chri2\n",
|
||
"297 chr13\n",
|
||
"306 chri4\n",
|
||
"278 chri5\n",
|
||
"173 chr16\n",
|
||
"253 chr17\n",
|
||
"244 chri8\n",
|
||
"\n",
|
||
"92 chri9\n",
|
||
"942 chr2\n",
|
||
"216 chr2e\n",
|
||
"\n",
|
||
"88 chr21\n",
|
||
"\n",
|
||
"65 chr22\n",
|
||
"804 chr3\n",
|
||
"686 chr4\n",
|
||
"663 chrd\n",
|
||
"731 chré\n",
|
||
"552 chr7\n",
|
||
"574 chr8&\n",
|
||
"402 chr9\n",
|
||
"205 chrx\n",
|
||
"\n",
|
||
"9987 GEO2457_5kb_mustache_loops.bedpe\n",
|
||
"\n",
|
||
"(mustache_aman) [papantonis1@gwdu1@1 aman]$ awk '$1 == $4 {print $1}' GE02459_5kb_mustache_loops.bedpe | sort | unig -c && wc -1 GE02459_5kb_mustache_loops.bedpe\n",
|
||
"673 chri\n",
|
||
"341 chr1e\n",
|
||
"394 chri1\n",
|
||
"433 chri2\n",
|
||
"233 chr13\n",
|
||
"254 chri4\n",
|
||
"207 chri5\n",
|
||
"108 chr16\n",
|
||
"147 chr1i7\n",
|
||
"234 chri8\n",
|
||
"\n",
|
||
"29 chri9\n",
|
||
"626 chr2\n",
|
||
"173 chr2e\n",
|
||
"\n",
|
||
"83 chr21\n",
|
||
"\n",
|
||
"30 chr22\n",
|
||
"60@ chr3\n",
|
||
"534 chr4\n",
|
||
"484 chrd5\n",
|
||
"536 chré\n",
|
||
"425 chr7\n",
|
||
"481 chr8\n",
|
||
"286 chr9\n",
|
||
"158 chrx\n",
|
||
"\n",
|
||
"7478 GEO2459_5kb_mustache_loops.bedpe\n",
|
||
"\n",
|
||
"Contact Matrices:\n",
|
||
"\n",
|
||
"Fig x: Visualization in Juicebox for two HiC datasets\n",
|
||
"\n",
|
||
"The 10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files\n",
|
||
"locally. The left panel shows the matrix from the cis-regulatory elements in Maize study and the one\n",
|
||
"on the right is from (7).The right panel is chromosome one at resolution 500 kb. The 10*10\n",
|
||
"chromosomes full contact matrix was visualized in Juicebox GUI app by importing files locally. The\n",
|
||
"10*10 chromosomes full contact matrix was visualized in Juicebox GUI app by importing files locally.\n",
|
||
"\n",
|
||
"Ice ot\n",
|
||
"‘earn ere ta rao pen 2 prema ne oe [eeremsne [seen]\n",
|
||
"\n",
|
||
"FastQC: Per Sequence GC Content\n",
|
||
"Pea Samp\n",
|
||
"\n",
|
||
"Per Base N Content [aim\n",
|
||
"\n",
|
||
"‘epocenapecttancastcan poten ren an asa\n",
|
||
"\n",
|
||
"FastQC: Per Base N Content\n",
|
||
"\n",
|
||
"‘Sequence Length Distribution [a\n",
|
||
"\n",
|
||
"Mimosa equa ci ng)\n",
|
||
"\n",
|
||
"‘Sequence Duplication Levels SE (ome)\n",
|
||
"‘eae ge yer\n",
|
||
"[eeewwres [cere]\n",
|
||
"\n",
|
||
"FastQC: Sequence Duplication Levels,\n",
|
||
"\n",
|
||
"Overrepresented sequences by sample SKIN\n",
|
||
"\n",
|
||
"‘Pett arr ctonnpeericsminceanh eaten.\n",
|
||
"\n",
|
||
"Top overrepresented sequences\n",
|
||
"\n",
|
||
"‘ie onmmteeseince sr ssarde The soe 2 trent ser cern aye noosa yr\n",
|
||
"\n",
|
||
"‘Adapter Content [ZI [ome]\n",
|
||
"\n",
|
||
"‘Peamusiepenep cathe sana yay te asa en aspen enon\n",
|
||
"[eeremsne [seen]\n",
|
||
"\n",
|
||
"FastQC: Adapter Content\n",
|
||
"\n",
|
||
"\n",
|
||
"© aman — nano ./Downloads/assignment/Ecoli_|\n",
|
||
"\n",
|
||
"fi/Ecoli_hifi_genome.gff — 208x63\n",
|
||
"\n",
|
||
"nment/Ecoli_hifi/Ecoli_hifi_genome.gff\n",
|
||
"\n",
|
||
"ile: ./Downloads/as\n",
|
||
"\n",
|
||
"Ww PICO 5.09\n",
|
||
"\n",
|
||
"i#gff-version 3\n",
|
||
"\n",
|
||
"##sequence-region tig@0000001 1\n",
|
||
"\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tige0eee0e1\n",
|
||
"tigeeeee0e1\n",
|
||
"\n",
|
||
"Wie) Get Help\n",
|
||
"Wed Exit\n",
|
||
"\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"minced:@.2.0\n",
|
||
"\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"Prodigal: 002006\n",
|
||
"\n",
|
||
"465\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"CRI\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"cDS\n",
|
||
"\n",
|
||
"7533\n",
|
||
"\n",
|
||
"99\n",
|
||
"1718\n",
|
||
"2811\n",
|
||
"5892\n",
|
||
"7393\n",
|
||
"7888\n",
|
||
"8982\n",
|
||
"9643\n",
|
||
"10258\n",
|
||
"11177\n",
|
||
"11567\n",
|
||
"12412\n",
|
||
"13701\n",
|
||
"14611\n",
|
||
"16038\n",
|
||
"16693\n",
|
||
"17210\n",
|
||
"17540\n",
|
||
"18250\n",
|
||
"18726\n",
|
||
"19756\n",
|
||
"20511\n",
|
||
"21277\n",
|
||
"22479\n",
|
||
"23565\n",
|
||
"25018\n",
|
||
"25799\n",
|
||
"26529\n",
|
||
"26863\n",
|
||
"27693\n",
|
||
"28797\n",
|
||
"29615\n",
|
||
"30377\n",
|
||
"33014\n",
|
||
"33225\n",
|
||
"33615\n",
|
||
"35767\n",
|
||
"36774\n",
|
||
"37895\n",
|
||
"38158\n",
|
||
"39034\n",
|
||
"39596\n",
|
||
"40182\n",
|
||
"40790\n",
|
||
"42619\n",
|
||
"43560\n",
|
||
"45279\n",
|
||
"45821\n",
|
||
"46585\n",
|
||
"46988\n",
|
||
"47617\n",
|
||
"49050\n",
|
||
"50764\n",
|
||
"51926\n",
|
||
"52606\n",
|
||
"54986\n",
|
||
"\n",
|
||
"SPR\n",
|
||
"\n",
|
||
"We) WriteOut\n",
|
||
"We) Justify\n",
|
||
"\n",
|
||
"1643\n",
|
||
"\n",
|
||
"2452\n",
|
||
"\n",
|
||
"5477\n",
|
||
"\n",
|
||
"7400\n",
|
||
"\n",
|
||
"7875\n",
|
||
"\n",
|
||
"8979\n",
|
||
"\n",
|
||
"9656\n",
|
||
"\n",
|
||
"10242\n",
|
||
"11175\n",
|
||
"11461\n",
|
||
"12329\n",
|
||
"13449\n",
|
||
"14609\n",
|
||
"16038\n",
|
||
"16643\n",
|
||
"17016\n",
|
||
"17521\n",
|
||
"18250\n",
|
||
"18729\n",
|
||
"19775\n",
|
||
"20517\n",
|
||
"21137\n",
|
||
"22416\n",
|
||
"23471\n",
|
||
"24929\n",
|
||
"25794\n",
|
||
"26437\n",
|
||
"26900\n",
|
||
"27696\n",
|
||
"28601\n",
|
||
"29564\n",
|
||
"30271\n",
|
||
"32938\n",
|
||
"33148\n",
|
||
"33578\n",
|
||
"35693\n",
|
||
"36777\n",
|
||
"37895\n",
|
||
"38167\n",
|
||
"39030\n",
|
||
"39384\n",
|
||
"40057\n",
|
||
"40793\n",
|
||
"42616\n",
|
||
"43542\n",
|
||
"45269\n",
|
||
"45821\n",
|
||
"46588\n",
|
||
"46995\n",
|
||
"47458\n",
|
||
"49041\n",
|
||
"50507\n",
|
||
"51777\n",
|
||
"52453\n",
|
||
"54858\n",
|
||
"56119\n",
|
||
"\n",
|
||
"tet et eteetse\n",
|
||
"\n",
|
||
"tet et etetetsei\n",
|
||
"\n",
|
||
"i\n",
|
||
"\n",
|
||
"tet etetesti\n",
|
||
"\n",
|
||
"++H1\n",
|
||
"\n",
|
||
"F\n",
|
||
"\n",
|
||
"SPBV2VVDVVVOVVO\n",
|
||
"\n",
|
||
"PBYWDVDDWDD WDD VDD DD VDD VV VDD VDDD DVD VDDVDDVDDVDVDVDVDVDVDVDVDVDVVVVVVOVOQ:\n",
|
||
"\n",
|
||
"Wii Read File\n",
|
||
"Wil) Where is\n",
|
||
"\n",
|
||
"ID=KBOCNLJJ_00001; eC_number=1.8.1.2;Name=cysI_1;db_xref=COG:C0G0155; gene=cysI_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00002; eC_number=1.8.4.8;Name=cysH_1;db_xref=COG:C0G0175; gene=cysH_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00003; eC_number=3.1.-.-—;Name=ygcB_1;db_xref=COG:C0G1203; gene=ygcB_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00004;Name=casA_1;gene=casA_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4690$\n",
|
||
"ID=KBOCNLJJ_@0005 ; Name=casB_1;gene=casB_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P7663$\n",
|
||
"ID=KBOCNLJJ_00006;Name=casC_1;gene=casC_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n",
|
||
"ID=KBOCNLJJ_@0007 ; Name=casD_1;gene=casD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n",
|
||
"ID=KBOCNLJJ_00008; eC_number=3.1. j;Name=casE_1;gene=casE_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n",
|
||
"ID=KBOCNLIJJ_00009; eC_number=3.1. j;Name=ygbT_1;db_xref=COG:C0G1518; gene=ygbT_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00010; eC_number=3.1.-.-—;Name=ygbF_1;gene=ygbF_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n",
|
||
"note=CRISPR with 13 repeat units;rpt_family=CRISPR;rpt_type=direct\n",
|
||
"\n",
|
||
"ID=KBOCNLJJ_00011;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00011;product=hypothetical protein\n",
|
||
"ID=KBOCNLJJ_00012; eC_number=2.7.7.4;Name=cysD_1;db_xref=COG:C0G0175; gene=cysD_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00013; eC_number=2.7.7.4;Name=cysN; db_xref=COG:C0G2895; gene=cysN;inference=ab initio prediction:Prodigal: 002006, simi$\n",
|
||
"ID=KBOCNLJJ_00014; eC_number=2.7.1.25;Name=cysC; db_xref=COG:C0G@529; gene=cysC;inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"ID=KBOCNLJJ_00015 ; Name=ygbE; gene=ygbE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P46141;1lo$\n",
|
||
"ID=KBOCNLJJ_00016;Name=ftsB; db_xref=COG:C0G2919; gene=ftsB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00017; eC_number=2.7.7.60;Name=ispD; db_xref=COG:C0G1211; gene=ispD;inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"ID=KBOCNLJJ_00018; eC_number=4.6.1.12;Name=ispF; db_xref=COG:C0G@245; gene=ispF;inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"ID=KBOCNLJJ_00019; eC_number=5.4.99.27;Name=truD; db_xref=COG:C0G0585; gene=truD; inference=ab initio prediction:Prodigal:002006,si$\n",
|
||
"ID=KBOCNLJJ_00020; eC_number=3.1.3.5;Name=surE; db_xref=COG:C0G0496; gene=surE;inference=ab initio prediction:Prodigal: 002006, simi$\n",
|
||
"ID=KBOCNLJJ_00021; eC_number=2.1.1.77;Name=pcm; db_xref=COG:C0G2518; gene=pcm; inference=ab initio prediction:Prodigal: 002006, simil$\n",
|
||
"ID=KBOCNLJJ_00022;Name=n1pD_1; db_xref=COG:C0G@739; gene=nlpD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
|
||
"ID=KBOCNLJJ_00023;Name=rpoS; db_xref=COG:C0G0568; gene=rpoS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00024;Name=ygbN; db_xref=COG:C0G2610;gene=ygbN; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00025; eC_number=5.3.1.35;Name=otnI; db_xref=COG:C0G3622; gene=otnI;inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"ID=KBOCNLJJ_00026; eC_number=4.1.1.104;Name=otnC;gene=otnC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_0@0027; eC_number=2.7.1.217;Name=otnK_1;db_xref=COG:C0G3395; gene=otnK_1;inference=ab initio prediction:Prodigal:00200$\n",
|
||
"ID=KBOCNLJJ_00028; eC_number=2.7.1.217;Name=otnK_2;db_xref=COG:C0G3395; gene=otnK_2;inference=ab initio prediction:Prodigal:00200$\n",
|
||
"ID=KBOCNLJJ_00029; eC_number=1.1.1.411;Name=1tnD; gene=1tnD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00030;Name=g1lcR;db_xref=COG:C0G1349; gene=glcR;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00031; eC_number=3.1.3.16;Name=pphB; db_xref=COG:C0G@639; gene=pphB; inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"ID=KBOCNLJJ_00032;Name=mutS;db_xref=COG:C0G0249; gene=mutS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00033;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00033;product=hypothetical protein\n",
|
||
"ID=KBOCNLJJ_00034;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00034;product=hypothetical protein\n",
|
||
"ID=KBOCNLJJ_00035 ; Name=fh1A; db_xref=COG:C0G3604;gene=fhlA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLIJJ_00036; eC_number=4.2.1.—;Name=hypE; db_xref=COG:C0G@309; gene=hypE;inference=ab initio prediction:Prodigal: 002006, simi$\n",
|
||
"ID=KBOCNLJJ_00037 ; Name=hypD; db_xref=COG:C0G0409; gene=hypD; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00038; Name=hypC; db_xref=COG:C0G0298; gene=hypC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLIJJ_00039 ; Name=hypB; db_xref=COG:C0G0378; gene=hypB; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_@0040 ; Name=hypA; db_xref=COG:C0G0375; gene=hypA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_@0041;Name=hycA;gene=hycA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P@AEV4; 1lo$\n",
|
||
"ID=KBOCNLJJ_00042; eC_number=1.-. j;Name=hyfA_1; db_xref=COG:C0G1142; gene=hyfA_1;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00043; eC_number=7.1.1.—;Name=ndhB_1;gene=ndhB_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n",
|
||
"ID=KBOCNLJJ_00044;Name=hycD; db_xref=COG:C0G0650;gene=hycD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_@0045 ; Name=hycE; db_xref=COG:C0G3261; gene=hycE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00046; eC_number=7.1.1.—;Name=ndhI_1;gene=ndhI_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n",
|
||
"ID=KBOCNLJJ_00047 ; Name=hycG_1; db_xref=COG:C0G3260; gene=hycG_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
|
||
"ID=KBOCNLJJ_00048;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00048;product=hypothetical protein\n",
|
||
"ID=KBOCNLJJ_0@0049; eC_number=3.4.23.51;Name=hycI ;db_xref=COG:C0G0680;gene=hycI;inference=ab initio prediction:Prodigal:002006,si$\n",
|
||
"ID=KBOCNLJJ_@0050; eC_number=3.2.1.86;Name=bg1H_1;db_xref=COG:C0G2723; gene=bg1H_1;inference=ab initio prediction:Prodigal:002006$\n",
|
||
"ID=KBOCNLJJ_00051; Name=bg1F_1;db_xref=COG:C0G1263; gene=bg1F_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n",
|
||
"ID=KBOCNLJJ_00052;Name=ascG; db_xref=COG:C0G1609; gene=ascG; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n",
|
||
"ID=KBOCNLJJ_00053; eC_number=1.-.-.-—;Name=hyfA_2;db_xref=COG:C0G1142;gene=hyfA_2;inference=ab initio prediction:Prodigal:002006,$\n",
|
||
"ID=KBOCNLJJ_00054; eC_number=6.2.-—.—;Name=hypF; db_xref=COG:C0G@068; gene=hypF;inference=ab initio prediction:Prodigal: 002006, simi$\n",
|
||
"ID=KBOCNLJJ_00055; eC_number=1.18.1.-—;Name=norw; db_xref=COG:C0G1251; gene=norW; inference=ab initio prediction:Prodigal: 002006, sim$\n",
|
||
"\n",
|
||
"bad Prev Pg Wag Cut Text wie Cur Pos\n",
|
||
"WA) Next Pg wig) UnCut Text Way To Spell\n",
|
||
"\n",
|
||
"\n",
|
||
"About Library _ Statistics\n",
|
||
"\n",
|
||
"Sequencing\n",
|
||
"\n",
|
||
"Sequenced Reads: 547812856\n",
|
||
"\n",
|
||
"Duplication and Complexity (% Sequenced Reads)\n",
|
||
"\n",
|
||
"Analysis of Unique Reads (% Sequenced Reads / % Unique Reads)\n",
|
||
"\n",
|
||
"Intra-fragment Reads: 34,307,613\n",
|
||
"\n",
|
||
"Below MAPQ Threshold: 355,354,506 (64.87% / 73.27%)\n",
|
||
"\n",
|
||
"Hi-C Contacts: 95,311,375 (17.40% / 19.65%)\n",
|
||
"3' Bias (Long Range): 97% - 3%\n",
|
||
"\n",
|
||
"Pair Type % (L-I-O-R): 25% - 25% - 25% - 25%\n",
|
||
"\n",
|
||
"Analysis of Hi-C Contacts (% Sequenced Reads / % Unique Reads)\n",
|
||
"\n",
|
||
"Inter-chromosomal: 22,194,956 (4.05% / 4.58%)\n",
|
||
"Intra-chromosomal: 73,116,419 (13.35% / 15.08%)\n",
|
||
"Long Range (>20Kb): 35,425,178 (6.47% / 7.30%)\n",
|
||
"\n",
|
||
"RUN wget https://github.com/samtools/htslib/releases/download/1.18/htslib-1.18.tar.bz2 && \\\n",
|
||
"tar -xvf htslib-1.18.tar.bz2 && \\\n",
|
||
"cd htslib-1.18 && \\\n",
|
||
"./configure --enable-libcurl && \\\n",
|
||
"make -j$(nproc) && \\\n",
|
||
"make install && \\\n",
|
||
"cd .. && rm -rf htslib-1.18*\n",
|
||
"\n",
|
||
"# User addition\n",
|
||
"\n",
|
||
"RUN useradd -m -u 1001 aman && echo 'aman:123' | chpasswd\n",
|
||
"RUN usermod —aG sudo aman\n",
|
||
"\n",
|
||
"RUN usermod -aG rstudio aman\n",
|
||
"\n",
|
||
"# persistent volumes. Use flag -v\n",
|
||
"RUN mkdir -p /home/rstudio/data\n",
|
||
"\n",
|
||
"RUN chown -R aman:aman /home/rstudio\n",
|
||
"VOLUME [\"/home/rstudio/data\"]\n",
|
||
"\n",
|
||
"[1]\n",
|
||
"\n",
|
||
"(4)\n",
|
||
"\n",
|
||
"tv)\n",
|
||
"\n",
|
||
"(4)\n",
|
||
"\n",
|
||
"print(hic.getGenomeID())\n",
|
||
"print(hic.getResolutions())\n",
|
||
"\n",
|
||
"hg19\n",
|
||
"[2500000, 1000000, 500000, 250000, 100000, 50000, 25000, 10000, 5000, 1000]\n",
|
||
"\n",
|
||
"now print out the chromosomes in this file.\n",
|
||
"\n",
|
||
"for chrom in hic.getChromosomes():\n",
|
||
"print(chrom.name, chrom. length)\n",
|
||
"\n",
|
||
"All 3098789\n",
|
||
"249250621\n",
|
||
"243199373\n",
|
||
"198022430\n",
|
||
"191154276\n",
|
||
"180915260\n",
|
||
"171115067\n",
|
||
"159138663\n",
|
||
"146364022\n",
|
||
"141213431\n",
|
||
"10 135534747\n",
|
||
"11 135006516\n",
|
||
"12 133851895\n",
|
||
"13 115169878\n",
|
||
"14 107349540\n",
|
||
"15 102531392\n",
|
||
"16 90354753\n",
|
||
"17 81195210\n",
|
||
"18 78077248\n",
|
||
"19 59128983\n",
|
||
"20 63025520\n",
|
||
"21 48129895\n",
|
||
"22 51304566\n",
|
||
"X 155270560\n",
|
||
"Y 59373566\n",
|
||
"MT 16569\n",
|
||
"\n",
|
||
"COIYAHAWNE\n",
|
||
"\n",
|
||
"@ Zed File Edit Selection View Go Window Help\n",
|
||
"\n",
|
||
"ma BmeOorrtktoewwwn F-<ase\n",
|
||
"\n",
|
||
"Tue Dec 17 16:52\n",
|
||
"\n",
|
||
"@ © @ = multiqc_datajson\n",
|
||
"\n",
|
||
"report_data_sources <>\n",
|
||
"FastQc\n",
|
||
"all_sections multiqc_data.json\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"GLDS-251_rna-seq_13JUN2017H\n",
|
||
"report_general_stats_data\n",
|
||
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
|
||
"percent_gc\n",
|
||
"avg_sequence_length\n",
|
||
"median_sequence_length\n",
|
||
"total_sequences\n",
|
||
"percent_duplicates\n",
|
||
"percent_fails\n",
|
||
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
|
||
"percent_gc\n",
|
||
"avg_sequence_length\n",
|
||
"median_sequence_length\n",
|
||
"total_sequences\n",
|
||
"percent_duplicates\n",
|
||
"percent_fails\n",
|
||
"GLDS-251_rna-seq_13JUN2017HiSeq_|\n",
|
||
"percent_gc\n",
|
||
"avg_sequence_length\n",
|
||
"median_sequence_length\n",
|
||
"total_sequences\n",
|
||
"\n",
|
||
"percent_duplicates\n",
|
||
"Filter... z\n",
|
||
"\n",
|
||
"st & v\n",
|
||
"\n",
|
||
"Click to restart and update Zed\n",
|
||
"\n",
|
||
"multiqc_data.json\n",
|
||
"\n",
|
||
"PELCSNL_LaLLsS 2 7.UFUFTUTUIVUIVIVIZ\n",
|
||
"\n",
|
||
"Bo\n",
|
||
"\n",
|
||
"Sign in\n",
|
||
"\n",
|
||
"+\n",
|
||
"\n",
|
||
"oO\n",
|
||
"\n",
|
||
"Q*® I\n",
|
||
"\n",
|
||
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_235_239_UMISS_Hoeksema_GTTTCG_L0@3_R1_001_1M\": {\n",
|
||
"\n",
|
||
"\"percent_gc\": 46.0,\n",
|
||
"\"avg_sequence_length\": 125.0,\n",
|
||
"\"median_sequence_length\": 125,\n",
|
||
"\"total_sequences\": 1000000.0,\n",
|
||
"\"percent_duplicates\": 23.347216247708587,\n",
|
||
"\"percent_fails\": 9.090909090909092\n",
|
||
"\n",
|
||
"Bo\n",
|
||
"\n",
|
||
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_120_UMISS_Hoeksema_TGACCA_L001_R1_001_1M\":\n",
|
||
"\n",
|
||
"\"percent_gc\": 49.0,\n",
|
||
"\"avg_sequence_length\": 125.0,\n",
|
||
"\"median_sequence_length\": 125,\n",
|
||
"\"total_sequences\": 1000000.0,\n",
|
||
"\"percent_duplicates\": 52.07411329479328,\n",
|
||
"\"percent_fails\": 18.181818181818183\n",
|
||
"\n",
|
||
"Bo\n",
|
||
"\n",
|
||
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_175_UMISS_Hoeksema_AGTTCC_L00Q2_R1_001_1M\":\n",
|
||
"\n",
|
||
"\"percent_gc\": 47.0,\n",
|
||
"\"avg_sequence_length\": 125.0,\n",
|
||
"\"median_sequence_length\": 125,\n",
|
||
"\"total_sequences\": 1000000.0,\n",
|
||
"\"percent_duplicates\": 30.77778969527732,\n",
|
||
"\"percent_fails\": 9.090909090909092\n",
|
||
"\n",
|
||
"Bo\n",
|
||
"\n",
|
||
"\"GLDS-251_rna-seq_13JUN2017HiSeq_Run_Sample_179_UMISS_Hoeksema_CCGTCC_LO0Q3_R1_001_1M\":\n",
|
||
"\n",
|
||
"\"percent_gc\": 45.0,\n",
|
||
"\"avg_sequence_length\": 125.0,\n",
|
||
"\"median_sequence_length\": 125,\n",
|
||
"\n",
|
||
"Nt ata enniianene \". ANNNANAAN A\n",
|
||
"\n",
|
||
"Updated to Zed 0.163.2\n",
|
||
"View the release notes\n",
|
||
"\n",
|
||
"algal JSON\n",
|
||
"\n",
|
||
"v\n",
|
||
"\n",
|
||
"aman@Laptop-von-Aman juicer_hpro % docker build -t juicer_hicpro .\n",
|
||
"\n",
|
||
"[+] Building 2.3s (16/18)\n",
|
||
"\n",
|
||
"=> [internal] load build definition from Dockerfile\n",
|
||
"\n",
|
||
"=> transferring dockerfile: 2.07kB\n",
|
||
"\n",
|
||
"[internal] load metadata for docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04\n",
|
||
"\n",
|
||
"[auth] nvidia/cuda:pull token for registry-1.docker.io\n",
|
||
"\n",
|
||
"[internal] load .dockerignore\n",
|
||
"\n",
|
||
"=> transferring context: 2B\n",
|
||
"\n",
|
||
"CANCELED [ 1/13] FROM docker.io/nvidia/cuda:11.7.1-devel—ubuntu22.04@sha256 : 18aade8cf@2eede9d4db5d8a8a73d4505bb2322e91cd54e4c601e5ae100ed691\n",
|
||
"=> resolve docker.io/nvidia/cuda:11.7.1-devel-ubuntu22.04@sha256: 18aade8c f02eede9d4db5d8a8a73d4505bb2322e91cd54e4c601e5ae100ed691\n",
|
||
"[internal] load build context\n",
|
||
"\n",
|
||
"=> transferring context: 2B\n",
|
||
"\n",
|
||
"CACHED [ 3/13] RUN locale-gen en_US.UTF-8\n",
|
||
"\n",
|
||
"CACHED [ 4/13] RUN wget https://repo.continuum.io/miniconda/Miniconda3-py37_4.8.2-Linux-x86_64.sh -O /tmp/miniconda.sh && bash /tmp/miniconda.sh -b -p /usr/local/anaconda &&\n",
|
||
"ERROR [ 5/13] COPY environment.yml /\n",
|
||
"\n",
|
||
"CACHED [ 6/13] RUN conda env create -f /environment.yml && conda clean -a\n",
|
||
"\n",
|
||
"CACHED [ 7/13] RUN cd /opt && wget https://github.com/nservant/HiC-Pro/archive/master.zip -O hicpro_latest.zip && unzip hicpro_latest.zip && cd HiC-Pro-master &&\n",
|
||
"\n",
|
||
"> CACHED [ 8/13] WORKDIR /opt\n",
|
||
"\n",
|
||
"ERROR [ 9/13] COPY install-dependencies.sh /opt/install-dependencies.sh\n",
|
||
"\n",
|
||
"> ERROR [10/13] COPY download-and-run-demo.sh /aidenlab/\n",
|
||
"\n",
|
||
"> ERROR [11/13] COPY download-demo.txt /aidenlab/\n",
|
||
"\n",
|
||
"v\n",
|
||
"\n",
|
||
"COPY install-dependencies.sh /opt/install-dependencies.sh:\n",
|
||
"\n",
|
||
"COPY download-and-run-demo.sh /aidenlab/:\n",
|
||
"\n",
|
||
"COPY download-demo.txt /aidenlab/:\n",
|
||
"\n",
|
||
"COPY install-dependencies.sh /opt/install-dependencies.sh\n",
|
||
"\n",
|
||
"COPY download-and-run-demo.sh /aidenlab/\n",
|
||
"\n",
|
||
"COPY download-demo.txt /aidenlab/\n",
|
||
"\n",
|
||
"RUN chmod +x /opt/install-dependencies.sh && /opt/install-dependencies.sh && \\\n",
|
||
"chmod +x /aidenlab/download-and-run-demo.sh && \\\n",
|
||
"\n",
|
||
"ERROR: failed to solve: failed to compute cache key: failed to calculate checksum of ref mh9tt@9a7urz4xt51386tebzw: :xdsz6f9f1g9z1t18j4ipud5@bf: \"/download-demo.txt\": not found\n",
|
||
"\n",
|
||
"View build details: docker-desktop://dashboard/build/desktop-—linux/desktop—linux/4aiwsé6vrixqnjrre@férxiuzt4\n",
|
||
"aman@Laptop-von-Aman juicer_hpro % I\n",
|
||
"\n",
|
||
"docker:desktop-—linux\n",
|
||
"\n",
|
||
"CACHED [ 2/13] RUN apt-get update && apt-get install -y build-essential wget unzip bzip2 gcc gt+ openjdk-11-jdk git curl make ca-certificates vim\n",
|
||
"\n",
|
||
"rm /tmp/minicon\n",
|
||
"\n",
|
||
"make configure pref\n",
|
||
"\n",
|
||
"Q.\n",
|
||
"-@s\n",
|
||
"-1s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"«1s\n",
|
||
"-@s\n",
|
||
"-1s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-Os\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-@s\n",
|
||
"-Os\n",
|
||
"-Os\n",
|
||
"-Os\n",
|
||
"\n",
|
||
"PBVVVWWWVVVVVVVGTGVONO\n",
|
||
"\n",
|
||
"Qs\n",
|
||
"\n",
|
||
"Last login: Wed Sep 18 15:46:07 on ttys@0ee\n",
|
||
"aman@Laptop-von-Aman ~ % ssh amnala@base.hpc.taltech.ee\n",
|
||
"amnala@base.hpc.taltech.ee's password:\n",
|
||
"\n",
|
||
"Last login: Wed Sep 18 16:49:35 2024 from 193.40.250.119\n",
|
||
"\n",
|
||
"Welcome to base.hpc.taltech.ee.\n",
|
||
"It has been freshly upgraded to Rocky 8!\n",
|
||
"\n",
|
||
"This is HPC Centre's main batch cluster.\n",
|
||
"If you run into any trouble, let us know in Teams 'HPC Support Chat' or write to us: hpcsupport@taltech.ee\n",
|
||
"\n",
|
||
"User guides: https://hpc.pages.taltech.ee/user-guides\n",
|
||
"\n",
|
||
"NEW MODULES:\n",
|
||
"\n",
|
||
"module load rocky8/all\n",
|
||
"\n",
|
||
"module load rocky8-spack/master\n",
|
||
"\n",
|
||
"module load openmpi/4.1.1-gcc-10.3.0-r8\n",
|
||
"\n",
|
||
"URGENT ==\n",
|
||
"\n",
|
||
". The module system has changed so your job submission scripts need to be changed\n",
|
||
"\n",
|
||
"-— amp*, green* and gray* modules have been replaced by rocky8* modules.\n",
|
||
"\n",
|
||
"-— most of the module names have changed, use module avail to see the available ones\n",
|
||
"\n",
|
||
"- Infiniband is not available currently, for MPI jobs use the openmpi/4.1.1-gcc-10.3.@-r8-tcp module\n",
|
||
"\n",
|
||
"2. We are missing some software currently, it will become available in the coming weeks\n",
|
||
"\n",
|
||
"3. The user-guide will be updated in the coming weeks and the example scripts and modules do not yet reflect the current module structure/naming\n",
|
||
"4. user-guides have been moved to https://docs.hpc.taltech.ee\n",
|
||
"\n",
|
||
"If you run into any trouble, let us know in Teams 'HPC Support Chat' or e-mail us: hpcsupport@taltech.ee\n",
|
||
"\n",
|
||
"[amnala@base ~]$ ls\n",
|
||
"\n",
|
||
"fruitsalad.txt fruitsalad_cleaned.txt history_aman.txt\n",
|
||
"\n",
|
||
"[amnala@base ~]$ cat history_aman.txt\n",
|
||
"14 cd smbgroup/bioinf-students/\n",
|
||
"\n",
|
||
"15 s -ltr\n",
|
||
"16 clear\n",
|
||
"17 s -ltr\n",
|
||
"\n",
|
||
"18 echo $HOME\n",
|
||
"\n",
|
||
"19 cp fruitsalad.txt $HOME\n",
|
||
"\n",
|
||
"20 cd $HOME\n",
|
||
"\n",
|
||
"21 s\n",
|
||
"\n",
|
||
"22 cat fruitsalad.txt\n",
|
||
"\n",
|
||
"23 uniq fruitsalad.txt\n",
|
||
"\n",
|
||
"24 cat fruitsalad.txt | sort | uniq -u\n",
|
||
"25 s\n",
|
||
"26 cat fruitsalad.txt\n",
|
||
"\n",
|
||
"27 cat fruitsalad.txt | sort | uniq -u > fruitsalad_cleaned.txt\n",
|
||
"\n",
|
||
"28 s\n",
|
||
"29 cat fruitsalad_cleaned.txt\n",
|
||
"3@ we -h\n",
|
||
"\n",
|
||
"31 we --help\n",
|
||
"\n",
|
||
"32 we -l1 fruitsalad_cleaned.txt\n",
|
||
"\n",
|
||
"33 cat fruitsalad_cleaned.txt\n",
|
||
"\n",
|
||
"34 history | less\n",
|
||
"\n",
|
||
"35 history | tail\n",
|
||
"\n",
|
||
"36 history\n",
|
||
"\n",
|
||
"37 history | tail -n +14 > history_aman.txt\n",
|
||
"[amnala@base ~]$\n",
|
||
"\n",
|
||
"\n",
|
||
"Genome vv Tracks ¥ Sample Info v Session v Share Bookmark Save Image Circular View v Help v\n",
|
||
"\n",
|
||
"IGV oxford_e...me.fasta tig00000002:1,989,819-1,993,234 Q 3,416 bp (Select Tracks ) (Crosshairs )(_Center Line )(TrackLabels) @ +)\n",
|
||
"1,990 kb j 1,991 kb j 1,992 kb j 1,993 kb\n",
|
||
"AQ 0 EA A MY TAY A AY a\n",
|
||
"|= SS SS en |\n",
|
||
"tnaB tnaA mnmE_1\n",
|
||
"\n",
|
||
"INSTITUTE\n",
|
||
"\n",
|
||
"Heng igv.org UCSan Diego fe BROAD\n",
|
||
"\n",
|
||
"\n",
|
||
"@FastQC Report\n",
|
||
"\n",
|
||
"Summary\n",
|
||
"\n",
|
||
"Qeasic Statistics\n",
|
||
"Ore base sequence quality\n",
|
||
"\n",
|
||
"Ober sequence quality scores\n",
|
||
"\n",
|
||
"Ober base sequence content\n",
|
||
"Qeer sequence GC content\n",
|
||
"Oeer base N content\n",
|
||
"\n",
|
||
"Q sequence Length Distribution\n",
|
||
"Qseauence Duplication Levels\n",
|
||
"Qoverrepresented sequences\n",
|
||
"Qadapter Content\n",
|
||
"\n",
|
||
"Qrxmmer Content\n",
|
||
"\n",
|
||
"Qbasic Statistics\n",
|
||
"\n",
|
||
"a\n",
|
||
"\n",
|
||
"Filename\n",
|
||
"\n",
|
||
"File type\n",
|
||
"\n",
|
||
"Encoding\n",
|
||
"\n",
|
||
"Total Sequences\n",
|
||
"\n",
|
||
"Sequences flagged as poor quality\n",
|
||
"Sequence length\n",
|
||
"\n",
|
||
"%GC\n",
|
||
"\n",
|
||
"wood_sample_3_forward_paired. fq.gz\n",
|
||
"Conventional base calls\n",
|
||
"\n",
|
||
"Sanger / Illumina 1.9\n",
|
||
"\n",
|
||
"185642\n",
|
||
"\n",
|
||
")\n",
|
||
"\n",
|
||
"30-150\n",
|
||
"\n",
|
||
"36\n",
|
||
"\n",
|
||
"@per base sequence quality\n",
|
||
"\n",
|
||
"Quality scores across all bases (Sanger / Illumina 1.9 encoding)\n",
|
||
"\n",
|
||
"40\n",
|
||
"\n",
|
||
"16\n",
|
||
"\n",
|
||
"14\n",
|
||
"12\n",
|
||
"10\n",
|
||
"\n",
|
||
"oN B&O\n",
|
||
"\n",
|
||
"12345 67 8 9 1519\n",
|
||
"\n",
|
||
"30-34 45-49 60-64 75-79 90-94 105-109 120-124 135-139 150\n",
|
||
"\n",
|
||
"@FastQC Report\n",
|
||
"\n",
|
||
"Summary\n",
|
||
"\n",
|
||
"Qbasic Statistics\n",
|
||
"\n",
|
||
"Ober base sequence quality\n",
|
||
"\n",
|
||
"Ober sequence quality scores\n",
|
||
"Ober base sequence content\n",
|
||
"OQer sequence GC content\n",
|
||
"Ober base N content\n",
|
||
"Osequence Length Distribution\n",
|
||
"Osequence Duplication Levels\n",
|
||
"Q overrepresented sequences\n",
|
||
"Qoaaapter Content\n",
|
||
"\n",
|
||
"Tue 8 Oct 2024\n",
|
||
"\n",
|
||
"5_merged_2_paired.fastq\n",
|
||
"\n",
|
||
"Oper base sequence content\n",
|
||
"\n",
|
||
"100\n",
|
||
"\n",
|
||
"90\n",
|
||
"\n",
|
||
"80\n",
|
||
"\n",
|
||
"70\n",
|
||
"\n",
|
||
"60\n",
|
||
"\n",
|
||
"50\n",
|
||
"\n",
|
||
"40\n",
|
||
"\n",
|
||
"30\n",
|
||
"\n",
|
||
"20\n",
|
||
"\n",
|
||
"10\n",
|
||
"\n",
|
||
"123456789\n",
|
||
"\n",
|
||
"Sequence content across all bases\n",
|
||
"\n",
|
||
"11 $13 15 17 19 21 23 25 27 29 31 33 35 37 39 41 43 45 47 49 51 53 55 57 59 61 63 65\n",
|
||
"Position in read (bp)\n",
|
||
"\n",
|
||
"%T\n",
|
||
"%C\n",
|
||
"\n",
|
||
"%G\n",
|
||
"\n",
|
||
"eoo <— > OQ VD G monkeytype.com Ws Search SEARXNG-NALAKATH eave @ @ ~™@®\n",
|
||
"ay & a Ab New merch store now open, including a limited edition metal keycap! monkeytype.store x\n",
|
||
"\n",
|
||
"monkeytype\n",
|
||
"\n",
|
||
"73\n",
|
||
"96%\n",
|
||
"\n",
|
||
"cautich 76 182/3/1/0 84% 30s\n",
|
||
"\n",
|
||
"GCO@Qe2® ag Avnud9g HSBTOC BD\n",
|
||
"\n",
|
||
"english\n",
|
||
"&\n",
|
||
"S Workspaces v (mi) Monkeytype | A minimalisti + Vv\n",
|
||
"0&8 S CI CQ Reset Om 100% 11:12\n",
|
||
"\n",
|
||
"Labs/Group Leaders that interest\n",
|
||
"you (up to 5)\n",
|
||
"\n",
|
||
"Labs/Group Leaders that interest\n",
|
||
"you (up to 5)\n",
|
||
"\n",
|
||
"Labs/Group Leaders that interest\n",
|
||
"you (up to 5)\n",
|
||
"\n",
|
||
"Labs/Group Leaders that interest\n",
|
||
"you (up to 5)\n",
|
||
"\n",
|
||
"Christa Buecker\n",
|
||
"\n",
|
||
"Daniel Gerlich\n",
|
||
"\n",
|
||
"Marco Hein\n",
|
||
"\n",
|
||
"Yan Ma\n",
|
||
"\n",
|
||
"@FastQC Report\n",
|
||
"\n",
|
||
"Summary\n",
|
||
"\n",
|
||
"Qeasic Statistics\n",
|
||
"Ore base sequence quality\n",
|
||
"\n",
|
||
"Ober sequence quality scores\n",
|
||
"\n",
|
||
"Ober base sequence content\n",
|
||
"Qeer sequence GC content\n",
|
||
"Oeer base N content\n",
|
||
"\n",
|
||
"Q sequence Length Distribution\n",
|
||
"Qseauence Duplication Levels\n",
|
||
"Qoverrepresented sequences\n",
|
||
"Qadapter Content\n",
|
||
"\n",
|
||
"Okmmer Content\n",
|
||
"\n",
|
||
"Qbasic Statistics\n",
|
||
"\n",
|
||
"a\n",
|
||
"\n",
|
||
"Filename\n",
|
||
"\n",
|
||
"File type\n",
|
||
"\n",
|
||
"Encoding\n",
|
||
"\n",
|
||
"Total Sequences\n",
|
||
"\n",
|
||
"Sequences flagged as poor quality\n",
|
||
"Sequence length\n",
|
||
"\n",
|
||
"%GC\n",
|
||
"\n",
|
||
"wood_sample_5_forward_paired. fq.gz\n",
|
||
"Conventional base calls\n",
|
||
"\n",
|
||
"Sanger / Illumina 1.9\n",
|
||
"\n",
|
||
"179506\n",
|
||
"\n",
|
||
")\n",
|
||
"\n",
|
||
"30-150\n",
|
||
"\n",
|
||
"37\n",
|
||
"\n",
|
||
"@per base sequence quality\n",
|
||
"\n",
|
||
"Quality scores across all bases (Sanger / Illumina 1.9 encoding)\n",
|
||
"\n",
|
||
"40\n",
|
||
"\n",
|
||
"16\n",
|
||
"\n",
|
||
"14\n",
|
||
"12\n",
|
||
"10\n",
|
||
"\n",
|
||
"oN B&O\n",
|
||
"\n",
|
||
"12345 67 8 9 1519\n",
|
||
"\n",
|
||
"30-34 45-49 60-64 75-79 90-94 105-109 120-124 135-139 150\n",
|
||
"\n",
|
||
"In [36]:\n",
|
||
"\n",
|
||
"%%sbash\n",
|
||
"\n",
|
||
"head /mnt/storage3/aman/wdbasejuicer_new/hiccups_output/postprocessed_pixels_10000.bedpe\n",
|
||
"\n",
|
||
"#chr1— x1 x2 chr2\n",
|
||
"expectedDonut expectedH\n",
|
||
"centroid2 radius\n",
|
||
"\n",
|
||
"# juicer_tools version 2.20.00\n",
|
||
"10 6090000 6100000 10\n",
|
||
"6.738838 8.369542\n",
|
||
"\n",
|
||
"6098333 6208333 7454\n",
|
||
"\n",
|
||
"10 139920000 13993000\n",
|
||
"55,255 62.0 8.725843\n",
|
||
"455184E-15 9.31793E-40\n",
|
||
"\n",
|
||
"10 76000000 76010000\n",
|
||
"55,255 57.0 9.344456\n",
|
||
"203114E-17 2.29482E-25\n",
|
||
"\n",
|
||
"10 149390000 14940000\n",
|
||
"55,255 56.0 5.521386\n",
|
||
"702141E-16 2.387457E-16\n",
|
||
"\n",
|
||
"10 136480000 13649000\n",
|
||
"55,255 56.0 5 8624353\n",
|
||
"23398E-20 1.2297154E-24\n",
|
||
"\n",
|
||
"10 148200000 14821000\n",
|
||
"55,255 55.0 78222165\n",
|
||
"\n",
|
||
"19 8.71397@5E-12 2\n",
|
||
"\n",
|
||
"10 145390000 14540000\n",
|
||
"55,255 52.0 9.858375\n",
|
||
"\n",
|
||
"17 1.6487045E-21 2\n",
|
||
"\n",
|
||
"10 143300000 14331000\n",
|
||
"55,255 48.0 7.270913\n",
|
||
"923472E-15 8.827955E-12\n",
|
||
"\n",
|
||
"yl y2 name score strand1 strand2 color observed expectedBL\n",
|
||
"expectedV fdrBL fdrDonut fdrH fdrv numCollapsed centroid1\n",
|
||
"6200000 6210000 . . : : @,255,255 69.0 7.9115663\n",
|
||
"13.515236 1.45373255E-30 5.202941E-36 3.1267008E-30 1.2960435E-19 3\n",
|
||
"0 10 139980000 139990000 : : : . 0,2\n",
|
||
"7.795326 15.521655 4.7749968 5.0803407E-25 6.842732E-30 1.4\n",
|
||
"3 139925000 139985000 10000\n",
|
||
"\n",
|
||
"10 76080000 76090000 : : : . 0,2\n",
|
||
"8.861963 11.599155 7.0608373 2.5698042E-21 1.734446E-21 4.3\n",
|
||
"6 76006666 76076666 14337\n",
|
||
"0 10 149450000 149460000 : : : . 0,2\n",
|
||
"7.006336 10.389031 11.967166 2.12049@5E-29 6.3991415E-25 1.6\n",
|
||
"4 149390000 149450000 7071\n",
|
||
"0 10 136880000 136890000 : : : . 0,2\n",
|
||
"4.0235314 9.664011 6.9882493 2.1204905E-29 8.194439E-34 2.8\n",
|
||
"7 136483571 136879285 16659\n",
|
||
"0 10 148260000 148270000 : : : . 0,2\n",
|
||
"9.238162 9.26983 14.654494 6.932115E-24 3.9216012E-20 1.4314703E-\n",
|
||
"148205000 148260000 5000\n",
|
||
"0 10 145440000 145450000 : : : . 0,2\n",
|
||
"6.957423 8.590018 6.5711 5.5672264E-14 6.6677316E-22 1.1138844E-\n",
|
||
"145395000 145450000 5000\n",
|
||
"0 10 143360000 143370000 : : : . 0,2\n",
|
||
"55802155 8.395383 12.302593 1.2983397E-18 1.498726E-22 4.4\n",
|
||
"2 143310000 143365000 5000\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"screenshots = get_screenshots(\"/Users/aman/Pictures\")\n",
|
||
"texts = extract_text(screenshots)\n",
|
||
"embeddings = create_and_index(texts)\n",
|
||
"results = query_embedding(embeddings, \"hic\")\n",
|
||
"for r in results:\n",
|
||
" print(r)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "81a8265a",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"True"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"load_dotenv()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "8e20bf7e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"messages = \"What is Hi-C and how does it work?\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "4c5ca3c7",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Retrieved from docs\n",
|
||
"[Score: 0.745] This slide explains how rapid fluctuations in population size influence the effective population size\n",
|
||
"(N-), a key parameter in population genetics. Un...\n",
|
||
"\n",
|
||
"[Score: 0.726] This slide focuses on the effect of slow fluctuations in population size on the effective\n",
|
||
"population size (V.) and emphasizes the conditions under whi...\n",
|
||
"\n",
|
||
"[Score: 0.640] Variable population size\n",
|
||
"\n",
|
||
"Beyond the Standard Neutral Model\n",
|
||
"\n",
|
||
"Slow fluctuations\n",
|
||
"in population size : = =\n",
|
||
"\n",
|
||
"4 Need:\n",
|
||
"A, 7 T << min[N, |\n",
|
||
"\n",
|
||
"...\n",
|
||
"\n",
|
||
"\n",
|
||
"llm ans\n",
|
||
"# Impact of Population Size Fluctuations on Effective Population Size\n",
|
||
"\n",
|
||
"Based on the provided documents, population size fluctuations significantly reduce the effective population size (Ne), primarily because Ne reflects the **harmonic mean** of population sizes over time rather than the arithmetic mean.\n",
|
||
"\n",
|
||
"## Key Effects:\n",
|
||
"\n",
|
||
"1. **Disproportionate Impact of Small Populations**: The harmonic mean is heavily influenced by periods of small population size. For example, if a population fluctuates between N and N/4, the effective population size becomes Ne = 2N—significantly smaller than the actual average population size [1].\n",
|
||
"\n",
|
||
"2. **Increased Coalescence**: Smaller populations have higher probabilities of coalescence, which reduces genetic diversity and lowers Ne [1].\n",
|
||
"\n",
|
||
"3. **Even Brief Reductions Matter**: Even short periods of population decline can greatly reduce Ne, emphasizing that temporary bottlenecks have lasting genetic consequences [1].\n",
|
||
"\n",
|
||
"## Rapid vs. Slow Fluctuations:\n",
|
||
"\n",
|
||
"- **Rapid fluctuations**: The harmonic mean formula accurately represents Ne, and population size changes are abrupt [1].\n",
|
||
"\n",
|
||
"- **Slow fluctuations**: When the observation time (T') is much smaller than the minimum population size (min[Nt]), the population appears relatively stable, and the harmonic mean formula may not accurately represent Ne over longer periods [2]. For the harmonic mean calculation to be meaningful, the time scale of observation must be significantly smaller than the scale of population size changes [2].\n",
|
||
"\n",
|
||
"**Bottom line**: Fluctuations in population size reduce genetic diversity by lowering Ne, affecting coalescence rates and increasing the impact of genetic drift [1].\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# do embedding search\n",
|
||
"question = \"How do population size fluctuations affect effective population size?\"\n",
|
||
"results = embeddings.search(question, 3)\n",
|
||
"context = \"\\n\\n\".join([r[\"text\"] for r in results]) # pass to llm\n",
|
||
"\n",
|
||
"# verify\n",
|
||
"print(\"Retrieved from docs\")\n",
|
||
"for r in results:\n",
|
||
" print(f\"[Score: {r['score']:.3f}] {r['text'][:150]}...\")\n",
|
||
" print()\n",
|
||
"\n",
|
||
"# send with context\n",
|
||
"response = litellm.completion(\n",
|
||
" model=\"openrouter/minimax/minimax-m2.5:free\",\n",
|
||
" messages=[\n",
|
||
" {\n",
|
||
" \"role\": \"system\",\n",
|
||
" \"content\": \"Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'.\"\n",
|
||
" },\n",
|
||
" {\n",
|
||
" \"role\": \"user\",\n",
|
||
" \"content\": f\"Context from my documents:\\n{context}\\n\\nQuestion: {question}\"\n",
|
||
" }\n",
|
||
" ]\n",
|
||
")\n",
|
||
"print(\"\\nllm ans\")\n",
|
||
"print(response.choices[0].message.content)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "base",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.12.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|