diff --git a/.env.template b/.env.template new file mode 100644 index 0000000..75cb3b0 --- /dev/null +++ b/.env.template @@ -0,0 +1,2 @@ +OPENROUTER_API_KEY= +OPENROUTER_BASE_URL = \ No newline at end of file diff --git a/.gitignore b/.gitignore index a80c48c..1eaef39 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,4 @@ Thumbs.db # Project specific data/ +.env \ No newline at end of file diff --git a/kg_ocr/__init__.py b/kg_ocr/__init__.py index 31a199f..1cb88fb 100644 --- a/kg_ocr/__init__.py +++ b/kg_ocr/__init__.py @@ -1,4 +1,5 @@ from .ocr import get_screenshots, extract_text -from .embeddings import create_and_index, query_embedding +from .embeddings import create_and_index +from .rag import retrieve, ask_wllm -__all__ = ["get_screenshots", "extract_text", "create_and_index", "query_embedding"] +__all__ = ["get_screenshots", "extract_text", "create_and_index", "retrieve", "ask_wllm"] diff --git a/kg_ocr/embeddings/__init__.py b/kg_ocr/embeddings/__init__.py index 1f0fb56..9f1382e 100644 --- a/kg_ocr/embeddings/__init__.py +++ b/kg_ocr/embeddings/__init__.py @@ -1,3 +1,3 @@ -from .indexer import create_and_index, query_embedding +from .indexer import create_and_index -__all__ = ["create_and_index", "query_embedding"] +__all__ = ["create_and_index"] diff --git a/kg_ocr/embeddings/indexer.py b/kg_ocr/embeddings/indexer.py index ea964f8..8a20037 100644 --- a/kg_ocr/embeddings/indexer.py +++ b/kg_ocr/embeddings/indexer.py @@ -15,9 +15,4 @@ def create_and_index( return embeddings -def query_embedding( - embeddings: Embeddings, query: str, limit: int = 100 -) -> list[str]: - """Search embeddings and return matching texts.""" - results = embeddings.search(query, limit) - return [r["text"] for r in results] + diff --git a/kg_ocr/rag/__init__.py b/kg_ocr/rag/__init__.py new file mode 100644 index 0000000..645b12d --- /dev/null +++ b/kg_ocr/rag/__init__.py @@ -0,0 +1,3 @@ +from .query import retrieve, ask_wllm + +__all__ = ["retrieve", "ask_wllm"] diff --git a/kg_ocr/rag/query.py b/kg_ocr/rag/query.py new file mode 100644 index 0000000..dd09627 --- /dev/null +++ b/kg_ocr/rag/query.py @@ -0,0 +1,32 @@ +from txtai.embeddings import Embeddings +from txtai import LLM +import litellm +from dotenv import load_dotenv +import os + +load_dotenv() + +def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]: + """Search embeddings and return results with scores""" + return embeddings.search(query, limit) + +def ask_wllm(embeddings: Embeddings, question: str, model: str = "openrouter/minimax/minimax-m2.5:free", limit: int = 3) -> str: + """RAG: retrieve context from embeddings, then answer with an LLM.""" + results = retrieve(embeddings, question, limit) + context = "\n\n".join([r["text"] for r in results]) + + response = litellm.completion( + model=model, + messages=[ + { + "role": "system", + "content": "Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'." + }, + { + "role": "user", + "content": f"Context from my documents:\n{context}\n\nQuestion: {question}" + } + ] + ) + + return response.choices[0].message.content \ No newline at end of file diff --git a/notebooks/01_ocr_sc.ipynb b/notebooks/01_init.ipynb similarity index 100% rename from notebooks/01_ocr_sc.ipynb rename to notebooks/01_init.ipynb diff --git a/notebooks/02_functions_legacy.ipynb b/notebooks/02_functions.ipynb similarity index 95% rename from notebooks/02_functions_legacy.ipynb rename to notebooks/02_functions.ipynb index b788d6f..057e81d 100644 --- a/notebooks/02_functions_legacy.ipynb +++ b/notebooks/02_functions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "id": "e58ed372", "metadata": {}, "outputs": [], @@ -11,34 +11,16 @@ "from pathlib import Path\n", "import pytesseract\n", "from PIL import Image\n", - "from txtai.embeddings import Embeddings" + "from txtai.embeddings import Embeddings\n", + "from txtai import LLM\n", + "import litellm\n", + "from dotenv import load_dotenv\n", + "import os" ] }, { "cell_type": "code", - "execution_count": 1, - "id": "5f2d75d9", - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'kg_scr'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkg_scr\u001b[39;00m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'kg_scr'" - ] - } - ], - "source": [ - "import kg_scr" - ] - }, - { - "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "10a7eff9", "metadata": {}, "outputs": [], @@ -89,19 +71,51 @@ "def query_embedding(embeddings: Embeddings, query: str, limit: int = 100) -> list[str]:\n", " \"\"\"Search embeddings and return matching texts\"\"\"\n", " results = embeddings.search(query, limit)\n", - " return [r[\"text\"] for r in results]" + " return [r[\"text\"] for r in results]\n", + "\n", + "def retrieve(embeddings: Embeddings, query: str, limit: int = 3) -> list[dict]:\n", + " \"\"\"Search embeddings and return results with scores\"\"\"\n", + " return embeddings.search(query, limit)\n", + "\n", + "def ask_wllm(embeddings: Embeddings, question: str, model: str = \"openrouter/minimax/minimax-m2.5:free\", limit: int = 3) -> str:\n", + " \"\"\"RAG: retrieve context from embeddings, then answer with an LLM.\"\"\"\n", + " results = retrieve(embeddings, question, limit)\n", + " context = \"\\n\\n\".join([r[\"text\"] for r in results])\n", + "\n", + " response = litellm.completion(\n", + " model=model,\n", + " messages=[\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"Answer ONLY using the provided context. Cite which parts you're drawing from. If the context doesn't cover something, say 'not in my documents'.\"\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"Context from my documents:\\n{context}\\n\\nQuestion: {question}\"\n", + " }\n", + " ]\n", + " )\n", + "\n", + " return response.choices[0].message.content" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "e73d6386", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "da81a1463d0f4d5694edbfd412e52763", + "model_id": "41d3c1089d7346229f5c9ff7b31068a1", "version_major": 2, "version_minor": 0 }, @@ -3466,76 +3480,28 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a613a361", + "execution_count": 7, + "id": "81a8265a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# # create embeddings\n", - "# embeddings = Embeddings({\n", - "# \"path\": \"sentence-transformers/all-MiniLM-L6-v2\",\n", - "# \"content\": True,\n", - "# \"graph\": True,\n", - "# \"hybrid\": True, \n", - "# \"scoring\": True\n", - "# })\n", - "\n", - "# # do indexing\n", - "# embeddings.index(txt)" + "load_dotenv()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6f94de70", - "metadata": {}, - "outputs": [], - "source": [ - "# embeddings search\n", - "print(\"%-20s %s\" % (\"Query\", \"Best Match\"))\n", - "print(\"-\" * 50)\n", - "\n", - "for query in [\"genome\"]:\n", - " results = embeddings.search(query, 100)\n", - " for r in results:\n", - " print(r[\"text\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10c81e27", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"OPENROUTER_API_KEY\"] = \"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\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e9519cf2", - "metadata": {}, - "outputs": [], - "source": [ - "from txtai import LLM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58bce2ae", - "metadata": {}, - "outputs": [], - "source": [ - "OPENROUTER_API_KEY = os.getenv(\"OPENROUTER_API_KEY\")\n", - "OPENROUTER_BASE_URL = os.getenv(\"OPENROUTER_API_BASE\", \"https://openrouter.ai/api/v1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8e20bf7e", "metadata": {}, "outputs": [], @@ -3545,60 +3511,57 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "41f0f066", - "metadata": {}, - "outputs": [], - "source": [ - "import litellm\n", - "\n", - "response = litellm.completion(\n", - " model=\"openrouter/minimax/minimax-m2.5:free\",\n", - " messages=[\n", - " {\"role\": \"user\", \"content\": \"How do population size fluctuations affect effective population size??\"}\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "879c7011", - "metadata": {}, - "outputs": [], - "source": [ - "# Just the answer\n", - "print(\"Answer:\", response.choices[0].message.content)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b2f7af13", - "metadata": {}, - "outputs": [], - "source": [ - "# The reasoning/thinking\n", - "print(\"Reasoning:\", response.choices[0].message.reasoning_content)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0e29bc4c", - "metadata": {}, - "outputs": [], - "source": [ - "# Token usage\n", - "print(\"Tokens used:\", response.usage.total_tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "4c5ca3c7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Retrieved from docs\n", + "[Score: 0.745] This slide explains how rapid fluctuations in population size influence the effective population size\n", + "(N-), a key parameter in population genetics. Un...\n", + "\n", + "[Score: 0.726] This slide focuses on the effect of slow fluctuations in population size on the effective\n", + "population size (V.) and emphasizes the conditions under whi...\n", + "\n", + "[Score: 0.640] Variable population size\n", + "\n", + "Beyond the Standard Neutral Model\n", + "\n", + "Slow fluctuations\n", + "in population size : = =\n", + "\n", + "4 Need:\n", + "A, 7 T << min[N, |\n", + "\n", + "...\n", + "\n", + "\n", + "llm ans\n", + "# Impact of Population Size Fluctuations on Effective Population Size\n", + "\n", + "Based on the provided documents, population size fluctuations significantly reduce the effective population size (Ne), primarily because Ne reflects the **harmonic mean** of population sizes over time rather than the arithmetic mean.\n", + "\n", + "## Key Effects:\n", + "\n", + "1. **Disproportionate Impact of Small Populations**: The harmonic mean is heavily influenced by periods of small population size. For example, if a population fluctuates between N and N/4, the effective population size becomes Ne = 2N—significantly smaller than the actual average population size [1].\n", + "\n", + "2. **Increased Coalescence**: Smaller populations have higher probabilities of coalescence, which reduces genetic diversity and lowers Ne [1].\n", + "\n", + "3. **Even Brief Reductions Matter**: Even short periods of population decline can greatly reduce Ne, emphasizing that temporary bottlenecks have lasting genetic consequences [1].\n", + "\n", + "## Rapid vs. Slow Fluctuations:\n", + "\n", + "- **Rapid fluctuations**: The harmonic mean formula accurately represents Ne, and population size changes are abrupt [1].\n", + "\n", + "- **Slow fluctuations**: When the observation time (T') is much smaller than the minimum population size (min[Nt]), the population appears relatively stable, and the harmonic mean formula may not accurately represent Ne over longer periods [2]. For the harmonic mean calculation to be meaningful, the time scale of observation must be significantly smaller than the scale of population size changes [2].\n", + "\n", + "**Bottom line**: Fluctuations in population size reduce genetic diversity by lowering Ne, affecting coalescence rates and increasing the impact of genetic drift [1].\n" + ] + } + ], "source": [ "# do embedding search\n", "question = \"How do population size fluctuations affect effective population size?\"\n", diff --git a/notebooks/03_flow.ipynb b/notebooks/03_flow.ipynb deleted file mode 100644 index e98e655..0000000 --- a/notebooks/03_flow.ipynb +++ /dev/null @@ -1,2827 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "143717cd", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/opt/homebrew/anaconda3/bin/python\n" - ] - } - ], - "source": [ - "import sys\n", - "print(sys.executable)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "508336f4", - "metadata": {}, - "outputs": [], - "source": [ - "from kg_ocr import get_screenshots, extract_text, create_and_index, query_embedding" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "11055f85", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e22406e942764e928a8bf58776e96e45", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading weights: 0%| | 0/103 [00:00 Oy and D > 0\n", - ", as intermediate-frequency alleles become more common. The slide emphasizes the challenge of\n", - "interpreting Tajima's D in bottleneck models due to these variable outcomes and underscores the\n", - "\n", - "need for careful consideration of demographic history in genetic analyses.\n", - "\n", - "@ Vivaldi\n", - "\n", - "at)\n", - "\n", - "v Speed Dial\n", - "\n", - "File Edit View Bookmarks\n", - "\n", - "S\n", - "\n", - "&% Workspaces v\n", - "\n", - "—- > a U8\n", - "\n", - "v Imported From... YY Imported From...\n", - "\n", - "© sic all pats\n", - "\n", - "Sources\n", - "\n", - "+ Add source\n", - "\n", - "Select all sources\n", - "\n", - "]\n", - "\n", - "Formulae.pdf\n", - "\n", - "Lecture O.pdf\n", - "\n", - "Lecture 1.pdf\n", - "\n", - "Lecture 1_1.pdf\n", - "\n", - "Lecture 2.pdf\n", - "\n", - "Lecture 3.pdf\n", - "\n", - "Lecture 4.pdf\n", - "\n", - "Lecture 5.pdf\n", - "\n", - "Lecture 6.pdf\n", - "\n", - "0 @ 0 ®\n", - "\n", - "Mail\n", - "\n", - "@ Cool Guides\n", - "\n", - "google.com\n", - "\n", - "Online Bewerbung QGIS API Docume...\n", - "\n", - "Oo\n", - "\n", - "Tools Window Help\n", - "\n", - "Startpage Search Results Kurs: Evolutionsgenetik de § NotebookLM\n", - "\n", - "om {fl v\n", - "\n", - "qgis - Trying to pe... New Script - Earth...\n", - "\n", - "Chat @ Refresh\n", - "\n", - "explain more about the following as per the sources\n", - "- 1. The change in allele frequency due to selection\n", - "2. The probability of fixation for an allele with\n", - "selection advantage and the time to fixation 3. The\n", - "equilibrium frequencies for both alleles in\n", - "overdominance\n", - "\n", - "Here's more information about allele frequency\n", - "change due to selection, the probability and time to\n", - "fixation of an allele with a selective advantage, and\n", - "equilibrium frequencies in overdominance, based on\n", - "the provided sources:\n", - "1. The change in allele frequency due to selection\n", - "* General principle Natural selection causes\n", - "changes in allele frequencies from one generation\n", - "to the next 1... . Fitter individuals produce more\n", - "viable offspring, influencing future generations 3.\n", - "- Fisher's theorem The fitness of the population is\n", - "\n", - "maximized 4\n", - "\n", - "14 sources\n", - "\n", - "Start typing...\n", - "\n", - "°©@\n", - "\n", - "Describe Wright-Fisher model assumptions abou >\n", - "\n", - "Pastebin.com - #1...\n", - "\n", - "So\n", - "te)\n", - "\n", - "$vertr? 6 8B &@ SB FQ\n", - "\n", - "® EvolG all pdfs - Notebook BAKSMMaTi ela dale most rel is\n", - "\n", - "Q: Search Startpage v @\n", - "TargetP 2.0-DTU... https://www.mood... OnePlus12R revie... Whois “Indian\"in.. vA\n", - "«~ Share 3 Settings CO\n", - "Studio > Note a\n", - "\n", - "Genomic Signatures of Natural Selection\n", - "\n", - "(Saved responses are view only)\n", - "\n", - "a reduction in variation [9].\n", - "2. Detecting Selection: Neutrality Tests\n", - "\n", - "- Neutrality tests compare observed data to theoretical\n", - "expectations under a neutral model (where alleles are not under\n", - "selection) to detect if selection has occurred [10].\n", - "\n", - "* Comparing multiple loci across the genome: Because selection\n", - "acts on few genes, loci under selection should be outliers when\n", - "compared to other parts of the genome [10, 11].\n", - "\n", - "* Tajima's D: This test compares two estimators of the population\n", - "mutation rate, 6 (theta). Tajima's D will be:\n", - "\n", - "* Negative when there is an excess of rare alleles, suggesting a\n", - "recent selective sweep or population expansion [5, 12].\n", - "\n", - "+ Positive when there is an excess of intermediate-frequency\n", - "alleles, suggesting balancing selection, population structure,\n", - "or a recent bottleneck [8, 9, 12].\n", - "\n", - "* Zero when a population is in equilibrium, and evolving\n", - "neutrally [12].\n", - "\n", - "* McDonald-Kreitman (MK) test: Compares the ratio of non-\n", - "synonymous (amino acid changing) to synonymous (silent)\n", - "substitutions within a species and between species. A higher ratio\n", - "between species compared to within species suggests positive\n", - "selection, and a lower ratio suggests purifying selection [13, 14].\n", - "\n", - "* Site Frequency Spectrum (SFS): The distribution of allele\n", - "frequencies in a sample. Selection leaves specific sianatures in the\n", - "\n", - "G) Convert to source\n", - "\n", - "NotebookLM can be inaccurate; please double check its responses.\n", - "\n", - "+\n", - "Q\n", - "\n", - "QC) Co reset —O—— 100 %\n", - "\n", - "c\n", - "\n", - "Mon Feb 10 16:00\n", - "\n", - "aw\n", - "\n", - "v\n", - "\n", - "O © HD\n", - "\n", - "QD\n", - "\n", - "ee\n", - "\n", - "Qu qa\n", - "\n", - "OH O®e< GO 8W OW A\n", - "\n", - "&\n", - "\n", - "16:00\n", - "\n", - "”\n", - "\n", - "@ Vivaldi\n", - "\n", - "at)\n", - "\n", - "v Speed Dial\n", - "\n", - "File Edit View Bookmarks\n", - "\n", - "S\n", - "\n", - "&% Workspaces v\n", - "\n", - "—- > a U8\n", - "\n", - "v Imported From... YY Imported From...\n", - "\n", - "© sic all pats\n", - "\n", - "Sources\n", - "\n", - "+ Add source\n", - "\n", - "Select all sources\n", - "\n", - "]\n", - "\n", - "Formulae.pdf\n", - "\n", - "Lecture O.pdf\n", - "\n", - "Lecture 1.pdf\n", - "\n", - "Lecture 1_1.pdf\n", - "\n", - "Lecture 2.pdf\n", - "\n", - "Lecture 3.pdf\n", - "\n", - "Lecture 4.pdf\n", - "\n", - "Lecture 5.pdf\n", - "\n", - "Lecture 6.pdf\n", - "\n", - "0 @ 0 ®\n", - "\n", - "Mail\n", - "\n", - "@ Cool Guides\n", - "\n", - "google.com\n", - "\n", - "Online Bewerbung QGIS API Docume...\n", - "\n", - "Oo\n", - "\n", - "Tools Window Help\n", - "\n", - "Startpage Search Results Kurs: Evolutionsgenetik de § NotebookLM\n", - "\n", - "om {fl v\n", - "\n", - "qgis - Trying to pe... New Script - Earth...\n", - "\n", - "Chat @ Refresh\n", - "\n", - "explain more about the following as per the sources\n", - "- 1. The change in allele frequency due to selection\n", - "2. The probability of fixation for an allele with\n", - "selection advantage and the time to fixation 3. The\n", - "equilibrium frequencies for both alleles in\n", - "overdominance\n", - "\n", - "Here's more information about allele frequency\n", - "change due to selection, the probability and time to\n", - "fixation of an allele with a selective advantage, and\n", - "equilibrium frequencies in overdominance, based on\n", - "the provided sources:\n", - "1. The change in allele frequency due to selection\n", - "* General principle Natural selection causes\n", - "changes in allele frequencies from one generation\n", - ".. . Fitter individuals produce more\n", - "viable offspring, influencing future generations 3.\n", - "\n", - "to the next 1\n", - "\n", - "- Fisher's theorem The fitness of the population is\n", - "\n", - "maximized 4\n", - "\n", - "14 sources\n", - "\n", - "Start typing...\n", - "\n", - "°©@\n", - "\n", - "Describe Wright-Fisher model assumptions abou >\n", - "\n", - "Pastebin.com - #1...\n", - "\n", - "So\n", - "te)\n", - "\n", - "$vertr? 6 8B &@ SB FQ\n", - "\n", - "® EvolG all pdfs - Notebook BAKSMMaTi ela dale most rel is\n", - "\n", - "Q: Search Startpage v\n", - "\n", - "TargetP 2.0-DTU... https://www.mood... OnePlus 12R revie... Who is “Indian” in ...\n", - "\n", - "«~ Share 3 Settings\n", - "\n", - "Studio > Note a\n", - "\n", - "Genomic Signatures of Natural Selection\n", - "\n", - "(Saved responses are view only)\n", - "\n", - "a reduction in variation [9].\n", - "2. Detecting Selection: Neutrality Tests\n", - "\n", - "- Neutrality tests compare observed data to theoretical\n", - "expectations under a neutral model (where alleles are not under\n", - "selection) to detect if selection has occurred [10].\n", - "\n", - "* Comparing multiple loci across the genome: Because selection\n", - "acts on few genes, loci under selection should be outliers when\n", - "compared to other parts of the genome [10, 11].\n", - "\n", - "* Tajima's D: This test compares two estimators of the population\n", - "mutation rate, 6 (theta). Tajima's D will be:\n", - "\n", - "* Negative when there is an excess of rare alleles, suggesting a\n", - "recent selective sweep or population expansion [5, 12].\n", - "\n", - "+ Positive when there is an excess of intermediate-frequency\n", - "alleles, suggesting balancing selection, population structure,\n", - "or a recent bottleneck [8, 9, 12].\n", - "\n", - "* Zero when a population is in equilibrium, and evolving\n", - "neutrally [12].\n", - "\n", - "* McDonald-Kreitman (MK) test: Compares the ratio of non-\n", - "synonymous (amino acid changing) to synonymous (silent)\n", - "substitutions within a species and between species. A higher ratio\n", - "between species compared to within species suggests positive\n", - "selection, and a lower ratio suggests purifying selection [13, 14].\n", - "\n", - "* Site Frequency Spectrum (SFS): The distribution of allele\n", - "frequencies in a sample. Selection leaves specific sianatures in the\n", - "\n", - "G) Convert to source\n", - "\n", - "NotebookLM can be inaccurate; please double check its responses.\n", - "\n", - "+\n", - "Q\n", - "\n", - "QC) Co reset —O—— 100 %\n", - "\n", - "c\n", - "\n", - "vA\n", - "\n", - "Mon Feb 10 16:00\n", - "\n", - "aw\n", - "\n", - "v\n", - "\n", - "QD\n", - "\n", - "ee\n", - "\n", - "Qu qa\n", - "\n", - "OH O®e< GO 8W OW A\n", - "\n", - "&\n", - "\n", - "16:00\n", - "\n", - "O © HD\n", - "\n", - "”\n", - "\n", - "ioh\n", - "\n", - "R\n", - "\n", - "i\n", - "\n", - "response to selection\n", - "selection intensity\n", - "\n", - "/ genetic variance\n", - "\n", - "' heritability\n", - "\n", - "Intro\n", - "\n", - "¢ Plant Morphogenesis\n", - "¢ Arabidopsis\n", - "\n", - "* Ovule development\n", - "* Kink & Bend\n", - "\n", - "Figure 1: Kink and Bend in Arabidopsis Thaliana\n", - "\n", - "Bottleneck models\n", - "\n", - "(A) (B)\n", - "\n", - "time\n", - "\n", - "population size\n", - "\n", - "Figure 5.2: Two cases in a bottleneck mode. (A) Only one ancestral line survives the\n", - "bottleneck. (B) Two or more lines survive which leads to different patterns in observed\n", - "data.\n", - "\n", - "8, 8, < Oy 8, > Ow\n", - "Tajima‘s D D<0 D>0\n", - "\n", - "It is more difficult for bottleneck modell!!\n", - "\n", - "Why is important to have an accurate demography?\n", - "\n", - "‘ol\n", - "\n", - "TY ey\n", - "\n", - "® o\n", - "position along genome\n", - "\n", - "\n", - "‘The difference between orthologs and paralogs lies in their evolutionary origin and functional\n", - "\n", - "rgence:\n", - "\n", - "1. Orthologs\n", - "\n", - "* Def\n", - "\n", - "1n: Genes that originate from a common ancestor due to a speci\n", - "\n", - "+ Function: Often retain similar functions across different species.\n", - "\n", - "+ Example: The hemoglobin gene in humans and mice is orthologous because both species\n", - "inherited it from a common ancestor.\n", - "\n", - "ints:\n", - "\n", - "Key\n", - "\n", - "V Arise from speciation events\n", - "V Found in different species\n", - "\n", - "V Generally have similar functions\n", - "\n", - "2. Paralogs\n", - "\n", - "* Def\n", - "\n", - "1n: Genes that arise from a gene duplication event within the same species.\n", - "+ Function: May evolve new or specialized functions.\n", - "\n", - "+ Example: Hemoglobin and myoglobin in humans—both originated from a gene duplication event\n", - "but evolved to serve different functions.\n", - "\n", - "Key Points:\n", - "V Arise from gene duplication events\n", - "V Found within the same species (or later diverging species)\n", - "\n", - "V Can have different functions\n", - "\n", - "Summary Table\n", - "Feature Orthologs Paralogs\n", - "Origin Speciation Gene duplication\n", - "Found in Different species ‘Same species (or later divergence)\n", - "Function Often conserved Can diverge significantly\n", - "\n", - "Example Human vs. mouse hemoglobin Human hemoglobin vs. myoglobin\n", - "\n", - "Self-fertilization TM\n", - "\n", - "Parents AA x aa Hetero- Homo-\n", - "J zygosity zygosity\n", - "\n", - "Aa «Aa\n", - "\n", - "— !~ may\n", - "\n", - "F, generation 50\n", - "\n", - "F, generation\n", - "\n", - "F, generation\n", - "\n", - "F, generation\n", - "\n", - "¢,corerion A ss\n", - "\n", - "Prof. Chns-Carolin Schon (TUM) | Plunt Brooding\n", - "\n", - "F, versus DH\n", - "\n", - "\n", - "Outcrossing — Panmixia — Hardy-Weinberg-Law TM\n", - "\n", - "In the absence of\n", - "\n", - "- selection\n", - "\n", - "- migration\n", - "\n", - "~ mutation\n", - "\n", - "we have under panmixia\n", - "\n", - "no change in gene frequencies\n", - "\n", - "EE recone\n", - "\n", - "=P, =p, -...\n", - "p=P+05H id mene\n", - "\n", - "equilibrium genotype\n", - "AA: Aa: aa=p?:2pq:q?\n", - "\n", - "after one generation!\n", - "\n", - "\n", - "Figure 1: Kink and Bend in Arabidopsis Thaliana\n", - "\n", - "\n", - "lf T’ is not significantly smaller than the fluctuation scale, the harmonic mean calculation risks\n", - "smoothing out critical periods of small population size, underestimating the true effect of genetic\n", - "drift on N.. For accurate modeling of genetic processes, T < min|[.N;] ensures that the\n", - "calculation aligns with the biological timescales of population size changes and their genetic\n", - "\n", - "consequences.\n", - "\n", - "Project 4: Phylogenetic Analysis\n", - "\n", - "Phylogenetic analysis is a crucial aspect of evolutionary biology and bioinformatics that\n", - "involves studying the evolutionary relationships among organisms. This project idea offers\n", - "opportunities for both undergraduate (UG) and postgraduate (PG) students to engage in\n", - "phylogenetic analysis, starting with constructing basic phylogenetic trees and progressing\n", - "to more complex methods.\n", - "\n", - "Bioinformatics Project Ideas — Undergraduate Level: Construct a Simple\n", - "Phylogenetic Tree\n", - "\n", - "At the undergraduate level, students can begin by constructing a basic phylogenetic tree\n", - "based on a gene or protein sequence. This project provides a foundational understanding of\n", - "phylogenetics and evolutionary relationships.\n", - "\n", - "Steps for UG Students:\n", - "\n", - "1. Gene or Protein Selection: Choose a gene or protein of interest that is well-\n", - "documented and has sequences available for multiple organisms.\n", - "\n", - "2. Sequence Alignment: Align the sequences of the chosen gene or protein using\n", - "software like ClustalW or MAFFT to identify conserved regions.\n", - "\n", - "3. Phylogenetic Tree Construction: Utilize software such as MEGA or PhyML to construct\n", - "a phylogenetic tree based on the aligned sequences. Apply methods like neighbor-\n", - "joining or maximum parsimony.\n", - "\n", - "4. Tree Visualization: Visualize the phylogenetic tree, highlighting the evolutionary\n", - "relationships among the organisms.\n", - "\n", - "5. Interpretation: Gain insights into the evolutionary history and relatedness of the\n", - "organisms based on the tree’s topology. Consider factors like branching patterns and\n", - "branch lengths.\n", - "\n", - "Postgraduate Level: Complex Phylogenetic Analyses and Co-evolutionary Patterns\n", - "\n", - "Bioinformatics Project Ideas — For postgraduate students, the project can advance to more\n", - "complex phylogenetic analyses, incorporating maximum likelihood methods and exploring\n", - "co-evolutionary patterns among genes or organisms.\n", - "\n", - "Additional Steps for PG Students:\n", - "\n", - "1. Maximum Likelihood Analysis: Learn and apply maximum likelihood methods for\n", - "phylogenetic tree reconstruction, which offer more accurate models of sequence\n", - "evolution. Software packages like RAXML or PhyML can be used.\n", - "\n", - "2. Molecular Clock Analysis: Investigate the concept of molecular clocks to estimate\n", - "divergence times between species. This involves incorporating evolutionary rates into\n", - "phylogenetic analyses.\n", - "\n", - "3. Co-evolutionary Analysis: Explore co-evolutionary patterns between genes, proteins,\n", - "or organisms using tools like Coevol or CAPS. Understand how changes in one\n", - "component correlate with changes in another.\n", - "\n", - "4. Advanced Tree Visualization: Use advanced tree visualization tools to create\n", - "informative and publication-quality figures. Highlight key evolutionary events or\n", - "relationships.\n", - "\n", - "5. Biological Interpretation: Analyze the implications of the phylogenetic findings. How\n", - "do the results contribute to our understanding of evolutionary processes, adaptations, or\n", - "co-evolutionary dynamics?\n", - "\n", - "6. Publication and Presentation: Encourage PG students to disseminate their findings\n", - "through research publications or presentations at scientific conferences, contributing to\n", - "the field of evolutionary biology and phylogenetics.\n", - "\n", - "In summary, phylogenetic analysis projects offer a captivating journey into the study of\n", - "evolutionary relationships among organisms. These projects provide valuable insights into\n", - "the evolutionary history of genes, proteins, and species, and they equip students with\n", - "essential skills in bioinformatics and computational biology. Additionally, complex\n", - "phylogenetic analyses enable postgraduate students to explore cutting-edge methods and\n", - "contribute to our understanding of co-evolutionary dynamics in biology.\n", - "\n", - "Project 5: Drug Discovery and Virtual Screening\n", - "\n", - "Drug discovery is a multidisciplinary field that combines biology, chemistry, and\n", - "computational methods to identify and design potential drug candidates. This project idea\n", - "provides opportunities for both undergraduate (UG) and postgraduate (PG) students to\n", - "explore the exciting world of drug discovery, starting with basic virtual screening\n", - "experiments and progressing to advanced structure-based drug design.\n", - "\n", - "Undergraduate Level: Basic Virtual Screening\n", - "\n", - "At the undergraduate level, students can start by learning about drug databases and\n", - "conducting basic virtual screening experiments to identify potential drug candidates. This\n", - "project offers an introduction to the concepts and tools used in drug discovery.\n", - "\n", - "Steps for UG Students:\n", - "\n", - "1. Drug Database Exploration: Familiarize yourself with drug databases like PubChem or\n", - "DrugBank. Select a target protein of interest, preferably one with known drug-binding\n", - "sites.\n", - "\n", - "2. Ligand Preparation: Retrieve ligand molecules (small compounds) from the database\n", - "that may potentially bind to your target protein. Prepare the ligands by removing any\n", - "irrelevant atoms or functional groups.\n", - "\n", - "3. Protein-Ligand Docking: Utilize software tools like AutoDock or PyRx to perform\n", - "\n", - "2-IV\n", - "\n", - "Figure 1: Kink and Bend in Arabidopsis Thaliana\n", - "\n", - "> Science. 2009 Oct 9;326(5950):289-93. doi: 10.1126/science.1181369.\n", - "\n", - "Comprehensive mapping of long-range interactions\n", - "reveals folding principles of the human genome\n", - "\n", - "Erez Lieberman-Aiden ’, Nynke L van Berkum, Louise Williams, Maxim Imakaev, Tobias Ragoczy,\n", - "Agnes Telling, Ido Amit, Bryan R Lajoie, Peter J Sabo, Michael O Dorschner, Richard Sandstrom,\n", - "Bradley Bernstein, M A Bender, Mark Groudine, Andreas Gnirke, John Stamatoyannopoulos,\n", - "Leonid A Mirny, Eric S Lander, Job Dekker\n", - "\n", - "Affiliations + expand\n", - "PMID: 19815776 PMCID: PMC2858594 DOI: 10.1126/science.1181369\n", - "\n", - "Abstract\n", - "\n", - "We describe Hi-C, a method that probes the three-dimensional architecture of whole genomes by\n", - "coupling proximity-based ligation with massively parallel sequencing. We constructed spatial\n", - "proximity maps of the human genome with Hi-C at a resolution of 1 megabase. These maps confirm\n", - "the presence of chromosome territories and the spatial proximity of small, gene-rich chromosomes.\n", - "We identified an additional level of genome organization that is characterized by the spatial\n", - "segregation of open and closed chromatin to form two genome-wide compartments. At the\n", - "megabase scale, the chromatin conformation is consistent with a fractal globule, a knot-free,\n", - "polymer conformation that enables maximally dense packing while preserving the ability to easily\n", - "fold and unfold any genomic locus. The fractal globule is distinct from the more commonly used\n", - "globular equilibrium model. Our results demonstrate the power of Hi-C to map the dynamic\n", - "conformations of whole genomes.\n", - "\n", - "Figure 4 Genetic separation between\n", - "population pairs. (a) Relative cross\n", - "coalescence rates in and out of Africa.\n", - "African-non-African pairs are shown in red,\n", - "and pairs within Africa are shown in purple.\n", - "(b) Relative cross coalescence rates between\n", - "populations outside Africa. European—East\n", - "Asian pairs are shown in blue, Asian-MXL\n", - "pairs are shown in green, and other\n", - "non-African pairs are shown in other\n", - "\n", - "colors, as indicated. The pairs that include\n", - "MXL are masked to include only the putative\n", - "Native American components. In a and b,\n", - "the most recent population separations\n", - "\n", - "are inferred from eight haplotypes, that is,\n", - "four haplotypes from each population, and\n", - "corresponding pairs are indicated by a\n", - "\n", - "cross. (c) Comparison of the African—non-\n", - "African split with simulations of clean splits.\n", - "We simulated three scenarios, at split times\n", - "50,000, 100,000 and 150,000 years ago.\n", - "The comparison demonstrates that the history\n", - "of relative cross coalescence rate between\n", - "African and non-African ancestors\n", - "\n", - "is incompatible with a clean split model\n", - "\n", - "and suggests it progressively decreased from\n", - "\n", - "Relative cross coalescence rate\n", - "\n", - "Relative cross coalescence rate ©\n", - "\n", - "0.8\n", - "\n", - "0.6\n", - "\n", - "O4\n", - "\n", - "0.2\n", - "\n", - "— MXL-YRI\n", - "— CEU-YRI\n", - "— CHB-YRI\n", - "— CEU-MKK\n", - "— CEU-LWK\n", - "~ YRI-MKKT\n", - "= LWK-MKKt\n", - "= YRI-LWkt\n", - "\n", - "10°\n", - "\n", - "Time (years ago)\n", - "\n", - "100\n", - "\n", - "Time (x1 o years ago)\n", - "\n", - "150\n", - "\n", - "a\n", - "fs 1.0\n", - "2\n", - "2 08 — CHB-CEU\n", - "8 ~ MXL-CEU\n", - "8 0.6 — CHB-MXL\n", - "8 — GIH-MXL\n", - "8 04 = CHB-GIH\n", - "3 — GIH-CEut\n", - "2 02 - CHB-UPT!\n", - "= CEU-TSI\n", - "2 o CEU-TSI\n", - "10°\n", - "200\n", - "® 100\n", - "~ CEU-YRI Fy\n", - "~ 50,000 years ago, %b 50\n", - "simulation XK\n", - "— 100,000 years ago, 3 20\n", - "simulation E\n", - "= 150,000 years ago, 10\n", - "\n", - "200\n", - "\n", - "simulation\n", - "\n", - "250\n", - "\n", - "beyond 150,000 years ago to approximately 50,000 years ago. (d) Schematic of population separations. Timings of splits, population separations,\n", - "gene flow and bottleneck are shown along a logarithmic axis of time.\n", - "\n", - "\n", - "© aman — nano ./Downloads/assignment/Ecoli_|\n", - "\n", - "fi/Ecoli_hifi_genome.gff — 208x63\n", - "\n", - "nment/Ecoli_hifi/Ecoli_hifi_genome.gff\n", - "\n", - "ile: ./Downloads/as\n", - "\n", - "Ww PICO 5.09\n", - "\n", - "i#gff-version 3\n", - "\n", - "##sequence-region tig@0000001 1\n", - "\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tige0eee0e1\n", - "tigeeeee0e1\n", - "\n", - "Wie) Get Help\n", - "Wed Exit\n", - "\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "minced:@.2.0\n", - "\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "Prodigal: 002006\n", - "\n", - "465\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "CRI\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "cDS\n", - "\n", - "7533\n", - "\n", - "99\n", - "1718\n", - "2811\n", - "5892\n", - "7393\n", - "7888\n", - "8982\n", - "9643\n", - "10258\n", - "11177\n", - "11567\n", - "12412\n", - "13701\n", - "14611\n", - "16038\n", - "16693\n", - "17210\n", - "17540\n", - "18250\n", - "18726\n", - "19756\n", - "20511\n", - "21277\n", - "22479\n", - "23565\n", - "25018\n", - "25799\n", - "26529\n", - "26863\n", - "27693\n", - "28797\n", - "29615\n", - "30377\n", - "33014\n", - "33225\n", - "33615\n", - "35767\n", - "36774\n", - "37895\n", - "38158\n", - "39034\n", - "39596\n", - "40182\n", - "40790\n", - "42619\n", - "43560\n", - "45279\n", - "45821\n", - "46585\n", - "46988\n", - "47617\n", - "49050\n", - "50764\n", - "51926\n", - "52606\n", - "54986\n", - "\n", - "SPR\n", - "\n", - "We) WriteOut\n", - "We) Justify\n", - "\n", - "1643\n", - "\n", - "2452\n", - "\n", - "5477\n", - "\n", - "7400\n", - "\n", - "7875\n", - "\n", - "8979\n", - "\n", - "9656\n", - "\n", - "10242\n", - "11175\n", - "11461\n", - "12329\n", - "13449\n", - "14609\n", - "16038\n", - "16643\n", - "17016\n", - "17521\n", - "18250\n", - "18729\n", - "19775\n", - "20517\n", - "21137\n", - "22416\n", - "23471\n", - "24929\n", - "25794\n", - "26437\n", - "26900\n", - "27696\n", - "28601\n", - "29564\n", - "30271\n", - "32938\n", - "33148\n", - "33578\n", - "35693\n", - "36777\n", - "37895\n", - "38167\n", - "39030\n", - "39384\n", - "40057\n", - "40793\n", - "42616\n", - "43542\n", - "45269\n", - "45821\n", - "46588\n", - "46995\n", - "47458\n", - "49041\n", - "50507\n", - "51777\n", - "52453\n", - "54858\n", - "56119\n", - "\n", - "tet et eteetse\n", - "\n", - "tet et etetetsei\n", - "\n", - "i\n", - "\n", - "tet etetesti\n", - "\n", - "++H1\n", - "\n", - "F\n", - "\n", - "SPBV2VVDVVVOVVO\n", - "\n", - "PBYWDVDDWDD WDD VDD DD VDD VV VDD VDDD DVD VDDVDDVDDVDVDVDVDVDVDVDVDVDVVVVVVOVOQ:\n", - "\n", - "Wii Read File\n", - "Wil) Where is\n", - "\n", - "ID=KBOCNLJJ_00001; eC_number=1.8.1.2;Name=cysI_1;db_xref=COG:C0G0155; gene=cysI_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00002; eC_number=1.8.4.8;Name=cysH_1;db_xref=COG:C0G0175; gene=cysH_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00003; eC_number=3.1.-.-—;Name=ygcB_1;db_xref=COG:C0G1203; gene=ygcB_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00004;Name=casA_1;gene=casA_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4690$\n", - "ID=KBOCNLJJ_@0005 ; Name=casB_1;gene=casB_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P7663$\n", - "ID=KBOCNLJJ_00006;Name=casC_1;gene=casC_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n", - "ID=KBOCNLJJ_@0007 ; Name=casD_1;gene=casD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:Q4689$\n", - "ID=KBOCNLJJ_00008; eC_number=3.1. j;Name=casE_1;gene=casE_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n", - "ID=KBOCNLIJJ_00009; eC_number=3.1. j;Name=ygbT_1;db_xref=COG:C0G1518; gene=ygbT_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00010; eC_number=3.1.-.-—;Name=ygbF_1;gene=ygbF_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequen$\n", - "note=CRISPR with 13 repeat units;rpt_family=CRISPR;rpt_type=direct\n", - "\n", - "ID=KBOCNLJJ_00011;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00011;product=hypothetical protein\n", - "ID=KBOCNLJJ_00012; eC_number=2.7.7.4;Name=cysD_1;db_xref=COG:C0G0175; gene=cysD_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00013; eC_number=2.7.7.4;Name=cysN; db_xref=COG:C0G2895; gene=cysN;inference=ab initio prediction:Prodigal: 002006, simi$\n", - "ID=KBOCNLJJ_00014; eC_number=2.7.1.25;Name=cysC; db_xref=COG:C0G@529; gene=cysC;inference=ab initio prediction:Prodigal: 002006, sim$\n", - "ID=KBOCNLJJ_00015 ; Name=ygbE; gene=ygbE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P46141;1lo$\n", - "ID=KBOCNLJJ_00016;Name=ftsB; db_xref=COG:C0G2919; gene=ftsB;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00017; eC_number=2.7.7.60;Name=ispD; db_xref=COG:C0G1211; gene=ispD;inference=ab initio prediction:Prodigal: 002006, sim$\n", - "ID=KBOCNLJJ_00018; eC_number=4.6.1.12;Name=ispF; db_xref=COG:C0G@245; gene=ispF;inference=ab initio prediction:Prodigal: 002006, sim$\n", - "ID=KBOCNLJJ_00019; eC_number=5.4.99.27;Name=truD; db_xref=COG:C0G0585; gene=truD; inference=ab initio prediction:Prodigal:002006,si$\n", - "ID=KBOCNLJJ_00020; eC_number=3.1.3.5;Name=surE; db_xref=COG:C0G0496; gene=surE;inference=ab initio prediction:Prodigal: 002006, simi$\n", - "ID=KBOCNLJJ_00021; eC_number=2.1.1.77;Name=pcm; db_xref=COG:C0G2518; gene=pcm; inference=ab initio prediction:Prodigal: 002006, simil$\n", - "ID=KBOCNLJJ_00022;Name=n1pD_1; db_xref=COG:C0G@739; gene=nlpD_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n", - "ID=KBOCNLJJ_00023;Name=rpoS; db_xref=COG:C0G0568; gene=rpoS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00024;Name=ygbN; db_xref=COG:C0G2610;gene=ygbN; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00025; eC_number=5.3.1.35;Name=otnI; db_xref=COG:C0G3622; gene=otnI;inference=ab initio prediction:Prodigal: 002006, sim$\n", - "ID=KBOCNLJJ_00026; eC_number=4.1.1.104;Name=otnC;gene=otnC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_0@0027; eC_number=2.7.1.217;Name=otnK_1;db_xref=COG:C0G3395; gene=otnK_1;inference=ab initio prediction:Prodigal:00200$\n", - "ID=KBOCNLJJ_00028; eC_number=2.7.1.217;Name=otnK_2;db_xref=COG:C0G3395; gene=otnK_2;inference=ab initio prediction:Prodigal:00200$\n", - "ID=KBOCNLJJ_00029; eC_number=1.1.1.411;Name=1tnD; gene=1tnD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00030;Name=g1lcR;db_xref=COG:C0G1349; gene=glcR;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00031; eC_number=3.1.3.16;Name=pphB; db_xref=COG:C0G@639; gene=pphB; inference=ab initio prediction:Prodigal: 002006, sim$\n", - "ID=KBOCNLJJ_00032;Name=mutS;db_xref=COG:C0G0249; gene=mutS;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00033;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00033;product=hypothetical protein\n", - "ID=KBOCNLJJ_00034;inference=ab initio prediction: Prodigal : 002006; locus_tag=KBOCNLJJ_00034;product=hypothetical protein\n", - "ID=KBOCNLJJ_00035 ; Name=fh1A; db_xref=COG:C0G3604;gene=fhlA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLIJJ_00036; eC_number=4.2.1.—;Name=hypE; db_xref=COG:C0G@309; gene=hypE;inference=ab initio prediction:Prodigal: 002006, simi$\n", - "ID=KBOCNLJJ_00037 ; Name=hypD; db_xref=COG:C0G0409; gene=hypD; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00038; Name=hypC; db_xref=COG:C0G0298; gene=hypC;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLIJJ_00039 ; Name=hypB; db_xref=COG:C0G0378; gene=hypB; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_@0040 ; Name=hypA; db_xref=COG:C0G0375; gene=hypA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_@0041;Name=hycA;gene=hycA;inference=ab initio prediction:Prodigal:002006,similar to AA sequence:UniProtKB:P@AEV4; 1lo$\n", - "ID=KBOCNLJJ_00042; eC_number=1.-. j;Name=hyfA_1; db_xref=COG:C0G1142; gene=hyfA_1;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00043; eC_number=7.1.1.—;Name=ndhB_1;gene=ndhB_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n", - "ID=KBOCNLJJ_00044;Name=hycD; db_xref=COG:C0G0650;gene=hycD;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_@0045 ; Name=hycE; db_xref=COG:C0G3261; gene=hycE;inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00046; eC_number=7.1.1.—;Name=ndhI_1;gene=ndhI_1;inference=ab initio prediction:Prodigal:002006,protein motif :HAMAP:$\n", - "ID=KBOCNLJJ_00047 ; Name=hycG_1; db_xref=COG:C0G3260; gene=hycG_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n", - "ID=KBOCNLJJ_00048;inference=ab initio prediction: Prodigal: 002006; locus_tag=KBOCNLJJ_00048;product=hypothetical protein\n", - "ID=KBOCNLJJ_0@0049; eC_number=3.4.23.51;Name=hycI ;db_xref=COG:C0G0680;gene=hycI;inference=ab initio prediction:Prodigal:002006,si$\n", - "ID=KBOCNLJJ_@0050; eC_number=3.2.1.86;Name=bg1H_1;db_xref=COG:C0G2723; gene=bg1H_1;inference=ab initio prediction:Prodigal:002006$\n", - "ID=KBOCNLJJ_00051; Name=bg1F_1;db_xref=COG:C0G1263; gene=bg1F_1;inference=ab initio prediction:Prodigal:002006,similar to AA sequ$\n", - "ID=KBOCNLJJ_00052;Name=ascG; db_xref=COG:C0G1609; gene=ascG; inference=ab initio prediction:Prodigal:002006,similar to AA sequence$\n", - "ID=KBOCNLJJ_00053; eC_number=1.-.-.-—;Name=hyfA_2;db_xref=COG:C0G1142;gene=hyfA_2;inference=ab initio prediction:Prodigal:002006,$\n", - "ID=KBOCNLJJ_00054; eC_number=6.2.-—.—;Name=hypF; db_xref=COG:C0G@068; gene=hypF;inference=ab initio prediction:Prodigal: 002006, simi$\n", - "ID=KBOCNLJJ_00055; eC_number=1.18.1.-—;Name=norw; db_xref=COG:C0G1251; gene=norW; inference=ab initio prediction:Prodigal: 002006, sim$\n", - "\n", - "bad Prev Pg Wag Cut Text wie Cur Pos\n", - "WA) Next Pg wig) UnCut Text Way To Spell\n", - "\n", - "\n", - "tion divergence as a function of divergence time\n", - "\n", - "bt (generators\n", - "\n", - "Ot (gene ations\n", - "\n", - "\n", - "© Pupiisn\n", - "\n", - "10-\n", - "\n", - "group\n", - "° 1G\n", - "\n", - "BOUBUBA %LZ 'ZOd\n", - "\n", - "\n", - "Figure 1 MSMC locally infers branch lengths a Recombination\n", - "\n", - "and coalescence times from observed\n", - "\n", - "mutations. (a) Schematic of the model. Total branch length T Time\n", - "Local genealogies change along the sequences (past)\n", - "by recombination events that rejoin branches of First coalescence t\n", - "\n", - "the tree, according to the SMC’ model®®. (hidden state) %\n", - "The pattern of mutations depends on the %\n", - "\n", - "genealogy, with few mutations on branches % a SS\n", - "with recent coalescences and more mutations\n", - "in deeper branches. The hidden states of the\n", - "model are the time to the first coalescence and\n", - "the identity of the two sequences participating\n", - "in the first coalescence. (b) MSMC can locally\n", - "infer its hidden states, shown by the posterior\n", - "probability with color. In black, we plot the\n", - "first coalescence time as generated by the\n", - "simulation. This local inference works well\n", - "\n", - "for two, four and eight haplotypes. As more 300\n", - "haplotypes are used, the typical time to the Position (kb)\n", - "first coalescence event decreases, whereas the 4 haplotypes\n", - "typical segment length increases.\n", - "\n", - "cs\n", - "\n", - "Log\n", - "\n", - "First coalescence fy...\n", - "\n", - "of the sample size (M), = 2/(M(M — 1)), in\n", - "units of 2No generations (Fig. 1b and Online\n", - "Methods), where No is the long-term average\n", - "effective population size. Here we demonstrate\n", - "\n", - "0 200 400 600 800 1,000 1,200 1,400\n", - "application of our model on up to 8 haplotypes, Position (kb)\n", - "which allows us to study changes in popula- 8 haplotypes\n", - "tion size occurring as recently as 70 genera- 0.15\n", - "tions ago. As a special case of MSMC for two\n", - "haplotypes, we provide a new implementation\n", - "of PSMC that we call PSMC’ because it uses\n", - "the SMC’ model, which accounts for recombi-\n", - "nation events between segments with the same\n", - "time to coalescence®. PSMC’ accurately esti- 500 1,000 1,500 2,000 2,500\n", - "mates the recombination rate (Supplementary Position (kb)\n", - "Fig. 1), which is not the case for PSMC’.\n", - "\n", - "First coalescence tj,.\n", - "\n", - "S\n", - "o\n", - "\n", - "0.05\n", - "\n", - "First coalescence tj...\n", - "\n", - "Ayiqeqosd 10N0}s0q\n", - "\n", - "What excites you about doing science?\n", - "\n", - "What excites you about doing science?\n", - "do you have? Please describe a past ex\n", - "your drive for scientific inquiry. {max 3C\n", - "\n", - "COURSEWORK 4 6\n", - "DL: check the Moodle athe\n", - "\n", - "7 >\n", - "\n", - "Towards complete and error-free genome assemblies of\n", - "\n", - "all vertebrate species\n", - "1.Pick one of main themes . t Pp\n", - "\n", - "MORE VIDEOS\n", - "[BREE TA\n", - "\n", - "TECH\n", - "Pm i) 35:57/37:42 © @ & Voulube ++\n", - "\n", - "\n", - "Reference genome $$ eee eC CCC OOOO OCT OO a\n", - "\n", - "= Sa « al\n", - "’ ’\n", - "nn | Pullup (or grastso dactang\n", - "\n", - ">\n", - "-\n", - "\n", - "*\n", - "\n", - "—\n", - "- : =\n", - ". -\n", - "*\n", - "ee\n", - "\n", - "Effects of Population Size on Genetic Diversity Metrics\n", - "\n", - "“To understand how population growth, dactne, and stability influence genetic diversity, we consider\n", - "ther effects on tres kay mats:\n", - "\n", - "+x (Theta n): The average numberof pairwise cifferences between sequences.\n", - "+ 8W (Theta Watterson}: A measure based on the number af segregating sites.\n", - "‘+ Tojima's D: A statistical test that compares 8 and 6W to detect deviations from neutral\n", - "\n", - "evolution,\n", - "\n", - "“These simplified scenarios illustrate how population size changes impact genetic variation.\n", - "\n", - "Scenario 1: Population Growth\n", - "\n", - "Description:\n", - "‘When @ population expands rapidly, many rare alleles appear due to the racent increase in inevicuals.\n", - "\n", - "‘Assumptions (Hypothetical Values):\n", - "\n", - "+ on\n", - "\n", - "(Pairwise citferences are low since most sequences are very similar due tothe\n", - "expansion)\n", - "\n", - "+ 8W-=4 (More segregating sites appear due to expansion)\n", - "+ Tajima’sb Calculation:\n", - "\n", - "O, = Ow 2-4\n", - "” Sandard deviation 1\n", - "\n", - "Since 6r < 8W, Tajma’s Dis negative.\n", - "\n", - "‘conclusion:\n", - "Population growth results in 6x < OW and Tajima’s D <0, indicating an excoss of rare variants\n", - "\n", - "Scenario 2: Population Deciit\n", - "\n", - "Description:\n", - "\n", - "e (Bottleneck)\n", - "\n", - "‘A population experiences a drastic reduction in size, leacing to the loss of rare alleles and an\n", - "‘overrpresentation of comman ones.\n", - "\n", - "‘Assumptions (Hypothetical Values):\n", - "+ on\n", - "\n", - "(Pairwise citferonces are higher because the remaining sequences are more divergent)\n", - "+ eW=4 (Fewer segregating sites due tothe bottleneck)\n", - "+ Tajima’sb Calculation:\n", - "\n", - "0, = Ow. on4\n", - "~ Wandard deviation ~ 1\n", - "\n", - "Since 6x > BW, Tajma’s Dis postive\n", - "\n", - "D =2\n", - "\n", - "‘conclusion:\n", - "\n", - "Population dectne results in @x > BW and Taima's D > 0, suggesting a loss of rae aloes.\n", - "\n", - "Scenario 3: Constant Population Size\n", - "\n", - "Description:\n", - "[A population remains stable ver time, with alle frequencies evolving neutral\n", - "\n", - "‘Assumptions (Hypothetical Values):\n", - "+ on\n", - "\n", - "(Pairwise citferances match the expected diversity level)\n", - "+ @W=5 (Segregating sites align with a stable population)\n", - "+ Tajima’sb Calculation:\n", - "\n", - "0, — Ow\n", - "Randard deviation 1\n", - "\n", - "Since 6x = BW, Tajima’s Dis zor,\n", - "\n", - "D\n", - "\n", - "‘conclusion:\n", - "\n", - "[A stable population results in 8x = 8W and Taima’s D = 0, indicating neutral evelution.\n", - "\n", - "Summary\n", - "\n", - "‘Changes in population size affect genetic variation in distinct ways:\n", - "+ Population Growth > More rave alleles > Negative Tajima’sD.\n", - "+ Population Decline > Fewer rare alleles > Positive Tajima's .\n", - "\n", - "+ Stable Population > Balanced allele frequencies > Tajima’s D= 0.\n", - "\n", - "‘These tends help researchers infer historical der raphic changes in populations from genetic data,\n", - "\n", - "Fragment\n", - "\n", - "= ————\n", - "\n", - "=. —,\n", - "> sequencing *——— a\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "This slide focuses on the effect of slow fluctuations in population size on the effective\n", - "population size (V.) and emphasizes the conditions under which the harmonic mean formula for\n", - "N- applies. Slow fluctuations occur when the time period of interest (Z’) is much shorter than the\n", - "minimum population size (min[N;]) across the fluctuation cycle. In such cases, the population\n", - "\n", - "-1\n", - "size appears relatively stable, and the harmonic mean formula (N. = (4 wh x) ) may not\n", - "\n", - "accurately represent the effective population size over longer periods. The diagram illustrates that\n", - "during slow fluctuations, the coalescent events occur more gradually, and population size changes\n", - "are less abrupt compared to rapid fluctuations. The key message is that for the harmonic mean\n", - "calculation to be meaningful, the time scale of observation (Z') must be significantly smaller than\n", - "the scale of population size changes, ensuring accurate modeling of genetic drift and coalescence\n", - "\n", - "processes over generations.\n", - "\n", - "= F, compares the average expected heterozygosity of\n", - "individual subpopulations (S) to the total expected\n", - "heterozygosity if the subpopulations are combined (T).\n", - "\n", - "py, = n= Hs) -\\-(%)\n", - "H, H,\n", - "\n", - "@ Safari File Edit\n", - "\n", - "M- <\n", - "\n", - "Paraphraser\n", - "\n", - "x\n", - "\n", - "Grammar\n", - "Checker\n", - "\n", - "ws\n", - "Al Detector\n", - "Q@\n", - "\n", - "Plagiarism\n", - "Checker\n", - "\n", - "@\n", - "Al\n", - "Humanizer\n", - "\n", - "ie)\n", - "Al Chat\n", - "\n", - "cs\n", - "\n", - "Al lmage\n", - "Generator\n", - "\n", - "Summarizer\n", - "\n", - "MA\n", - "\n", - "Translate\n", - "\n", - "99\n", - "\n", - "Citation\n", - "Generator\n", - "\n", - "eG\n", - "\n", - "QuillBot\n", - "Flow\n", - "\n", - "a\n", - "a\n", - "QuillBot for\n", - "macOS\n", - "\n", - "View History Bookmarks Window’ Help\n", - "\n", - "118\n", - "\n", - "© ® ec\n", - "\n", - "nH Al Detector - QuillBot Al oe G\n", - "\n", - "quillbot.com\n", - "\n", - "@&@ GO BO ¥ 3 ©\n", - "\n", - "Gea ©\n", - "\n", - "(4) Perfect your writing in all your favorite apps with QuillBot for macOS\n", - "\n", - "Al Detector\n", - "\n", - "English French Spanish German Ally\n", - "\n", - "factors. Also some of the differential genes were associated with compartment switches too, W\n", - "especially upregulated ones, but these were not statistically significant. It was seen that\n", - "upregulated genes had more significant structural links as compared to the downregulated\n", - "genes. Although the smaller number of downregulated genes may reduce statistical power,\n", - "\n", - "the consistent lack of enrichment across architectural levels suggests that their regulation is\n", - "\n", - "less connected to architecture reorganization.\n", - "\n", - "Taken together, the transcriptional changes in the PRC2 mutant are linked to regions\n", - "undergoing architectural reorganisation in the form of loops, weak insulation, and\n", - "compartment switches. It was also noted that not all architectural changes connected to\n", - "transcriptional changes, and not all DEGs aligned with structural reorganization, implying\n", - "presence of additional regulatory layers. Chromatin architecture provides a necessary\n", - "framework for gene regulation, but it may not be sufficient on its own.\n", - "\n", - "with many being linked to upregulated genes. These results indicate that the effect of PRC2\n", - "\n", - "loss on transcription is not restricted to newly formed contacts but extends across different\n", - "\n", - "categories of loop stability. Moreover, genes were often contacted by multiple loops, in some\n", - "cases over ten, pointing to a high degree of regulatory connectivity. The reason for this\n", - "multiplicity or redundancy was not explored in terms of log fold change. Some genes had\n", - "\n", - "oD\n", - "\n", - "2,909 Words @ Analysis complete\n", - "\n", - "Want your text to sound more authentic?\n", - "\n", - "Model Version: v5.7.1\n", - "\n", - "2%\n", - "\n", - "of text is likely Al ©\n", - "© QuillBot\n", - "\n", - "Al\n", - "\n", - "Al-generated @\n", - "Al-generated & Al-refined @\n", - "Human-written & Al-refined @\n", - "\n", - "Human-written @\n", - "\n", - "¥Y Understanding your results\n", - "\n", - "Human\n", - "\n", - "< Share\n", - "\n", - "@ Tue 14. Oct 22:33\n", - "\n", - "a\n", - "\n", - "&} Apps and Extensio...\n", - "\n", - "& Download =\n", - "\n", - "Feedback\n", - "\n", - "D\n", - "\n", - "History\n", - "\n", - "oO 22%\n", - "0%\n", - "0%\n", - "\n", - "98%\n", - "\n", - "Refine with Paraphra\n", - "\n", - "v\n", - "\n", - "ae (eG\n", - "\n", - "\n", - "Figure 2 Testing MSMC on simulated data. a b ~ 10,000 years ago. Sees\n", - "\n", - "(a) To test the resolution of MSMC applied to = Simulation — 40,000 years ago, 8 haplotypes\n", - "two, four and eight haplotypes, we simulated — 2 haplotypes +++» 100,000 years ago, simulation\n", - "a series of exponential population growths and — 4 haplotypes — 100,000 years ago, 4 haplotypes\n", - "\n", - "— 100,000 years ago, 8 haplotypes\n", - "\n", - "— 8 haplotypes\n", - "\n", - "declines, each changing the population size by\n", - "a factor of ten. MSMC recovers the resulting\n", - "zigzag pattern (on a double-logarithmic plot)\n", - "in different times, depending on the number\n", - "of haplotypes. With two haplotypes, MSMC\n", - "infers the population history from 40,000 to\n", - "\n", - "3 million years ago, whereas, with four and\n", - "eight haplotypes, it infers the population\n", - "history from 8,000 to 30,000 years ago ra 7 ;\n", - "and from 2,000 to 50,000 years ago, 10 10 10 10 10 10\n", - "respectively. (b) Model estimates from two Time (years ago) Time (years ago)\n", - "\n", - "simulated population splits 10,000 and 100,000 years ago. The dotted lines plot the expected relative cross coalescence rate between the two\n", - "populations before and after the splits. Maximum-likelihood estimates are shown in red (four haplotypes) and purple (eight haplotypes). As expected,\n", - "four haplotypes yield good estimates for the older split, whereas eight haplotypes give better estimates for the more recent split.\n", - "\n", - "ond\n", - "o\n", - "\n", - "°\n", - "©\n", - "\n", - "10°\n", - "\n", - "°\n", - "b\n", - "\n", - "10*\n", - "\n", - "Effective population size\n", - "°\n", - "ny\n", - "\n", - "Relative cross coalescence rate\n", - "o\n", - "o\n", - "\n", - "°\n", - "\n", - "\n", - "eco (ff = > OQ VD BG monkeytype.com Ws. Search SEARXNG-NALAKATH eo @°\n", - "\n", - "New merch store now open, including a limited edition metal keycap! monkeytype.store x\n", - "\n", - "70\n", - "94%\n", - "\n", - "cautich 80 176/7/86/0 82% 30s\n", - "\n", - "english\n", - "\n", - "GO @QZx,gQvnvuds» HM BOC BD.\n", - "\n", - "&\n", - "\n", - "Workspaces v <_|/txtai: @ All-in-one a Examples - txtai () txtai/examples/13_Similar () txtai/examples/38_Introdu Mc Introducing RAG with txta *K Image caption generation (m) Monkeytype | A minimalis' > +\n", - "\n", - "0@e S CI CQ Reset —O——$—$——— 100% = 21:20\n", - "\n", - "g\n", - "&\n", - "\n", - "‘oUala Cel (urgor IS Freguiatea DY a\n", - "\n", - "ABA complex network of interacting second\n", - "Se messengers, pH, membrane potential,\n", - "protein phosphorylation, ion channel\n", - "NOE 10 activity — and more!!\n", - "\n", - "\n", - "Variable population size\n", - "\n", - "Beyond the Standard Neutral Model\n", - "\n", - "Slow fluctuations\n", - "in population size : = =\n", - "\n", - "4 Need:\n", - "A, 7 T << min[N, |\n", - "\n", - "\n", - "@ = Safari File\n", - "\n", - "Edit View\n", - "\n", - "History\n", - "\n", - "¥% © & @ &#\n", - "\n", - ">\n", - "cod\n", - "\n", - "Q\n", - "\n", - "S Mon3.Nov 14:17\n", - "\n", - "-\n", - "eco -\n", - "\n", - "rp | A pipeline for...\n", - "\n", - "HUMAN CELL ATLAS,\n", - "DATA EXPLORER\n", - "\n", - "<\n", - "\n", - "Q tINIT tutorial...\n", - "\n", - "Bookmarks Window Help\n", - "0O9eW¢s8\n", - "Ce) The integrate... © Swagger UI\n", - "\n", - "explore.data.humancellatlas.org\n", - "\n", - "ce) Choose Expor...\n", - "\n", - "Explore > Export Selected Data > Download Selecte..\n", - "\n", - "Download Selected Data Using “curl”\n", - "\n", - "io Census data...\n", - "\n", - "io The integrate...\n", - "\n", - "Gea ©\n", - "\n", - "(=) HLCA/docs/fa...\n", - "\n", - "e Files from projects with access \"required\" will be excluded from this export.\n", - "\n", - "Download via curt\n", - "Species\n", - "\n", - "Mus musculus\n", - "\n", - "Homo sapiens\n", - "\n", - "File Type\n", - "Name\n", - "bai\n", - "\n", - "bam\n", - "\n", - "cmd.exe\n", - "\n", - "quest curl Command\n", - "\n", - "File Count\n", - "\n", - "22.0k\n", - "\n", - "22.0k\n", - "\n", - "22\n", - "\n", - "File Size\n", - "\n", - "39.15 GB\n", - "\n", - "3.98 TB\n", - "\n", - "24.66 GB\n", - "\n", - "The generated curl command is compatible with the Bash shell on Mac and Linux systems,\n", - "and the Command shell on Windows systems, and will remain valid for seven days.\n", - "\n", - "Current Query\n", - "\n", - "Access\n", - "true\n", - "\n", - "Genus Species\n", - "Homo sapiens\n", - "\n", - "Paired End\n", - "true\n", - "\n", - "Nucleic Acid Source\n", - "single cell\n", - "\n", - "File Source\n", - "DCP/2 Analysis\n", - "\n", - "File Format\n", - "loom\n", - "\n", - "Selected Data Summary\n", - "\n", - "Estimated Cells\n", - "570.8k\n", - "\n", - "File Size\n", - "24.66 GB\n", - "\n", - "Files\n", - "22\n", - "\n", - "Projects\n", - "19\n", - "\n", - "Species\n", - "Homo sapiens\n", - "\n", - "Donors\n", - "45\n", - "\n", - "Disease Status (Donor)\n", - "4 disease statuses\n", - "\n", - "Specimens\n", - "775\n", - "\n", - "Disease Status (Specimen)\n", - "3 disease statuses\n", - "\n", - "Anatomical Entity\n", - "12 anatomical entities\n", - "\n", - "Organ Part\n", - "14 organ parts\n", - "\n", - "Library Construction Method\n", - "2 library construction methods\n", - "\n", - "Paired End\n", - "true\n", - "\n", - "Downloaded and exported data is\n", - "\n", - "@ ChatGPT - Dr...\n", - "\n", - "Pastebin.com...\n", - "\n", - "a\n", - "© @ +\n", - "\n", - "Ce) Download Sel\n", - "\n", - "Help & Documentation + e@\n", - "\n", - "(\n", - "\n", - "v Please select\n", - "Chalmers tekniska hoegskola AB\n", - "Goteborgs Universitet\n", - "Handelshégskolan i Stockholm (HHS)\n", - "Hégskolan i Halmstad\n", - "Karlstads universitet\n", - "Karolinska Institutet\n", - "Kungliga Tekniska H6gskolan (KTH)\n", - "Linképings universitet (LiU)\n", - "Linnéuniversitetet\n", - "Lulea tekniska universitet\n", - "Lunds universitet\n", - "Stockholms universitet\n", - "Sveriges lantbruksuniversitet (SLU)\n", - "Umea universitet\n", - "Uppsala universitet\n", - "\n", - "Genetic context of bacterial aqpN genes\n", - "\n", - "44 AQPNsinKEGG (45% in arsenic resistance operons — 55 % in NO operon)\n", - "57 AQPNsin NCBI (68% in arsenic resistance operons — 32 % in NO operon)\n", - "As(V)\n", - "\n", - "As(II!)\n", - "\n", - "Progeny genotypes\n", - "\n", - "p2\n", - "PH\n", - "\n", - "=\n", - "—\n", - "eo\n", - "\n", - "(1/4)H2 (1/2)H2 (1/4)H2\n", - "HQ HQ\n", - "\n", - "2(P+(1/2)H)\n", - "(P rads (Q +(1/2)H) (Q oo\n", - "\n", - "\n", - "Arbuscule development\n", - "\n", - "a cee. ees\n", - "SbtM1 Gene\n", - "ceeennnnnnennnsnnenseneesennenennsenneenenennseneenensesnenasenennsecunmeaneneanees expression\n", - "BCPI\n", - "\n", - "PM Cell wall\n", - "\n", - "C | stage! Stage Il Stage Ill Stage lV Stage V\n", - "PPA Cell entry Birdsfoot Mature arbuscule Collapsed arbuscule\n", - "t t t t TL\n", - "CYCLOPS RAM1, RAM2 OsPT13\n", - "DIS\n", - "RED\n", - "\n", - "3 VAMPs @ PT4 tT ] SbtM1 P BCPI\n", - "\n", - "Scientific interests\n", - "\n", - "Research Interests:\n", - "\n", - "Description: At this stage, which research areas and scientific questions are you most interested in exploring during your PhD? Please describe the techniques and\n", - "methods you are currently considering. (min. 100 words - max. 400 words)\n", - "\n", - "CURRENT research area (Primary) Computational Biology, Genomes and Evolution\n", - "\n", - "Scientific Question:\n", - "Click here to enter your comments (What excites you about doing science?)\n", - "Applicant's answer:\n", - "\n", - "Epigenetic basis of complex Spontaneous epimutations Epigenetic clocks Machine learning of 3D\n", - "\n", - "traits chromatin contacts\n", - "\n", - "Genomic and epigenomic basis\n", - "\n", - "of high-alpine adaptation\n", - "\n", - "Usefulness of crosses\n", - "\n", - "Selection of Parents\n", - "\n", - "U, = Cj +R;\n", - "\n", - "m midparent value, perfect predictor of cj with additive gene action\n", - "\n", - "4 and absence of epistasis\n", - "\n", - "0.7 ¢ (0.8\n", - "\n", - "Vinyl\n", - "Rij = iho,\n", - "\n", - "i prediction difficult\n", - "\n", - "\n", - "Method\n", - "\n", - "Heterozygosity\n", - "\n", - "Nucleotide diversity (tt)\n", - "\n", - "Site Frequency Spectrum (SFS)\n", - "Linkage Disequilibrium (LD)\n", - "Tajima’s D\n", - "\n", - "Runs of Homozygosity (ROH)\n", - "\n", - "Effective Population Size (Ne)\n", - "\n", - "Signature of Bottleneck\n", - "\n", - "Decreased heterozygosity\n", - "\n", - "Reduced genetic diversity\n", - "\n", - "Skew toward intermediate alleles\n", - "\n", - "Increased LD, slower decay\n", - "\n", - "Positive values due to allele frequency shift\n", - "Longer ROH in bottlenecked populations\n", - "\n", - "Sudden decrease in Ne\n", - "\n", - "3. What sort of growth pattern in the epidermis would explain\n", - "the kink formation?\n", - "\n", - "°\n", - "3.1. Is there any cellular evidence for PD growth signal in epidermis?\n", - "\n", - "\n", - "Genome vv Tracks ¥ Sample Info v Session v Share Bookmark Save Image Circular View v Help v\n", - "\n", - "IGV oxford_e...me.fasta tig00000002:1,989,819-1,993,234 Q 3,416 bp (Select Tracks ) (Crosshairs )(_Center Line )(TrackLabels) @ +)\n", - "1,990 kb j 1,991 kb j 1,992 kb j 1,993 kb\n", - "AQ 0 EA A MY TAY A AY a\n", - "|= SS SS en |\n", - "tnaB tnaA mnmE_1\n", - "\n", - "INSTITUTE\n", - "\n", - "Heng igv.org UCSan Diego fe BROAD\n", - "\n", - "\n", - "Genome vv Tracks ¥ Sample Info v Session v Share Bookmark Save Image Circular View v Help v\n", - "\n", - "GV oxford_e...me.fasta _ tig00000002:2,754-6,178 Q 3,425 bp (Select Tracks )( Crosshairs )( Center Line ){ Track Labels ) (—) auu==® +)\n", - "3 kb j 4 kb j 5 kb j 6 kb\n", - "LSA A A 8 a\n", - "po ee ss sss | %\n", - "dadA_1 IKAOHOFJ_00007 fadR_1\n", - "pac Pi\n", - "dadA_2 fadR_2\n", - "\n", - "igv.org UCSanDiego EEBROAD\n", - "\n", - "INSTITUTE\n", - "\n", - "i al\n", - "\n", - "Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n", - "\n", - "eQTL-gene\n", - "links >20 kb |\n", - "\n", - "shuffled pairs\n", - "\n", - "\n", - "Leaf Hi-C\n", - "\n", - "N=OS (fihered) 347 (unique) 347 (total), PALL = 0,909\n", - "\n", - "eQTL-gene —\n", - "links >20 kb .\n", - "\n", - "shuffled pairs\n", - "\n", - "\n", - "Figure 3 Inference of population size from whole- —— YRI (Nigeria) —— CHB (China)\n", - "\n", - ". : . — MKK (Kenya) — JPT (Japan)\n", - "genome sequences. (a) Population size estimates —— LWK (Kenya) — GIH (N. India\n", - "indivi — CEU (N.Europe) —— MXL (Mexico — CEU (N. Europe)\n", - "\n", - "from four haplotypes (two phased individuals) a — fsiaayy pe) — we re Rletive American) b ~ TSI (aly)\n", - "from each of nine populations. The dashed line — CHB (China)\n", - "was generated from a reduced data set of only the © g 10° — JPT (Japan)\n", - "Native American components of the MXL genomes. 3 ae — GIH (N. India)\n", - "\n", - ". < 5 10 — YRI (Nigeria)\n", - "Estimates from two haplotypes for CEU and YRI g FS — LWK (Kenya)\n", - "are shown for comparison as dotted lines. 2 2 108\n", - "N, Northern. (b) Population size estimates from a a\n", - "eight haplotypes (four phased individuals) from the g 2 108\n", - "same populations as in a but excluding MXL and 3 E 10!\n", - "\n", - "Ww\n", - "\n", - "MKK. In contrast to estimates with four haplotypes,\n", - "estimates are more recent. For comparison, we\n", - "show the result from four haplotypes for CEU,\n", - "\n", - "10° 104 10°\n", - "CHB and YRI as dotted lines. Time (years ago) Time (years ago)\n", - "\n", - "\n", - "Leaf Hi-C K4me3 HiChIP K27me3 HiChIP\n", - "\n", - "face mar rapa mat\n", - "\n", - "eQTL-gene\n", - "links >20 kb\n", - "\n", - "shuffled pairs :\n", - "\n", - "\n", - "(mustache_aman) [papantonis1@gwdu1@1 aman]$ awk '$1 == $9 {print $1}' GE02457_dots_5kb.bedpe | sort | unig -c && wc -1 GE02457_dots_5kb.bedpe\n", - "842 chri\n", - "413 chr1e\n", - "465 chri1\n", - "442 chr12\n", - "244 chri3\n", - "254 chri4\n", - "234 chris\n", - "174 chri16\n", - "248 chr17\n", - "196 chri8\n", - "122 chri9\n", - "817 chr2\n", - "196 chr2e\n", - "\n", - "81 chr21\n", - "78 chr22\n", - "731 chr3\n", - "\n", - "594 chr4\n", - "609 chr5\n", - "631 chr6é\n", - "478 chr7\n", - "\n", - "505 chr8&\n", - "349 chr9\n", - "\n", - "184 chrx\n", - "\n", - "8888 GE02457_dots_5kb.bedpe\n", - "\n", - "ge Plant Epigenome\n", - "Browser\n", - "\n", - "sect\n", - "\n", - "anc seg 3\n", - "\n", - "meg :\n", - "Bea meri\n", - "\n", - "rae ‘\n", - "\n", - "irsenabnitninpmeyiityrr afl mnie ahi\n", - "\n", - "\n", - "Chromosomes\n", - "\n", - "o\n", - "\n", - "8\n", - "\n", - "ae\n", - "\n", - "Show\n", - "\n", - "Observed\n", - "\n", - "Normalization (Obs | Ctrl)\n", - "\n", - "None ¢\n", - "\n", - "Bala...\n", - "\n", - "Resolution (BP)\n", - "\n", - "I rrdtdot ttt td\n", - "2.5MB 500KB 100KB 25KB 5KB 1KB 200BP\n", - "\n", - "OMB\n", - "\n", - "\n", - "Genetic context of bacterial aqpN genes TUT\n", - "\n", - "44 AQPNsinKEGG (45% in arsenic resistance operons — 55 % in NO operon)\n", - "\n", - "57 AQPNs in NCBI (68% in arsenic resistance operons — 32 % in NO operon)\n", - "\n", - "As(V)\n", - "\n", - "transporter\n", - "\n", - "As(ltl)\n", - "\n", - "= > > | >> >>>\n", - "f GipF Aqpz |\n", - "\n", - "Crop\n", - "Physiology\n", - "56\n", - "\n", - "\n", - "clonalAbundance\n", - "\n", - "We can also examine the relative distribution of clones by abundance. Here clonalAbundance() will produce a line graph with a total\n", - "number of clones by the number of instances within the sample or run. Like above, we can also group.by this by vectors within the\n", - "contig object using the group.by variable in the function.\n", - "\n", - "clonalAbundance(combined. TCR,\n", - "cloneCall = \"gene\",\n", - "scale = FALSE)\n", - "\n", - "5000\n", - "4000\n", - "Samples\n", - "— P17B\n", - "3 PI7L\n", - "5 3000\n", - "ro) — P18B\n", - "ao) — P18L\n", - "3 — P19B\n", - "2000\n", - "5 — PI19L\n", - "Zz\n", - "— P20B\n", - "— P20L\n", - "1000\n", - "oo §\n", - "1 10 100 1000\n", - "Abundance\n", - "\n", - "clonalAbundance() output can also be converted into a density plot, which may allow for better comparisons between different\n", - "repertoire sizes, by setting scale = TRUE.\n", - "\n", - "clonalAbundance(combined.TCR, cloneCall = \"gene\", scale = TRUE)\n", - "\n", - "Gibberellin biosynthesis is well understood TUT\n", - "\n", - "core\n", - "SS\n", - ") The “green revolution”\n", - "semidwarf1 rice variety is\n", - "af mutated in a GA20ox that is\n", - "expressed in shoots but not\n", - "\n", - "GAg\n", - "\n", - "a\n", - "\n", - "1\n", - "i 5, ¢ ot — ent-kaurenoic acid in reproductive tissues, ; Q\n", - "i | t Ga2q GA. leading to increased grain |\n", - "i GA GAs . yields.\n", - "a; —_—_\n", - "5A200 .\n", - "\\ GA _ Sasaki ef al. & Matsuoka, 2002, Nature\n", - "A > GA, —+ GA, —+> GAy, Spielmeyer et al. & Chandler, 2002, PNAS\n", - "”\n", - "\n", - "4 Ce Gazal\n", - "\n", - "CA» ——> GA,\n", - "\n", - "~\n", - "\n", - "GA30x\n", - "\n", - "GA\n", - "\n", - "GAs ——® GA\n", - "\n", - "Brigitte Poppenberger (TUM) Hernandez-Garcia et al & Blazquez, 2021, Sem. Cell Dev. Biol 13\n", - "\n", - "\n", - "Genome vv Tracks ¥ Sample Info v Session v Share Bookmark Save Image Circular View v Help v\n", - "\n", - "IGV oxford_e...me.fasta _ tig00000002:1,604,261-1,606,695 § Q. 2,435 bp (Select Tracks ) (\"Crosshairs )(_Center Line )(TrackLabels) @ iE +)\n", - "\n", - "C D)\n", - "\n", - "604,300 bp 1,604,500 bp 1,604,700 bp 1,604,900 bp 1,605,100 bp 1,605,300 bp 1,605,500 bp 1,605,700 bp 1,605,900 bp 1,606,100 bp 1,606,300 bp 1,606,500 bp 1,606,\n", - "L i 1 L L L 1 L L L 1 L L L 1 L L L 1 L L L 1 L L L 1 L L L 1 L L L 1 L L L 1 L L i 1 L L L 1 L\n", - "\n", - "%\n", - "ee Pe | ZZ\n", - "\n", - "pdeC_1 IKAOHOFJ_01847 ssb\n", - "\n", - "pdeC_2\n", - "\n", - "Investigating the Impact of Hexaploidization on Gene Expression in Oat: in this project, we compare gene expression in hexaploid oat\n", - "species with their tetraploid ancestors. The aim is to explore how the addition of a new genome through hybridization has affected gene\n", - "regulation.\n", - "\n", - "Results - 1. Confocal Images\n", - "\n", - "~ Adaxial oi1\n", - "\n", - "Nucelus ss __-» Abaxial oi2\n", - "\n", - "Adaxial ii1. <——\n", - "\n", - "Abaxial ii2 __—» Chalaza\n", - "\n", - "—+ Funiculus\n", - "\n", - "chromosome1 x1 x2 chromosome2 yl y2 color observed\n", - "expected_bottom_left expected_donut expected_horizontal expected_vertical\n", - "fdr_bottom_left fdr_donut fdr_horizontal fdr_vertical\n", - "number_collapsed centroid1 centroid2 radius\n", - "\n", - "eoo <— > OQ VD G monkeytype.com Ws Search SEARXNG-NALAKATH eave @ @ ~™@®\n", - "ay & a Ab New merch store now open, including a limited edition metal keycap! monkeytype.store x\n", - "\n", - "monkeytype\n", - "\n", - "73\n", - "96%\n", - "\n", - "cautich 76 182/3/1/0 84% 30s\n", - "\n", - "GCO@Qe2® ag Avnud9g HSBTOC BD\n", - "\n", - "english\n", - "&\n", - "S Workspaces v (mi) Monkeytype | A minimalisti + Vv\n", - "0&8 S CI CQ Reset Om 100% 11:12\n", - "\n", - "3.3 SUVRS affects the gene region more than the TE region\n", - "\n", - "\n", - "~*~\n", - "\n", - "@ Safari File Edit View History Bookmarks Develop Window Help @ ne) ¥@6© €& @) F Q ® Fri21.Nov 14:15\n", - "eecax m&-< ‘on © {1| @ 2g pax-db.org Ga co Oo +\n", - "{0.0} Yes. The paper states that t... iG] geckopy/geckopy/experimen... FE] geckopy — geckopy 0.0.1 do... a= PaxDb - Help 2 https://pax-db.org/download... a PaxDb - Download iN} FragPipe workflows | FragPipe\n", - "\n", - "paxdb®° PaxDb: Protein Abundance Database\n", - "\n", - "dary\n", - "Pio,\n", - "\n", - "x protein(s) id/name\n", - "\n", - "PaxDb Downloads\n", - "\n", - "Accessory files\n", - "\n", - "e All datasets can be found here paxdb-abundance-files.zip (~31MB).\n", - "\n", - "¢ Protein sequences fasta file can be found here paxdb-protein-sequences.zip (~498MB).\n", - "¢ Mapped peptides files can be found here paxdb-mapped_peptides.zip (~193MB).\n", - "\n", - "¢ Orthologs list can be found here paxdb-orthologs.zip (~25MB).\n", - "\n", - "e UniProt mappings can be found here paxdb-uniprot-links.zip (~11MB).\n", - "\n", - "e Files from previous PaxDB versions can be found here: /downloads/\n", - "\n", - "Per-species abundance files\n", - "\n", - "ES -\n", - "\n", - "Species Datasets J?\n", - "Homo sapiens 375\n", - "Mus musculus 175\n", - "\n", - "DOWNLOAD\n", - "\n", - "COMPUTE+;\n", - "\n", - "REQUEST+*;\n", - "\n", - "Download\n", - "\n", - "9606.zip\n", - "\n", - "10090.zip\n", - "\n", - "WHAT'S NEW4;\n", - "\n", - "HELP\n", - "\n", - "\n", - "First 5 rows and columns of raw genotype data:\n", - "\n", - "Cl\n", - "\n", - "dddde|\n", - "trop\n", - "\n", - "dddae\n", - "\n", - "dddd|\n", - "rrr\n", - "\n", - "ddd\n", - "\n", - "dddd|\n", - "rrr\n", - "\n", - "a)\n", - "\n", - "dadded\n", - "rrr\n", - "\n", - "eSeeooe\n", - "\n", - "dade|\n", - "\n", - "eeoo\n", - "\n", - "® -1]]]\n", - "\n", - "\n", - "AN Tene enginevelry, 5 Py weeds\n", - "- eZ Anal biota Beat dp\n", - "ate - Tyce Gear bei Oo\n", - "46, Trtwesip * Feashig UP ES\n", - "\n", - "yor\n", - "\n", - "SONGS [ab 2 S welts wort\n", - "\n", - "Coup peggy\n", - "\n", - "Repars — PDO)\n", - "Brcacvnud,\n", - "Summer school\n", - "\n", - "Reding Dalukinnoyy gprs\n", - "L¥ Pap & Stiles\n", - "Chater — Pronses\n", - "Saf & Stee Duatle fo Ui?\n", - "Anping fe Hos. Haig HG\n", - "\n", - "a\n", - "\n", - "I\n", - "I\n", - ".* a\n", - "\n", - "LIEN TIE uc\n", - "\n", - "olathe id- \"4 ut ]\n", - "Figure 2 | Haplotype pattern in a region defined by SNPs that are at high\n", - "frequency in Tibetans and at low frequency in Han Chinese. Each column is\n", - "a polymorphic genomic location (95 in total), each row is a phased haplotype\n", - "(80 Han and 80 Tibetan haplotypes), and the coloured column on the left\n", - "denotes the population identity of the individuals. Haplotypes of the Denisovan\n", - "individual are shown in the top two rows (green). The black cells represent the\n", - "presence of the derived allele and the grey space represents the presence of\n", - "the ancestral allele (see Methods). The first and last columns correspond to the\n", - "first and last positions in Supplementary Table 3, respectively. The red and\n", - "blue arrows indicate the 32 sites in Supplementary Table 3. The blue arrows\n", - "represent a five-SNP haplotype block defined by the first five SNPs in the\n", - "32.7-kb region. Asterisks indicate sites at which Tibetans share a derived allele\n", - "with the Denisovan individual.\n", - "\n", - "\n", - "Building regulatory landscapes\n", - "reveals that an enhancer can recruit\n", - "cohesin to create contact domains,\n", - "engage CTCF sites and activate\n", - "distant genes\n", - "\n", - "Rinzema NJ, Sofiados k, [...], de Laat W\n", - "\n", - "Nature Structural & Molecular Biology (2022)\n", - "\n", - "[| DOWNLOAD | 2022\n", - "\n", - "Robust detection of translocations in\n", - "lymphoma FFPE samples using\n", - "targeted locus capture-based\n", - "sequencing\n", - "\n", - "Allahyar A, Pieterse M, [...], de Laat W\n", - "\n", - "NATURE COMMUNICATIONS: 12:3361\n", - "\n", - "[| DOWNLOAD | 2021\n", - "\n", - "Ready-to-use public infrastructure\n", - "for global SARS-CoV-2 monitoring\n", - "Krijger PHL, Hoek TA, [...], de Laat W, Tanenbaum M\n", - "\n", - "Nature Biotechnology 39: 1178-1184\n", - "\n", - "[| DOWNLOAD | 2021\n", - "\n", - "Novel orthogonal methods to\n", - "uncover the complexity and diversity\n", - "of nuclear architecture\n", - "\n", - "Tjalsma SJD, de Laat W\n", - "\n", - "Current Opinion in Genetics & Development: 67:10-17\n", - "\n", - "[| DOWNLOAD | 2021\n", - "\n", - "Interplay between CTCF boundaries\n", - "and a super enhancer controls\n", - "cohesin extrusion trajectories and\n", - "gene expression\n", - "\n", - "Vos ESM, Valdes-Quezada C, Huang Y [...], de Laat\n", - "Ww\n", - "\n", - "Mol. Cell 81(15):3082-3095\n", - "\n", - "[| DOWNLOAD | 2021\n", - "\n", - "How chromosome topologies get\n", - "their shape: views from proximity\n", - "ligation and microscopy methods\n", - "Huang Y, Neijts R, de Laat W\n", - "\n", - "FEBS Letters: 594 3439-3449\n", - "\n", - "[| DOWNLOAD | 2020\n", - "\n", - "Instituto Universitario de Lisboa (ISCTE IUL)\n", - "UNIVERSIDADE CATOLICA PORTUGUESA\n", - "Universidade de Coimbra\n", - "\n", - "Universidade de Evora\n", - "\n", - "Universidade de Lisboa\n", - "\n", - "Universidade do Porto\n", - "\n", - "Universidade Nova de Lisboa\n", - "\n", - "Ice ot\n", - "‘earn ere ta rao pen 2 prema ne oe [eeremsne [seen]\n", - "\n", - "FastQC: Per Sequence GC Content\n", - "Pea Samp\n", - "\n", - "Per Base N Content [aim\n", - "\n", - "‘epocenapecttancastcan poten ren an asa\n", - "\n", - "FastQC: Per Base N Content\n", - "\n", - "‘Sequence Length Distribution [a\n", - "\n", - "Mimosa equa ci ng)\n", - "\n", - "‘Sequence Duplication Levels SE (ome)\n", - "‘eae ge yer\n", - "[eeewwres [cere]\n", - "\n", - "FastQC: Sequence Duplication Levels,\n", - "\n", - "Overrepresented sequences by sample SKIN\n", - "\n", - "‘Pett arr ctonnpeericsminceanh eaten.\n", - "\n", - "Top overrepresented sequences\n", - "\n", - "‘ie onmmteeseince sr ssarde The soe 2 trent ser cern aye noosa yr\n", - "\n", - "‘Adapter Content [ZI [ome]\n", - "\n", - "‘Peamusiepenep cathe sana yay te asa en aspen enon\n", - "[eeremsne [seen]\n", - "\n", - "FastQC: Adapter Content\n", - "\n", - "\n", - "% TADS\n", - "\n", - "Sequencing technologies have been a driving force in genomics science\n", - "since the 70's.\n", - "\n", - "After reading the article De novo genome assembly: what every biologist\n", - "should know (Published: March 2012)\n", - "\n", - "Link: https://www.nature.com/articles/nmeth.1935,\n", - "\n", - "1. Pick one issue or problem that is mentioned in it. Describe it shortly with\n", - "your own words and try to produce a possible solution for it based on what\n", - "you have learnt in this course so far (it doesn't matter if your solution is\n", - "\n", - "really doable)\n", - "\n", - "2. Share your problem description and solution in TWO places:\n", - "In the discussion forum “Impact of sequencing technology\" and submit the\n", - "same text also in the task “Impact of sequencing technology”. Please, read in\n", - "\n", - "the discussion forum your peers’ answers,\n", - "\n", - "TAL\n", - "TECH\n", - "\n", - "NEXT STEPS |\n", - "\n", - "CHECK THE DEADLINES IN MOODLE\n", - "\n", - "* Read \"De novo genome assembly: what every biologist should know\"\n", - "\n", - "* Do and submit Coursework 3 (based on the lectures so far + reading).\n", - "\n", - "Once you have done all these, you may move on to the \"Week 4, Session 1\"\n", - "\n", - ". 7 sad\n", - "\n", - "Ethylene induces expression of ACS genes during ripening\n", - "\n", - "ACS ACO\n", - "\n", - "SAM LEACSS ACC — > C2H, — Perception\n", - "DS\n", - "LEACS1A —\n", - "4 LEACS4 =e)\n", - "LEACS2\n", - "\n", - "Developmentally\n", - "regulated\n", - "\n", - "Brigitte Poppenberger (TUM) Plant Cell teaching tool\n", - "\n", - "OMB\n", - "\n", - "100 MB\n", - "\n", - "200 MB\n", - "\n", - "Chromosomes Show Normalization (Obs | Ctrl) Resolution (BP)\n", - "“aw “aw a a a — y,\n", - "2 Bp Observed Bala... None © Pivrb ttre teins\n", - "2.5MB 500KB 100KB 25KB 5KB 1KB 200BP\n", - "OMB 100 MB 200 MB 300 MB\n", - "\n", - "\n", - "@ Zoom Workplace\n", - "\n", - "ox\n", - "\n", - "Ww\n", - "\n", - "4\n", - "\n", - "a\n", - "\n", - "Clipboard\n", - "\n", - "11\n", - "\n", - "Slide 10 0f 14 4\n", - "\n", - "Meeting View Edit\n", - "\n", - "[==] Layout ¥\n", - "\n", - "‘© Reset\n", - "New\n", - "\n", - "Slide v Section\n", - "\n", - "Slides\n", - "\n", - "English (India)\n", - "\n", - "Cy, Accessibility: Investigate\n", - "\n", - "W4\n", - "\n", - "x\n", - "\n", - "Pre\n", - "\n", - "& 00 Ce PD Find i) A\n", - "D | 5 b S yy\n", - "ALLS o5|¥ 82 Replace v\n", - "D Swen Arrange Create PDF Create PDF and Add-ins\n", - "° soe 2 I$ Select v and Share link Share via Outlook\n", - "Font Paragraph Drawing Editing Adobe Acrobat Add-ins\n", - "\n", - "Growth : Tissue expansion\n", - "\n", - "Across stages (from 2-III to 2-V) , interval [0.75-1.00] along PD axis sees the highest tissue expansion\n", - "\n", - "U\n", - "\n", - "What sort of tissue expansion is it (isotropic or anisotropic) ?\n", - "\n", - "MAXMIN Histogram MAXMID Histogram\n", - "(0.75. 1.00} 00} oe\n", - "= 2 7\n", - "} (0.25, 0.30) 4\n", - "- Po om st 0.00, 0.25)\n", - "a =\n", - "\n", - "Average of MAXMID\n", - "\n", - "> No evidence of purely isotropic cell growth in any stage at any interval\n", - "\n", - "> Cell growth on average is anisotropic for all intervals at each stage\n", - "\n", - "“ oo\n", - "= Notes QB comments oo\n", - "\n", - "oO\n", - "\n", - "Window Help Bevwvue@8Wrt+ ort.t@goe wy Se Wed Feb 12 22:24\n", - "\n", - "\n" - ] - } - ], - "source": [ - "print(*results, sep='\\n')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.7" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/03_testing.ipynb b/notebooks/03_testing.ipynb new file mode 100644 index 0000000..460f5ec --- /dev/null +++ b/notebooks/03_testing.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "508336f4", + "metadata": {}, + "outputs": [], + "source": [ + "from kg_ocr import get_screenshots, extract_text, create_and_index, retrieve, ask_wllm" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11055f85", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "20e3c61fe191485da79a32e823ccd1ec", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading weights: 0%| | 0/103 [00:00