annotate egapx_runner.xml @ 8:1680e72e27be draft default tip

planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
author fubar
date Mon, 05 Aug 2024 03:56:41 +0000
parents 9c778770514f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
1 <tool name="egapx_runner" id="egapx_runner" version="@TOOL_VERSION@" profile="22.05">
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
2 <description>Runs egapx</description>
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
3 <macros>
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
4 <token name="@TOOL_VERSION@">0.02-alpha</token>
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
5 </macros>
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
6 <requirements>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
7 <requirement version="3.12.3" type="package">python</requirement>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
8 <requirement version="24.04.4-0" type="package">nextflow</requirement>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
9 <requirement version="6.0.1" type="package">pyyaml</requirement>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
10 </requirements>
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
11 <version_command><![CDATA[echo "@TOOL_VERSION@"]]></version_command>
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
12 <command><![CDATA[mkdir -p ./egapx_config &&
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
13 #set econfigfile = $econfig + '.config'
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
14 cp '$__tool_directory__/ui/assets/config/executor/$econfigfile' ./egapx_config/ &&
5
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
15 python '$__tool_directory__/ui/egapx.py' '$yamlconfig' -e '$econfig' -o 'egapx_out']]></command>
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
16 <inputs>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
17 <param name="yamlconfig" type="data" optional="false" label="egapx configuration yaml file to execute" help="" format="yaml,txt" multiple="false"/>
4
6592ae57bb8b planemo upload for repository https://github.com/ncbi/egapx commit cb2d8304fde9fad4348296c3a51b7992ac5b83bb
fubar
parents: 2
diff changeset
18 <param name="econfig" type="select" label="Workflow run configuration to suit the machine in use" help="Docker minimal will run the sample minimal dustmite yaml">
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
19 <option value="docker_minimal">Docker_minimal: supports only the minimal dust mite example yaml using 6GB and 4 cores</option>
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
20 <option value="singularity">Singularity: requires at least 128GB ram and 32 cores. 256GB and 64 cores recommended</option>
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
21 <option value="docker">Docker: requires at least 128GB ram and 32 cores. 256GB and 64 cores recommended</option>
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
22 </param>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
23 </inputs>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
24 <outputs>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
25 <collection name="egapx_out" type="list" label="Outputs from egapx">
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
26 <discover_datasets pattern="__name_and_ext__" directory="egapx_out" visible="false"/>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
27 </collection>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
28 </outputs>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
29
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
30
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
31 <tests>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
32 <test>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
33 <output_collection name="egapx_out" count="8"/>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
34 <param name="yamlconfig" value="yamlconfig_sample"/>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
35 <param name="econfig" value="docker_minimal"/>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
36 </test>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
37 </tests>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
38
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
39
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
40
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
41 <help><![CDATA[
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
42 Galaxy tool wrapping the Eukaryotic Genome Annotation Pipeline (EGAPx)
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
43 =================================================================================================
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
44
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
45 .. class:: warningmark
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
46
5
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
47 **Proof of concept: a quick hack to run a NF workflow inside a specialised Galaxy tool wrapper**
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
48
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
49 EGAPx is a big, complicated Nextflow workflow, challenging and costly to re-implement **properly**, requiring dozens of new tools and replicating a lot of
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
50 complicated *groovy* workflow logic.
5
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
51
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
52 It is also very new and in rapid development. Investing developer effort in a full conversion, and keeping it updated as EGAPx changes rapidly, may be an *inefficient use of developer resources*.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
53
5
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
54 This wrapper is designed to allow measuring how *inefficient* it is in terms of computing resource utilisation, in comparison to the developer effort
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
55 required to convert Nextflow DDL into tools and WF logic. Balancing these competing requirements is a fundamental Galaxy challenge.
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
56
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
57
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
58 EGAPx requires very substantial resources to run with real data. *128GB and 32 cores* are the minimum requirement; *256GB and 64 cores* are recommended.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
59
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
60 A special minimal example that can be run in 6GB with 4 cores is provided as a yaml configuration and is used for the tool test.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
61
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
62 In this implementation, the user must supply a yaml configuration file as initial proof of concept.
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
63 History inputs and even a yaml editor might be provided in future.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
64
5
6effccc966d0 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0
fubar
parents: 4
diff changeset
65 The NF workflow to tool model tested here may be applicable to other NF workflows that take a single configuration yaml.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
66
6
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
67 .. class:: warningmark
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
68
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
69 The computational resource cost of typing the wrong SRA identifiers into a tool form is potentially enormous with this tool!
a7304162d737 planemo upload for repository https://github.com/ncbi/egapx commit 9e59da535540cb4d5c1c412bb2b0969744dfb0b0-dirty
fubar
parents: 5
diff changeset
70
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
71
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
72 Sample yaml configurations
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
73 ===========================
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
74
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
75 YAML sample configurations can be uploaded into your Galaxy history from the `EGAPx github repository <https://github.com/ncbi/egapx/tree/main/examples/>`_.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
76 The simplest possible example is shown below and can be cut and pasted into a history dataset using the upload tool.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
77
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
78
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
79 *./examples/input_D_farinae_small.yaml* is shown below and can be cut and pasted into the upload form to create a yaml file.
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
80 RNA-seq data is provided as URI to the reads FASTA files.
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
81
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
82 input_D_farinae_small.yaml
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
83
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
84 ::
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
85
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
86 genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/020/809/275/GCF_020809275.1_ASM2080927v1/GCF_020809275.1_ASM2080927v1_genomic.fna.gz
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
87 taxid: 6954
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
88 reads:
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
89 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR8506572.1
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
90 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR8506572.2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
91 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR9005248.1
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
92 - https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/EGAP/data/Dermatophagoides_farinae_small/SRR9005248.2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
93
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
94
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
95 input_Gavia_stellata.yaml
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
96
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
97 ::
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
98
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
99 genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/030/936/135/GCF_030936135.1_bGavSte3.hap2/GCF_030936135.1_bGavSte3.hap2_genomic.fna.gz
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
100 reads: txid37040[Organism] AND biomol_transcript[properties] NOT SRS024887[Accession]
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
101 taxid: 37040
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
102
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
103 input_C_longicornis.yaml
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
104
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
105 ::
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
106
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
107 genome: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/029/603/195/GCF_029603195.1_ASM2960319v2/GCF_029603195.1_ASM2960319v2_genomic.fna.gz
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
108 reads: txid2530218[Organism] AND biomol_transcript[properties] NOT SRS024887[Accession]
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
109 taxid: 2530218
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
110
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
111 Purpose
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
112 ========
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
113
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
114 **This is not intended for production**
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
115
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
116 Just a proof of concept.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
117 It is possibly too inefficient to be useful although it may turn out not to be a problem if run on a dedicated workstation.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
118 At least the efficiency can now be more easily estimated.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
119
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
120 This tool is not recommended for public deployment because of the resource demands.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
121
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
122 EGAPx Overview
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
123 ===============
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
124
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
125 .. image:: $PATH_TO_IMAGES/Pipeline_sm_ncRNA_CAGE_80pct.png
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
126
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
127 **Warning:**
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
128 The current version is an alpha release with limited features and organism scope to collect initial feedback on execution. Outputs are not yet complete and not intended for production use. Please open a GitHub `Issue <https://github.com/ncbi/egapx/issues>`_ if you encounter any problems with EGAPx. You can also write to cgr@nlm.nih.gov to give us your feedback or if you have any questions.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
129
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
130 EGAPx is the publicly accessible version of the updated NCBI `Eukaryotic Genome Annotation Pipeline <https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/>`_.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
131
8
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
132 EGAPx takes an assembly fasta file, a taxid of the organism, and RNA-seq data. Based on the taxid, EGAPx will pick protein sets and HMM models. The pipeline runs ``miniprot`` to align protein sequences, and ``STAR`` to align RNA-seq to the assembly. Protein alignments and RNA-seq read alignments are then passed to ``Gnomon`` for gene prediction. In the first step of ``Gnomon``, the short alignments are chained together into putative gene models.
1680e72e27be planemo upload for repository https://github.com/ncbi/egapx commit bdbe05027c2c40e217a2ff0c9e0556450c443e54
fubar
parents: 7
diff changeset
133 In the second step, these predictions are further supplemented by *ab-initio* predictions based on HMM models. The final annotation for the input assembly is produced as a ``gff`` file.
2
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
134
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
135 **Security Notice:**
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
136
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
137 EGAPx has dependencies in and outside of its execution path that include several thousand files from the `NCBI C++ toolkit <https://www.ncbi.nlm.nih.gov/toolkit>`_, and more than a million total lines of code. Static Application Security Testing has shown a small number of verified buffer overrun security vulnerabilities. Users should consult with their organizational security team on risk and if there is concern, consider mitigating options like running via VM or cloud instance.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
138
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
139
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
140 *To specify an array of NCBI SRA datasets in yaml*
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
141
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
142 ::
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
143
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
144 reads:
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
145 - SRR8506572
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
146 - SRR9005248
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
147
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
148
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
149 *To specify an SRA entrez query*
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
150
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
151 ::
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
152
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
153 reads: 'txid6954[Organism] AND biomol_transcript[properties] NOT SRS024887[Accession] AND (SRR8506572[Accession] OR SRR9005248[Accession] )'
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
154
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
155
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
156 **Note:** Both of the above examples will have more RNA-seq data than the ``input_D_farinae_small.yaml`` example. To make sure the Entrez query does not produce a large number of SRA runs, please run it first at the `NCBI SRA page <https://www.ncbi.nlm.nih.gov/sra>`_. If there are too many SRA runs, select a few of them and list them in the input yaml.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
157
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
158 Output
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
159 =======
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
160
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
161 EGAPx output will appear as a collection in the user history. The main annotation file is called *accept.gff*.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
162
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
163 ::
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
164
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
165 accept.gff
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
166 annot_builder_output
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
167 nextflow.log
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
168 run.report.html
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
169 run.timeline.html
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
170 run.trace.txt
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
171 run_params.yaml
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
172
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
173
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
174 The *nextflow.log* file captures all the process information, including each process's work directory. ``run_params.yaml`` has all the parameters that were used in the EGAPx run. More information about process time and resource usage can be found in the other ``run.*`` files.
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
175
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
176 **Intermediate files**
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
177
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
178 In the log, each line denotes a process that completed in the workflow. The first column (*e.g.* ``[96/621c4b]``) is the subdirectory where the intermediate output files and logs are found for the process on the same line, *i.e.*, ``egapx:miniprot:run_miniprot``. To see the intermediate files for that process, go to the work directory path that you supplied and traverse to the subdirectory ``96/621c4b``:
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
179
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
180 ::
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
181
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
182 $ aws s3 ls s3://temp_datapath/D_farinae/96/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
183 PRE 06834b76c8d7ceb8c97d2ccf75cda4/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
184 PRE 621c4ba4e6e87a4d869c696fe50034/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
185 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
186 PRE output/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
187 2024-03-27 11:19:18 0
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
188 2024-03-27 11:19:28 6 .command.begin
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
189 2024-03-27 11:20:24 762 .command.err
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
190 2024-03-27 11:20:26 762 .command.log
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
191 2024-03-27 11:20:23 0 .command.out
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
192 2024-03-27 11:19:18 13103 .command.run
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
193 2024-03-27 11:19:18 129 .command.sh
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
194 2024-03-27 11:20:24 276 .command.trace
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
195 2024-03-27 11:20:25 1 .exitcode
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
196 $ aws s3 ls s3://temp_datapath/D_farinae/96/621c4ba4e6e87a4d869c696fe50034/output/
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
197 2024-03-27 11:20:24 17127134 aligns.paf
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
198
a3b158471bd3 planemo upload for repository https://github.com/ncbi/egapx commit 98875ef7eda9323fc9991970103954e9097d9e73
fubar
parents: 1
diff changeset
199
1
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
200 ]]></help>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
201 <citations>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
202 <citation type="doi">10.1093/bioinformatics/bts573</citation>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
203 </citations>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
204 </tool>
c8e1543546f8 planemo upload for repository https://github.com/ncbi/egapx commit 8173d01b08d9a91c9ec5f6cb50af346edc8020c4-dirty
fubar
parents:
diff changeset
205