changeset 1:7bc75dd0f782 draft

planemo upload for repository https://github.com/genouest/galaxy-tools/tree/master/tools/helixer commit e516933a2c3aa6bcd336a0b5c072893fe9019dd9
author genouest
date Mon, 25 Sep 2023 12:47:12 +0000
parents 1b08e39cc52d
children 7c1dc010a819
files helixer.xml macros.xml test-data/fungi.gff3 test-data/invertebrate.gff3 test-data/land_plant.gff3 test-data/ouput_species.gff3 test-data/output.gff3 test-data/vertebrate.gff3
diffstat 8 files changed, 76 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/helixer.xml	Wed Jun 28 08:39:38 2023 +0000
+++ b/helixer.xml	Mon Sep 25 12:47:12 2023 +0000
@@ -6,16 +6,12 @@
     </macros>
 
     <requirements>
-        <expand macro="requirements" />
+        <expand macro="requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
-        ## Not in $PATH in the docker image
-        ## Manage models with a data manager?
-        /usr/local/bin/fetch_helixer_models.py
 
-        &&
-
+        /usr/local/bin/fetch_helixer_models.py &&
         Helixer.py
         --fasta-path '$input'
         --species '$species'
@@ -31,7 +27,7 @@
         #else:
             --no-overlap
         #end if
-
+        --batch-size $size
         --window-size $post_processing.window_size
         --min-coding-length $post_processing.min_coding_length
         --edge-threshold $post_processing.edge_threshold
@@ -68,7 +64,7 @@
             </sanitizer>
             <validator type="regex">[0-9a-zA-Z_]+</validator>
         </param>
-
+        <param name="size" argument="--batch-size" type="integer" value="8" label="Batch size" help="It may be necessary to reduce it if the GPU runs out of memory" />
         <section name="post_processing" title="Post-processing">
             <param argument="--window-size" type="integer" min="0" value="100" label="Window size" help="This determines the number of bases averaged during the sliding window approach"/>
             <param argument="--edge-threshold" type="float" min="0" max="1" value="0.1" label="Edge threshold" help="This threshold specifies the genic score which defines the start / end boundaries of each candidate region"/>
@@ -82,26 +78,45 @@
         </data>
     </outputs>
     <tests>
-        <!-- Test for land_plant -->
         <test expect_num_outputs="1">
-            <param name="input" value="sequence.fasta"/>
-            <conditional name="lineage">
-                <param name="lineages" value="land_plant"/>
-            </conditional>
-            <output name="output" value="output.gff3" ftype="gff3" lines_diff="2" />
-        </test>
-        <test expect_num_outputs="1">
-            <!-- Test for species -->
+            <!-- Test for species and land_plant-->
             <param name="input" value="sequence.fasta"/>
             <param name="species" value="Arabidopsis"/>
             <conditional name="lineage">
                 <param name="lineages" value="land_plant"/>
             </conditional>
-            <output name="output" value="ouput_species.gff3" ftype="gff3" lines_diff="2" />
+            <param name="size" value="8"/>
+            <output name="output" value="ouput_species.gff3" ftype="gff3" compare="sim_size" delta="100"/>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- Test for vertebrates-->
+            <param name="input" value="sequence.fasta"/>
+            <conditional name="lineage">
+                <param name="lineages" value="vertebrate"/>
+            </conditional>
+            <param name="size" value="8"/>
+            <output name="output" value="vertebrate.gff3" ftype="gff3" lines_diff="2"/>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- Test for invertebrates-->
+            <param name="input" value="sequence.fasta"/>
+            <conditional name="lineage">
+                <param name="lineages" value="invertebrate"/>
+            </conditional>
+            <param name="size" value="8"/>
+            <output name="output" value="invertebrate.gff3" ftype="gff3" lines_diff="2"/>
+        </test>
+        <test expect_num_outputs="1">
+            <!-- Test for fungi-->
+            <param name="input" value="sequence.fasta"/>
+            <conditional name="lineage">
+                <param name="lineages" value="fungi"/>
+            </conditional>
+            <param name="size" value="8"/>
+            <output name="output" value="fungi.gff3" ftype="gff3" lines_diff="2"/>
         </test>
     </tests>
 
-
     <help><![CDATA[
         Helixer_: Gene calling with Deep Neural Networks.
 
--- a/macros.xml	Wed Jun 28 08:39:38 2023 +0000
+++ b/macros.xml	Mon Sep 25 12:47:12 2023 +0000
@@ -1,5 +1,5 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.3.1</token>
+    <token name="@TOOL_VERSION@">0.3.2</token>
 
     <xml name="citation">
         <citations>
@@ -9,9 +9,9 @@
     </xml>
 
     <xml name="requirements">
-        <container type="docker">gglyptodon/helixer-docker:helixer_v@TOOL_VERSION@_cuda_11.2.0-cudnn8</container>
+        <container type="docker">gglyptodon/helixer-docker:helixer_v@TOOL_VERSION@_cuda_11.8.0-cudnn8</container>
     </xml>
-
+    
     <xml name="subseq" tokens="length,offset,offsetlen">
         <param argument="--subsequence-length" type="integer" min="0" max="213840" value="@LENGTH@" label="Subsequence length: how much of the genome the Neural Network can see at once" help="Should ideally be comfortably longer than the typical gene. For genomes with large genes (>20kpb) it is recommended to increase this parameter."></param>
         <conditional name="option_overlap">
@@ -26,6 +26,5 @@
             <when value="false"/>
         </conditional>
     </xml>
-
 </macros>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fungi.gff3	Mon Sep 25 12:47:12 2023 +0000
@@ -0,0 +1,4 @@
+##gff-version 3.2.1
+##species 
+# c62d9615e722c2f927a0de3f4fcf5b70  /shared/ifbstor1/galaxy/jobs/003/517/3517616/home/.local/share/Helixer/models/fungi/fungi_v0.3_a_0100.h5
+##sequence-region NC_034365.1:c72045-70009 1 2037
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/invertebrate.gff3	Mon Sep 25 12:47:12 2023 +0000
@@ -0,0 +1,13 @@
+##gff-version 3.2.1
+##species 
+# fdd5a925728cf56e2470d96beeeb797b  /shared/ifbstor1/galaxy/jobs/003/517/3517615/home/.local/share/Helixer/models/invertebrate/invertebrate_v0.3_m_0100.h5
+##sequence-region NC_034365.1:c72045-70009 1 2037
+NC_034365.1:c72045-70009	Helixer	gene	1029	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000001
+NC_034365.1:c72045-70009	Helixer	mRNA	1029	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1;Parent=_NC_034365.1:c72045-70009_000001
+NC_034365.1:c72045-70009	Helixer	exon	1029	1228	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.exon.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1029	1035	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.five_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	CDS	1036	1228	.	+	0	ID=_NC_034365.1:c72045-70009_000001.1.CDS.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	exon	1595	1598	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.exon.2;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	CDS	1595	1598	.	+	2	ID=_NC_034365.1:c72045-70009_000001.1.CDS.2;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	exon	1812	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.exon.3;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	CDS	1812	2037	.	+	1	ID=_NC_034365.1:c72045-70009_000001.1.CDS.3;Parent=_NC_034365.1:c72045-70009_000001.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/land_plant.gff3	Mon Sep 25 12:47:12 2023 +0000
@@ -0,0 +1,17 @@
+##gff-version 3.2.1
+##species 
+# f0e00efcbea83c66b69258d11119a691  /shared/ifbstor1/galaxy/jobs/003/517/3517613/home/.local/share/Helixer/models/land_plant/land_plant_v0.3_a_0080.h5
+##sequence-region NC_034365.1:c72045-70009 1 2037
+NC_034365.1:c72045-70009	Helixer	gene	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001
+NC_034365.1:c72045-70009	Helixer	mRNA	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1;Parent=_NC_034365.1:c72045-70009_000001
+NC_034365.1:c72045-70009	Helixer	exon	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.exon.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1035	1035	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.five_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	CDS	1036	1260	.	+	0	ID=_NC_034365.1:c72045-70009_000001.1.CDS.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	three_prime_UTR	1261	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.three_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000001.1
+NC_034365.1:c72045-70009	Helixer	gene	1603	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002
+NC_034365.1:c72045-70009	Helixer	mRNA	1603	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1;Parent=_NC_034365.1:c72045-70009_000002
+NC_034365.1:c72045-70009	Helixer	exon	1603	1725	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.exon.1;Parent=_NC_034365.1:c72045-70009_000002.1
+NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1603	1606	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.five_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000002.1
+NC_034365.1:c72045-70009	Helixer	CDS	1607	1725	.	+	0	ID=_NC_034365.1:c72045-70009_000002.1.CDS.1;Parent=_NC_034365.1:c72045-70009_000002.1
+NC_034365.1:c72045-70009	Helixer	exon	1812	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.exon.2;Parent=_NC_034365.1:c72045-70009_000002.1
+NC_034365.1:c72045-70009	Helixer	CDS	1812	2037	.	+	1	ID=_NC_034365.1:c72045-70009_000002.1.CDS.2;Parent=_NC_034365.1:c72045-70009_000002.1
--- a/test-data/ouput_species.gff3	Wed Jun 28 08:39:38 2023 +0000
+++ b/test-data/ouput_species.gff3	Mon Sep 25 12:47:12 2023 +0000
@@ -14,4 +14,4 @@
 NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1603	1606	.	+	.	ID=Arabidopsis_NC_034365.1:c72045-70009_000002.1.five_prime_UTR.1;Parent=Arabidopsis_NC_034365.1:c72045-70009_000002.1
 NC_034365.1:c72045-70009	Helixer	CDS	1607	1725	.	+	0	ID=Arabidopsis_NC_034365.1:c72045-70009_000002.1.CDS.1;Parent=Arabidopsis_NC_034365.1:c72045-70009_000002.1
 NC_034365.1:c72045-70009	Helixer	exon	1812	2037	.	+	.	ID=Arabidopsis_NC_034365.1:c72045-70009_000002.1.exon.2;Parent=Arabidopsis_NC_034365.1:c72045-70009_000002.1
-NC_034365.1:c72045-70009	Helixer	CDS	1812	2037	.	+	1	ID=Arabidopsis_NC_034365.1:c72045-70009_000002.1.CDS.2;Parent=Arabidopsis_NC_034365.1:c72045-70009_000002.1
+NC_034365.1:c72045-70009	Helixer	CDS	1812	2037	.	+	1	ID=Arabidopsis_NC_034365.1:c72045-70009_000002.1.CDS.2;Parent=Arabidopsis_NC_034365.1:c72045-70009_000002.1
\ No newline at end of file
--- a/test-data/output.gff3	Wed Jun 28 08:39:38 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-##gff-version 3.2.1
-##species 
-# f0e00efcbea83c66b69258d11119a691  /tmp/tmpyabgcmro/job_working_directory/000/2/home/.local/share/Helixer/models/land_plant/land_plant_v0.3_a_0080.h5
-##sequence-region NC_034365.1:c72045-70009 1 2037
-NC_034365.1:c72045-70009	Helixer	gene	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001
-NC_034365.1:c72045-70009	Helixer	mRNA	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1;Parent=_NC_034365.1:c72045-70009_000001
-NC_034365.1:c72045-70009	Helixer	exon	1035	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.exon.1;Parent=_NC_034365.1:c72045-70009_000001.1
-NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1035	1035	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.five_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000001.1
-NC_034365.1:c72045-70009	Helixer	CDS	1036	1260	.	+	0	ID=_NC_034365.1:c72045-70009_000001.1.CDS.1;Parent=_NC_034365.1:c72045-70009_000001.1
-NC_034365.1:c72045-70009	Helixer	three_prime_UTR	1261	1269	.	+	.	ID=_NC_034365.1:c72045-70009_000001.1.three_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000001.1
-NC_034365.1:c72045-70009	Helixer	gene	1603	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002
-NC_034365.1:c72045-70009	Helixer	mRNA	1603	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1;Parent=_NC_034365.1:c72045-70009_000002
-NC_034365.1:c72045-70009	Helixer	exon	1603	1725	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.exon.1;Parent=_NC_034365.1:c72045-70009_000002.1
-NC_034365.1:c72045-70009	Helixer	five_prime_UTR	1603	1606	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.five_prime_UTR.1;Parent=_NC_034365.1:c72045-70009_000002.1
-NC_034365.1:c72045-70009	Helixer	CDS	1607	1725	.	+	0	ID=_NC_034365.1:c72045-70009_000002.1.CDS.1;Parent=_NC_034365.1:c72045-70009_000002.1
-NC_034365.1:c72045-70009	Helixer	exon	1812	2037	.	+	.	ID=_NC_034365.1:c72045-70009_000002.1.exon.2;Parent=_NC_034365.1:c72045-70009_000002.1
-NC_034365.1:c72045-70009	Helixer	CDS	1812	2037	.	+	1	ID=_NC_034365.1:c72045-70009_000002.1.CDS.2;Parent=_NC_034365.1:c72045-70009_000002.1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vertebrate.gff3	Mon Sep 25 12:47:12 2023 +0000
@@ -0,0 +1,4 @@
+##gff-version 3.2.1
+##species 
+# acedf94d7c4f811e877da07844bc58f4  /shared/ifbstor1/galaxy/jobs/003/517/3517614/home/.local/share/Helixer/models/vertebrate/vertebrate_v0.3_m_0080.h5
+##sequence-region NC_034365.1:c72045-70009 1 2037