changeset 0:0a66ef6dacb7 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/minia commit cd3b054df45f2805457941fa09170389f99df80e"
author iuc
date Thu, 09 Apr 2020 03:42:24 -0400
parents
children b2dc51639ad5
files minia.xml test-data/ec.fa
diffstat 2 files changed, 104 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/minia.xml	Thu Apr 09 03:42:24 2020 -0400
@@ -0,0 +1,55 @@
+<tool id="minia" name="Minia" version="@TOOL_VERSION@">
+    <description>Short-read assembler based on a de Bruijn graph</description>
+    <macros>
+        <token name="@TOOL_VERSION@">3.2.1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">minia</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+#set input = 'infile.' + $in.ext
+ln -s '$in' '$input' &&
+## The link above gives the dataset a name ending in its Galaxy extension (presumably so minia can recognise the format - TODO confirm). Optional thresholds below are only passed when set.
+minia
+-in '$input'
+-kmer-size $kmer_size
+#if str($abundance_min):
+    -abundance-min $abundance_min
+#end if
+#if str($abundance_max):
+    -abundance-max $abundance_max
+#end if
+-nb-cores \${GALAXY_SLOTS:-1}
+-out output
+    ]]></command>
+    <inputs>
+        <param argument="-in" type="data" format="fasta,fastqsanger,fastqsanger.gz,fasta.gz" label="Reads in FASTA or FASTQ format" />
+        <param argument="-kmer-size" type="integer" value="31" min="1" label="Size of a kmer" />
+        <param argument="-abundance-min" type="integer" value="" optional="true" min="0" label="Min abundance threshold for solid kmers (default: 2)" />
+        <param argument="-abundance-max" type="integer" value="" optional="true" label="Max abundance threshold for solid kmers" />
+    </inputs>
+    <outputs>
+        <data name="output" format="fasta" label="${tool.name} on ${on_string}" from_work_dir="output.contigs.fa" /> <!-- collected from output.contigs.fa, which matches the "-out output" prefix given in the command -->
+    </outputs>
+    <tests>
+        <test> <!-- NOTE(review): only abundance_min is exercised here; no test sets abundance_max -->
+            <param name="in" value="ec.fa" />
+            <param name="kmer_size" value="21" />
+            <param name="abundance_min" value="1" />
+            <output name="output" ftype="fasta">
+                <assert_contents>
+                    <has_text text="LN:i:460 KC:i:2200 km:f:5.000"/>
+                    <has_text text="LN:i:637 KC:i:3085 km:f:5.000"/>
+                    <has_n_lines n="4"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Minia is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day.
+The output of Minia is a set of contigs.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/1748-7188-8-22</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ec.fa	Thu Apr 09 03:42:24 2020 -0400
@@ -0,0 +1,49 @@
+>works well for k=21; part of genome10K.fasta
+CATCGATGCGAGACGCCTGTCGCGGGGAATTGTGGGGCGGACCACGCTCTGGCTAACGAGCTACCGTTTCCTTTAACCTGCCAGACGGTGACCAGGGCCGTTCGGCGTTGCATCGAGCGGTGTCGCTAGCGCAATGCGCAAGATTTTGACATTTACAAGGCAACATTGCAGCGTCCGATGGTCCGGTGGCCTCCAGATAGTGTCCAGTCGCTCTAACTGTATGGAGACCATAGGCATTTACCTTATTCTCATCGCCACGCCCCAAGATCTTTAGGACCCAGCATTCCTTTAACCACTAACATAACGCGTGTCATCTAGTTCAACAACC
+>remaining part
+TGTCATCTAGTTCAACAACCGTTATGCCGTCCGACTCTTGCGCTCGGATGTCCGCAATGGGTTATCCCTATGTTCCGGTAATCTCTCATCTACTAAGCGCCCTAAAGGTCGTATGGTTGGAGGGCGGTTACACACCCTTAAGTACCGAACGATAGAGCACCCGTCTAGGAGGGCGTGCAGGGTCTCCCGCTAGCTAATGGTCACGGCCTCTCTGGGAAAGCTGAACAACGGATGATACCCATACTGCCACTCCAGTACCTGGGCCGCGTGTTGTACGCTGTGTATCTTGAGAGCGTTTCCAGCAGATAGAACAGGATCACATGTACATG
+>that's the EC
+TGTCATCTAGTTCAACAACCAAAAAAA
+>contig that is split in two by the EC, containing the last kmer of the ec (CTAGTTCAACAACCAAAAAAA)
+GGTGAACAGCACATCTTTTCGTCCTGAGGCCATATTAATTCTACTCAGATTGTCTGTAACCGGAGCTTCGGGCGTATTTTTGCGTAAGACACTGCCTAAAGGGAACATATGTGTCCAGAATAGGGTTCAACGGTGTATGAGCAAA
+CTAGTTCAACAACCAAAAAAA
+TTGTGTGCAAGCTACTTCTAGACCTTATTAAGTGCCCAGGAATTCCTAGGAAGGCGCGCAGCTCAAGCAATCATACATGGCGGAATGCCTGTCCACCGGGGGTTCTACTGTACCACAGTGGCCTGGATAGCTAAGCAGGTCCTGGATTGGCATGTCATCCGGAGTGATAGGCACTGCTCACGACCAGCTTGCGGACAAACGGGGTGCCCGCGCCTGCGTCCGGTAGACGAGCGATGGATTTAGACCGTTCACTGAACCCTCTAATAGGACCTCTTGCCCATCCGAGGCTTAAGC
+>the rest below is just repeated for coverage reasons
+A
+>the rest below is just repeated for coverage reasons
+A
+>the rest below is just repeated for coverage reasons
+A
+>contig that is split in two by the EC, containing the last kmer of the ec (CTAGTTCAACAACCAAAAAAA)
+GGTGAACAGCACATCTTTTCGTCCTGAGGCCATATTAATTCTACTCAGATTGTCTGTAACCGGAGCTTCGGGCGTATTTTTGCGTAAGACACTGCCTAAAGGGAACATATGTGTCCAGAATAGGGTTCAACGGTGTATGAGCAAA
+CTAGTTCAACAACCAAAAAAA
+TTGTGTGCAAGCTACTTCTAGACCTTATTAAGTGCCCAGGAATTCCTAGGAAGGCGCGCAGCTCAAGCAATCATACATGGCGGAATGCCTGTCCACCGGGGGTTCTACTGTACCACAGTGGCCTGGATAGCTAAGCAGGTCCTGGATTGGCATGTCATCCGGAGTGATAGGCACTGCTCACGACCAGCTTGCGGACAAACGGGGTGCCCGCGCCTGCGTCCGGTAGACGAGCGATGGATTTAGACCGTTCACTGAACCCTCTAATAGGACCTCTTGCCCATCCGAGGCTTAAGC
+>contig that is split in two by the EC, containing the last kmer of the ec (CTAGTTCAACAACCAAAAAAA)
+GGTGAACAGCACATCTTTTCGTCCTGAGGCCATATTAATTCTACTCAGATTGTCTGTAACCGGAGCTTCGGGCGTATTTTTGCGTAAGACACTGCCTAAAGGGAACATATGTGTCCAGAATAGGGTTCAACGGTGTATGAGCAAA
+CTAGTTCAACAACCAAAAAAA
+TTGTGTGCAAGCTACTTCTAGACCTTATTAAGTGCCCAGGAATTCCTAGGAAGGCGCGCAGCTCAAGCAATCATACATGGCGGAATGCCTGTCCACCGGGGGTTCTACTGTACCACAGTGGCCTGGATAGCTAAGCAGGTCCTGGATTGGCATGTCATCCGGAGTGATAGGCACTGCTCACGACCAGCTTGCGGACAAACGGGGTGCCCGCGCCTGCGTCCGGTAGACGAGCGATGGATTTAGACCGTTCACTGAACCCTCTAATAGGACCTCTTGCCCATCCGAGGCTTAAGC
+>works well for k=21; part of genome10K.fasta
+CATCGATGCGAGACGCCTGTCGCGGGGAATTGTGGGGCGGACCACGCTCTGGCTAACGAGCTACCGTTTCCTTTAACCTGCCAGACGGTGACCAGGGCCGTTCGGCGTTGCATCGAGCGGTGTCGCTAGCGCAATGCGCAAGATTTTGACATTTACAAGGCAACATTGCAGCGTCCGATGGTCCGGTGGCCTCCAGATAGTGTCCAGTCGCTCTAACTGTATGGAGACCATAGGCATTTACCTTATTCTCATCGCCACGCCCCAAGATCTTTAGGACCCAGCATTCCTTTAACCACTAACATAACGCGTGTCATCTAGTTCAACAACC
+>remaining part
+TGTCATCTAGTTCAACAACCGTTATGCCGTCCGACTCTTGCGCTCGGATGTCCGCAATGGGTTATCCCTATGTTCCGGTAATCTCTCATCTACTAAGCGCCCTAAAGGTCGTATGGTTGGAGGGCGGTTACACACCCTTAAGTACCGAACGATAGAGCACCCGTCTAGGAGGGCGTGCAGGGTCTCCCGCTAGCTAATGGTCACGGCCTCTCTGGGAAAGCTGAACAACGGATGATACCCATACTGCCACTCCAGTACCTGGGCCGCGTGTTGTACGCTGTGTATCTTGAGAGCGTTTCCAGCAGATAGAACAGGATCACATGTACATG
+>works well for k=21; part of genome10K.fasta
+CATCGATGCGAGACGCCTGTCGCGGGGAATTGTGGGGCGGACCACGCTCTGGCTAACGAGCTACCGTTTCCTTTAACCTGCCAGACGGTGACCAGGGCCGTTCGGCGTTGCATCGAGCGGTGTCGCTAGCGCAATGCGCAAGATTTTGACATTTACAAGGCAACATTGCAGCGTCCGATGGTCCGGTGGCCTCCAGATAGTGTCCAGTCGCTCTAACTGTATGGAGACCATAGGCATTTACCTTATTCTCATCGCCACGCCCCAAGATCTTTAGGACCCAGCATTCCTTTAACCACTAACATAACGCGTGTCATCTAGTTCAACAACC
+>remaining part
+TGTCATCTAGTTCAACAACCGTTATGCCGTCCGACTCTTGCGCTCGGATGTCCGCAATGGGTTATCCCTATGTTCCGGTAATCTCTCATCTACTAAGCGCCCTAAAGGTCGTATGGTTGGAGGGCGGTTACACACCCTTAAGTACCGAACGATAGAGCACCCGTCTAGGAGGGCGTGCAGGGTCTCCCGCTAGCTAATGGTCACGGCCTCTCTGGGAAAGCTGAACAACGGATGATACCCATACTGCCACTCCAGTACCTGGGCCGCGTGTTGTACGCTGTGTATCTTGAGAGCGTTTCCAGCAGATAGAACAGGATCACATGTACATG
+>contig that is split in two by the EC, containing the last kmer of the ec (CTAGTTCAACAACCAAAAAAA)
+GGTGAACAGCACATCTTTTCGTCCTGAGGCCATATTAATTCTACTCAGATTGTCTGTAACCGGAGCTTCGGGCGTATTTTTGCGTAAGACACTGCCTAAAGGGAACATATGTGTCCAGAATAGGGTTCAACGGTGTATGAGCAAA
+CTAGTTCAACAACCAAAAAAA
+TTGTGTGCAAGCTACTTCTAGACCTTATTAAGTGCCCAGGAATTCCTAGGAAGGCGCGCAGCTCAAGCAATCATACATGGCGGAATGCCTGTCCACCGGGGGTTCTACTGTACCACAGTGGCCTGGATAGCTAAGCAGGTCCTGGATTGGCATGTCATCCGGAGTGATAGGCACTGCTCACGACCAGCTTGCGGACAAACGGGGTGCCCGCGCCTGCGTCCGGTAGACGAGCGATGGATTTAGACCGTTCACTGAACCCTCTAATAGGACCTCTTGCCCATCCGAGGCTTAAGC
+>contig that is split in two by the EC, containing the last kmer of the ec (CTAGTTCAACAACCAAAAAAA)
+GGTGAACAGCACATCTTTTCGTCCTGAGGCCATATTAATTCTACTCAGATTGTCTGTAACCGGAGCTTCGGGCGTATTTTTGCGTAAGACACTGCCTAAAGGGAACATATGTGTCCAGAATAGGGTTCAACGGTGTATGAGCAAA
+CTAGTTCAACAACCAAAAAAA
+TTGTGTGCAAGCTACTTCTAGACCTTATTAAGTGCCCAGGAATTCCTAGGAAGGCGCGCAGCTCAAGCAATCATACATGGCGGAATGCCTGTCCACCGGGGGTTCTACTGTACCACAGTGGCCTGGATAGCTAAGCAGGTCCTGGATTGGCATGTCATCCGGAGTGATAGGCACTGCTCACGACCAGCTTGCGGACAAACGGGGTGCCCGCGCCTGCGTCCGGTAGACGAGCGATGGATTTAGACCGTTCACTGAACCCTCTAATAGGACCTCTTGCCCATCCGAGGCTTAAGC
+>works well for k=21; part of genome10K.fasta
+CATCGATGCGAGACGCCTGTCGCGGGGAATTGTGGGGCGGACCACGCTCTGGCTAACGAGCTACCGTTTCCTTTAACCTGCCAGACGGTGACCAGGGCCGTTCGGCGTTGCATCGAGCGGTGTCGCTAGCGCAATGCGCAAGATTTTGACATTTACAAGGCAACATTGCAGCGTCCGATGGTCCGGTGGCCTCCAGATAGTGTCCAGTCGCTCTAACTGTATGGAGACCATAGGCATTTACCTTATTCTCATCGCCACGCCCCAAGATCTTTAGGACCCAGCATTCCTTTAACCACTAACATAACGCGTGTCATCTAGTTCAACAACC
+>remaining part
+TGTCATCTAGTTCAACAACCGTTATGCCGTCCGACTCTTGCGCTCGGATGTCCGCAATGGGTTATCCCTATGTTCCGGTAATCTCTCATCTACTAAGCGCCCTAAAGGTCGTATGGTTGGAGGGCGGTTACACACCCTTAAGTACCGAACGATAGAGCACCCGTCTAGGAGGGCGTGCAGGGTCTCCCGCTAGCTAATGGTCACGGCCTCTCTGGGAAAGCTGAACAACGGATGATACCCATACTGCCACTCCAGTACCTGGGCCGCGTGTTGTACGCTGTGTATCTTGAGAGCGTTTCCAGCAGATAGAACAGGATCACATGTACATG
+>works well for k=21; part of genome10K.fasta
+CATCGATGCGAGACGCCTGTCGCGGGGAATTGTGGGGCGGACCACGCTCTGGCTAACGAGCTACCGTTTCCTTTAACCTGCCAGACGGTGACCAGGGCCGTTCGGCGTTGCATCGAGCGGTGTCGCTAGCGCAATGCGCAAGATTTTGACATTTACAAGGCAACATTGCAGCGTCCGATGGTCCGGTGGCCTCCAGATAGTGTCCAGTCGCTCTAACTGTATGGAGACCATAGGCATTTACCTTATTCTCATCGCCACGCCCCAAGATCTTTAGGACCCAGCATTCCTTTAACCACTAACATAACGCGTGTCATCTAGTTCAACAACC
+>remaining part
+TGTCATCTAGTTCAACAACCGTTATGCCGTCCGACTCTTGCGCTCGGATGTCCGCAATGGGTTATCCCTATGTTCCGGTAATCTCTCATCTACTAAGCGCCCTAAAGGTCGTATGGTTGGAGGGCGGTTACACACCCTTAAGTACCGAACGATAGAGCACCCGTCTAGGAGGGCGTGCAGGGTCTCCCGCTAGCTAATGGTCACGGCCTCTCTGGGAAAGCTGAACAACGGATGATACCCATACTGCCACTCCAGTACCTGGGCCGCGTGTTGTACGCTGTGTATCTTGAGAGCGTTTCCAGCAGATAGAACAGGATCACATGTACATG
+