changeset 7:9006e5836729 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit cde78013f2df8cc06372d73f6332f7ab1f120a25
author iuc
date Mon, 30 Oct 2017 07:20:13 -0400
parents 65c2d63fcbe6
children 884dc0264950
files spades.xml test-data/kmer_33_output.fa test-data/kmer_77_output.fa
diffstat 3 files changed, 99 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/spades.xml	Sat May 13 15:51:58 2017 -0400
+++ b/spades.xml	Mon Oct 30 07:20:13 2017 -0400
@@ -1,7 +1,7 @@
-<tool id="spades" name="SPAdes" version="3.9.0">
+<tool id="spades" name="SPAdes" version="3.11.1">
     <description>genome assembler for regular and single-cell projects</description>
     <requirements>
-        <requirement type="package" version="3.9.0">spades</requirement>
+        <requirement type="package" version="3.11.1">spades</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
@@ -74,6 +74,21 @@
     && cat scaffolds.fasta | python '$write_tsv_script' > '$out_scaffold_stats'
     ]]>
     </command>
+    
+    <configfiles>
+        <configfile name="write_tsv_script"><![CDATA[#!/usr/bin/env python
+import sys,re
+search_str = r'^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*).*\$'
+replace_str = r'\1_\2\t\3\t\4'
+cmd = re.compile(search_str)
+sys.stdout.write('#name\tlength\tcoverage\n')
+for i,line in enumerate(sys.stdin):
+    if cmd.match(line):
+        sys.stdout.write(cmd.sub(replace_str,line))
+]]>
+         </configfile>
+    </configfiles>
+    
     <inputs>
         <param argument="--sc" falsevalue="" help="This option is required for MDA (single-cell) data." label="Single-cell?" name="sc" truevalue="--sc" type="boolean">
             <option value="false">No</option>
@@ -142,38 +157,33 @@
         <param optional="true" format="fasta,fastq" label="Sanger reads" multiple="true" name="sanger_reads" type="data" />
         <param optional="true" format="fasta,fastq" label="Trusted contigs" multiple="true" name="trusted_contigs" type="data" />
         <param optional="true" format="fasta,fastq" label="Untrusted contigs" multiple="true" name="untrusted_contigs" type="data" />
+        <param name="contig_graph_out" type="boolean" checked="False" label="Output final assembly graph (contigs)?" help="Will output the final assembly graph (contigs) in fastg format for visualisation" />
+        <param name="scaffold_graph_out" type="boolean" checked="False" label="Output final assembly graph with scaffolds?" help="Will output the final assembly graph with scaffold information in gfa format for visualisation" />
     </inputs>
-    <configfiles>
-        <configfile name="write_tsv_script"><![CDATA[#!/usr/bin/env python
-import sys,re
-search_str = r'^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*).*\$'
-replace_str = r'\1_\2\t\3\t\4'
-cmd = re.compile(search_str)
-sys.stdout.write('#name\tlength\tcoverage\n')
-for i,line in enumerate(sys.stdin):
-    if cmd.match(line):
-        sys.stdout.write(cmd.sub(replace_str,line))
-]]>
-         </configfile>
-    </configfiles>
 
     <outputs>
-        <data format="tabular" label="SPAdes contig stats" name="out_contig_stats" >
+        <data format="tabular" label="${tool.name} on ${on_string}: contig stats" name="out_contig_stats" >
             <actions>
                 <action name="column_names" type="metadata" default="name,length,coverage"/>
             </actions>
         </data>
-        <data format="tabular" label="SPAdes scaffold stats" name="out_scaffold_stats" >
+        <data format="tabular" label="${tool.name} on ${on_string}: scaffold stats" name="out_scaffold_stats" >
             <actions>
                 <action name="column_names" type="metadata" default="name,length,coverage"/>
             </actions>
         </data>
-        <data format="fasta" from_work_dir="contigs.fasta" label="SPAdes contigs (fasta)" name="out_contigs" />
-        <data format="fasta" from_work_dir="scaffolds.fasta" label="SPAdes scaffolds (fasta)" name="out_scaffolds" />
-        <data format="txt" from_work_dir="spades.log" label="SPAdes log" name="out_log" />
+        <data format="fasta" from_work_dir="contigs.fasta" label="${tool.name} on ${on_string}: contigs (fasta)" name="out_contigs" />
+        <data format="fasta" from_work_dir="scaffolds.fasta" label="${tool.name} on ${on_string}: scaffolds (fasta)" name="out_scaffolds" />
+        <data format="txt" from_work_dir="spades.log" label="${tool.name} on ${on_string}: log" name="out_log" />
+        <data format="txt" from_work_dir="assembly_graph.fastg" label="${tool.name} on ${on_string}: assembly graph" name="contig_graph">
+            <filter>contig_graph_out</filter>
+        </data>
+        <data format="txt" from_work_dir="assembly_graph_with_scaffolds.gfa" label="${tool.name} on ${on_string}: assembly graph with scaffolds" name="scaffold_graph">
+            <filter>scaffold_graph_out</filter>
+        </data>
     </outputs>
     <tests>
-        <test>
+        <test> <!-- Test 1 - basic test with k=33 -->
             <param name="sc" value="false" />
             <param name="careful" value="false" />
             <param name="kmers" value="33" />
@@ -183,11 +193,11 @@
             <output compare="re_match" file="kmer_33_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" />
             <output name="out_contig_stats">
                 <assert_contents>
-                    <has_text_matching expression="NODE_1\t1000\t225"/>
+                    <has_text_matching expression="NODE_1\t1000"/>
                 </assert_contents>
             </output>
         </test>
-        <test>
+        <test> <!-- Test 2 - auto k -->
             <param name="sc" value="false" />
             <param name="careful" value="false" />
             <param name="auto_kmer_choice" value="true" />
@@ -196,7 +206,7 @@
             <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" />
             <output compare="re_match" file="auto_kmer_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" />
         </test>
-        <test>
+        <test> <!-- Test 3 - k=77 -->
             <param name="sc" value="false" />
             <param name="careful" value="false" />
             <param name="kmers" value="77" />
@@ -205,6 +215,32 @@
             <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" />
             <output compare="re_match" file="kmer_77_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" />
         </test>
+        <test> <!-- Test 4 - test for extra graph outputs -->
+            <param name="sc" value="false" />
+            <param name="careful" value="false" />
+            <param name="kmers" value="33" />
+            <param name="lib_type" value="paired_end" />
+            <param ftype="fastq" name="fwd_reads" value="ecoli_1K_1.fq" />
+            <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" />
+            <param name="contig_graph_out" value="true" />
+            <param name="scaffold_graph_out" value="true" />
+            <output compare="re_match" file="kmer_33_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" />
+            <output name="out_contig_stats">
+                <assert_contents>
+                    <has_text_matching expression="NODE_1\t1000"/>
+                </assert_contents>
+            </output>
+            <output name="contig_graph">
+                <assert_contents>
+                    <has_text text=">EDGE_"/>
+                </assert_contents>
+            </output>
+            <output name="scaffold_graph">
+                <assert_contents>
+                    <has_text text="NODE_"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -212,7 +248,7 @@
 
 SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes.
 
-This wrapper runs SPAdes 3.9, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage.
+This wrapper runs SPAdes, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage.
 
 **License**
 
@@ -231,6 +267,8 @@
 Anton Korobeynikov greatly helped in understanding how SPAdes works, and integrated handy features into SPAdes.
 
 Nicola Soranzo fixed various bugs.
+
+Simon Gladman added fastg optional outputs.
 ]]>
     </help>
     <citations>
--- a/test-data/kmer_33_output.fa	Sat May 13 15:51:58 2017 -0400
+++ b/test-data/kmer_33_output.fa	Mon Oct 30 07:20:13 2017 -0400
@@ -1,18 +1,18 @@
->NODE_1_length_1000_cov_140.62
-AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC
-TGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG
-TCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTAC
-ACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGT
-AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGG
-CTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT
-ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
-AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTG
-GCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAA
-CGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCG
-CAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATT
-AGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAA
-ATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATC
-GATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT
-GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCA
-GGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGAC
-TACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATT
+>NODE_1_length_1000_cov_225.838676
+AATCGGCGCGTAAACAGGCAGCCAGCACCGCAGCAGAGTAGTCGGAACCGTTGCGTCCAA
+GCACCACCAGTTCGCCTTTTTCATTACCGGCGGTGAAACCTGCCATCAGCACCATGTGAT
+CAGCCGGAATGCGGCTTGCCGCAATACGGCGGGTGGACTCAGCAATATCGACGGTAGATT
+CGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGAC
+CGCGCGCTTCTAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCA
+GCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTT
+TTATTTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCT
+GGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGG
+GTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTT
+TGGCGGGGGCAGAGAGGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATAT
+CGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTC
+GCATGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGG
+CTTTTTTCTGTGTTTCCTGTACGCGTCAGCCCGCACCGTTACCTGTGGTAATGGTGATGG
+TGGTGGTAATGGTGGTGCTAATGCGTTTCATGGATGTTGTGTACTCTGTAATTTTTATCT
+GTCTGTGCGCTATGCCTATATTGGTTAAAGTATTTAGTGACCTAAGTCAATAAAATTTTA
+ATTTACTCACGGCAGGTAACCAGTTCAGAAGCTGCTATCAGACACTCTTTTTTTAATCCA
+CACAGAGACATATTGCCCGTTGCAGTCAGAATGAAAAGCT
--- a/test-data/kmer_77_output.fa	Sat May 13 15:51:58 2017 -0400
+++ b/test-data/kmer_77_output.fa	Mon Oct 30 07:20:13 2017 -0400
@@ -1,18 +1,18 @@
->NODE_1_length_976_cov_64.4594
-GCACCGCAGCAGAGTAGTCGGAACCGTTGCGTCCAAGCACCACCAGTTCGCCTTTTTCAT
-TACCGGCGGTGAAACCTGCCATCAGCACCATGTGATCAGCCGGAATGCGGCTTGCCGCAA
-TACGGCGGGTGGACTCAGCAATATCGACGGTAGATTCGAGGTAATGCCCCACTGCCAGCA
-GTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCA
-TAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGC
-ACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATCGA
-CGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAA
-GTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGG
-TTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGCGGGGGCAGAGAGGACGGTGG
-CCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCGGCAACACGCAGAAAACGTTCTG
-CATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTT
-TGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGGCTTTTTTCTGTGTTTCCTGTACGC
-GTCAGCCCGCACCGTTACCTGTGGTAATGGTGATGGTGGTGGTAATGGTGGTGCTAATGC
-GTTTCATGGATGTTGTGTACTCTGTAATTTTTATCTGTCTGTGCGCTATGCCTATATTGG
-TTAAAGTATTTAGTGACCTAAGTCAATAAAATTTTAATTTACTCACGGCAGGTAACCAGT
-TCAGAAGCTGCTATCAGACACTCTTTTTTTAATCCACACAGAGACATATTGCCCGTTGCA
-GTCAGAATGAAAAGCT
+>NODE_1_length_976_cov_64.459399
+AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC
+TGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG
+TCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTAC
+ACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGT
+AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGG
+CTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT
+ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC
+AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTG
+GCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAA
+CGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCG
+CAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATT
+AGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAA
+ATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATC
+GATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT
+GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCA
+GGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGAC
+TACTCTGCTGCGGTGC