Mercurial > repos > nml > spades
changeset 7:9006e5836729 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit cde78013f2df8cc06372d73f6332f7ab1f120a25
author | iuc |
---|---|
date | Mon, 30 Oct 2017 07:20:13 -0400 |
parents | 65c2d63fcbe6 |
children | 884dc0264950 |
files | spades.xml test-data/kmer_33_output.fa test-data/kmer_77_output.fa |
diffstat | 3 files changed, 99 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- a/spades.xml Sat May 13 15:51:58 2017 -0400 +++ b/spades.xml Mon Oct 30 07:20:13 2017 -0400 @@ -1,7 +1,7 @@ -<tool id="spades" name="SPAdes" version="3.9.0"> +<tool id="spades" name="SPAdes" version="3.11.1"> <description>genome assembler for regular and single-cell projects</description> <requirements> - <requirement type="package" version="3.9.0">spades</requirement> + <requirement type="package" version="3.11.1">spades</requirement> </requirements> <stdio> <exit_code range="1:" /> @@ -74,6 +74,21 @@ && cat scaffolds.fasta | python '$write_tsv_script' > '$out_scaffold_stats' ]]> </command> + + <configfiles> + <configfile name="write_tsv_script"><![CDATA[#!/usr/bin/env python +import sys,re +search_str = r'^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*).*\$' +replace_str = r'\1_\2\t\3\t\4' +cmd = re.compile(search_str) +sys.stdout.write('#name\tlength\tcoverage\n') +for i,line in enumerate(sys.stdin): + if cmd.match(line): + sys.stdout.write(cmd.sub(replace_str,line)) +]]> + </configfile> + </configfiles> + <inputs> <param argument="--sc" falsevalue="" help="This option is required for MDA (single-cell) data." label="Single-cell?" name="sc" truevalue="--sc" type="boolean"> <option value="false">No</option> @@ -142,38 +157,33 @@ <param optional="true" format="fasta,fastq" label="Sanger reads" multiple="true" name="sanger_reads" type="data" /> <param optional="true" format="fasta,fastq" label="Trusted contigs" multiple="true" name="trusted_contigs" type="data" /> <param optional="true" format="fasta,fastq" label="Untrusted contigs" multiple="true" name="untrusted_contigs" type="data" /> + <param name="contig_graph_out" type="boolean" checked="False" label="Output final assembly graph (contigs)?" help="Will output the final assembly graph (contigs) in fastg format for visualisation" /> + <param name="scaffold_graph_out" type="boolean" checked="False" label="Output final assembly graph with scaffolds?" help="Will output the final assembly graph with scaffold information in gfa format for visualisation" /> </inputs> - <configfiles> - <configfile name="write_tsv_script"><![CDATA[#!/usr/bin/env python -import sys,re -search_str = r'^>(NODE|\S+)_(\d+)(?:_|\s)length_(\d+)_cov_(\d+\.*\d*).*\$' -replace_str = r'\1_\2\t\3\t\4' -cmd = re.compile(search_str) -sys.stdout.write('#name\tlength\tcoverage\n') -for i,line in enumerate(sys.stdin): - if cmd.match(line): - sys.stdout.write(cmd.sub(replace_str,line)) -]]> - </configfile> - </configfiles> <outputs> - <data format="tabular" label="SPAdes contig stats" name="out_contig_stats" > + <data format="tabular" label="${tool.name} on ${on_string}: contig stats" name="out_contig_stats" > <actions> <action name="column_names" type="metadata" default="name,length,coverage"/> </actions> </data> - <data format="tabular" label="SPAdes scaffold stats" name="out_scaffold_stats" > + <data format="tabular" label="${tool.name} on ${on_string}: scaffold stats" name="out_scaffold_stats" > <actions> <action name="column_names" type="metadata" default="name,length,coverage"/> </actions> </data> - <data format="fasta" from_work_dir="contigs.fasta" label="SPAdes contigs (fasta)" name="out_contigs" /> - <data format="fasta" from_work_dir="scaffolds.fasta" label="SPAdes scaffolds (fasta)" name="out_scaffolds" /> - <data format="txt" from_work_dir="spades.log" label="SPAdes log" name="out_log" /> + <data format="fasta" from_work_dir="contigs.fasta" label="${tool.name} on ${on_string}: contigs (fasta)" name="out_contigs" /> + <data format="fasta" from_work_dir="scaffolds.fasta" label="${tool.name} on ${on_string}: scaffolds (fasta)" name="out_scaffolds" /> + <data format="txt" from_work_dir="spades.log" label="${tool.name} on ${on_string}: log" name="out_log" /> + <data format="txt" from_work_dir="assembly_graph.fastg" label="${tool.name} on ${on_string}: assembly graph" name="contig_graph"> + <filter>contig_graph_out</filter> + </data> + <data format="txt" from_work_dir="assembly_graph_with_scaffolds.gfa" label="${tool.name} on ${on_string}: assembly graph with scaffolds" name="scaffold_graph"> + <filter>scaffold_graph_out</filter> + </data> </outputs> <tests> - <test> + <test> <!-- Test 1 - basic test with k=33 --> <param name="sc" value="false" /> <param name="careful" value="false" /> <param name="kmers" value="33" /> @@ -183,11 +193,11 @@ <output compare="re_match" file="kmer_33_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" /> <output name="out_contig_stats"> <assert_contents> - <has_text_matching expression="NODE_1\t1000\t225"/> + <has_text_matching expression="NODE_1\t1000"/> </assert_contents> </output> </test> - <test> + <test> <!-- Test 2 - auto k --> <param name="sc" value="false" /> <param name="careful" value="false" /> <param name="auto_kmer_choice" value="true" /> @@ -196,7 +206,7 @@ <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" /> <output compare="re_match" file="auto_kmer_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" /> </test> - <test> + <test> <!-- Test 3 - k=77 --> <param name="sc" value="false" /> <param name="careful" value="false" /> <param name="kmers" value="77" /> @@ -205,6 +215,32 @@ <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" /> <output compare="re_match" file="kmer_77_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" /> </test> + <test> <!-- Test 4 - test for extra graph outputs --> + <param name="sc" value="false" /> + <param name="careful" value="false" /> + <param name="kmers" value="33" /> + <param name="lib_type" value="paired_end" /> + <param ftype="fastq" name="fwd_reads" value="ecoli_1K_1.fq" /> + <param ftype="fastq" name="rev_reads" value="ecoli_1K_2.fq" /> + <param name="contig_graph_out" value="true" /> + <param name="scaffold_graph_out" value="true" /> + <output compare="re_match" file="kmer_33_output.fa" ftype="fasta" lines_diff="1" name="out_contigs" /> + <output name="out_contig_stats"> + <assert_contents> + <has_text_matching expression="NODE_1\t1000"/> + </assert_contents> + </output> + <output name="contig_graph"> + <assert_contents> + <has_text text=">EDGE_"/> + </assert_contents> + </output> + <output name="scaffold_graph"> + <assert_contents> + <has_text text="NODE_"/> + </assert_contents> + </output> + </test> </tests> <help> <![CDATA[ @@ -212,7 +248,7 @@ SPAdes – St. Petersburg genome assembler – is intended for both standard isolates and single-cell MDA bacteria assemblies. See http://bioinf.spbau.ru/en/spades for more details on SPAdes. -This wrapper runs SPAdes 3.9, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. +This wrapper runs SPAdes, collects the output, and throws away all the temporary files. It also produces a tab file with contig names, length and coverage. **License** @@ -231,6 +267,8 @@ Anton Korobeynikov greatlty helped understanding how SPAdes work, and integrated handy features into SPAdes. Nicola Soranzo fixed various bugs. + +Simon Gladman added fastg optional outputs. ]]> </help> <citations>
--- a/test-data/kmer_33_output.fa Sat May 13 15:51:58 2017 -0400 +++ b/test-data/kmer_33_output.fa Mon Oct 30 07:20:13 2017 -0400 @@ -1,18 +1,18 @@ ->NODE_1_length_1000_cov_140.62 -AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC -TGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG -TCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTAC -ACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGT -AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGG -CTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT -ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC -AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTG -GCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAA -CGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCG -CAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATT -AGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAA -ATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATC -GATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT -GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCA -GGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGAC -TACTCTGCTGCGGTGCTGGCTGCCTGTTTACGCGCCGATT +>NODE_1_length_1000_cov_225.838676 +AATCGGCGCGTAAACAGGCAGCCAGCACCGCAGCAGAGTAGTCGGAACCGTTGCGTCCAA +GCACCACCAGTTCGCCTTTTTCATTACCGGCGGTGAAACCTGCCATCAGCACCATGTGAT +CAGCCGGAATGCGGCTTGCCGCAATACGGCGGGTGGACTCAGCAATATCGACGGTAGATT +CGAGGTAATGCCCCACTGCCAGCAGTTTTTCGACCGGATCGATAACAGTAACGTTGTGAC +CGCGCGCTTCTAATACGCCGGCCATAATGGCGATCGACATTTTCTCGCCACGGCAAATCA +GCGCAGCGTTGATGCTATCCGGGCACTGCCCCAACAAACTAATGCCATGCAGGACATGTT +TTATTTGGGCAAATTCCTGATCGACGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCT +GGGCGGCGGCGAGTCCCGTCAAAAGTTCGGCAAAAATACGTTCGGCATCGCTGATATTGG +GTAAAGCATCCTGGCCGCTAATGGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTT +TGGCGGGGGCAGAGAGGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATAT +CGGCAACACGCAGAAAACGTTCTGCATTTGCCACTGATGTACCGCCGAACTTCAACACTC +GCATGGTTGTTACCTCGTTACCTTTGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGG +CTTTTTTCTGTGTTTCCTGTACGCGTCAGCCCGCACCGTTACCTGTGGTAATGGTGATGG +TGGTGGTAATGGTGGTGCTAATGCGTTTCATGGATGTTGTGTACTCTGTAATTTTTATCT +GTCTGTGCGCTATGCCTATATTGGTTAAAGTATTTAGTGACCTAAGTCAATAAAATTTTA +ATTTACTCACGGCAGGTAACCAGTTCAGAAGCTGCTATCAGACACTCTTTTTTTAATCCA +CACAGAGACATATTGCCCGTTGCAGTCAGAATGAAAAGCT
--- a/test-data/kmer_77_output.fa Sat May 13 15:51:58 2017 -0400 +++ b/test-data/kmer_77_output.fa Mon Oct 30 07:20:13 2017 -0400 @@ -1,18 +1,18 @@ ->NODE_1_length_976_cov_64.4594 -GCACCGCAGCAGAGTAGTCGGAACCGTTGCGTCCAAGCACCACCAGTTCGCCTTTTTCAT -TACCGGCGGTGAAACCTGCCATCAGCACCATGTGATCAGCCGGAATGCGGCTTGCCGCAA -TACGGCGGGTGGACTCAGCAATATCGACGGTAGATTCGAGGTAATGCCCCACTGCCAGCA -GTTTTTCGACCGGATCGATAACAGTAACGTTGTGACCGCGCGCTTCTAATACGCCGGCCA -TAATGGCGATCGACATTTTCTCGCCACGGCAAATCAGCGCAGCGTTGATGCTATCCGGGC -ACTGCCCCAACAAACTAATGCCATGCAGGACATGTTTTATTTGGGCAAATTCCTGATCGA -CGAAAGTTTTCAATTGCGCCAGCGGGAACCCCGGCTGGGCGGCGGCGAGTCCCGTCAAAA -GTTCGGCAAAAATACGTTCGGCATCGCTGATATTGGGTAAAGCATCCTGGCCGCTAATGG -TTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGCGGGGGCAGAGAGGACGGTGG -CCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCGGCAACACGCAGAAAACGTTCTG -CATTTGCCACTGATGTACCGCCGAACTTCAACACTCGCATGGTTGTTACCTCGTTACCTT -TGGTCGAAAAAAAAAGCCCGCACTGTCAGGTGCGGGCTTTTTTCTGTGTTTCCTGTACGC -GTCAGCCCGCACCGTTACCTGTGGTAATGGTGATGGTGGTGGTAATGGTGGTGCTAATGC -GTTTCATGGATGTTGTGTACTCTGTAATTTTTATCTGTCTGTGCGCTATGCCTATATTGG -TTAAAGTATTTAGTGACCTAAGTCAATAAAATTTTAATTTACTCACGGCAGGTAACCAGT -TCAGAAGCTGCTATCAGACACTCTTTTTTTAATCCACACAGAGACATATTGCCCGTTGCA -GTCAGAATGAAAAGCT +>NODE_1_length_976_cov_64.459399 +AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC +TGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG +TCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTAC +ACAACATCCATGAAACGCATTAGCACCACCATTACCACCACCATCACCATTACCACAGGT +AACGGTGCGGGCTGACGCGTACAGGAAACACAGAAAAAAGCCCGCACCTGACAGTGCGGG +CTTTTTTTTTCGACCAAAGGTAACGAGGTAACAACCATGCGAGTGTTGAAGTTCGGCGGT +ACATCAGTGGCAAATGCAGAACGTTTTCTGCGTGTTGCCGATATTCTGGAAAGCAATGCC +AGGCAGGGGCAGGTGGCCACCGTCCTCTCTGCCCCCGCCAAAATCACCAACCACCTGGTG +GCGATGATTGAAAAAACCATTAGCGGCCAGGATGCTTTACCCAATATCAGCGATGCCGAA +CGTATTTTTGCCGAACTTTTGACGGGACTCGCCGCCGCCCAGCCGGGGTTCCCGCTGGCG +CAATTGAAAACTTTCGTCGATCAGGAATTTGCCCAAATAAAACATGTCCTGCATGGCATT +AGTTTGTTGGGGCAGTGCCCGGATAGCATCAACGCTGCGCTGATTTGCCGTGGCGAGAAA +ATGTCGATCGCCATTATGGCCGGCGTATTAGAAGCGCGCGGTCACAACGTTACTGTTATC +GATCCGGTCGAAAAACTGCTGGCAGTGGGGCATTACCTCGAATCTACCGTCGATATTGCT +GAGTCCACCCGCCGTATTGCGGCAAGCCGCATTCCGGCTGATCACATGGTGCTGATGGCA +GGTTTCACCGCCGGTAATGAAAAAGGCGAACTGGTGGTGCTTGGACGCAACGGTTCCGAC +TACTCTGCTGCGGTGC