# HG changeset patch # User iuc # Date 1642973721 0 # Node ID 42a39792aaae747d173263bf56eb6b954229e15a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/spades commit 8734db131db6f76697b500b30f18ee7723d61813" diff -r 000000000000 -r 42a39792aaae biosyntheticspades.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/biosyntheticspades.xml Sun Jan 23 21:35:21 2022 +0000 @@ -0,0 +1,183 @@ + + biosynthetic gene cluster assembly + + macros.xml + + + + + + + + +
+ +
+ + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ `_ of the manual. + +**Output** + + +biosyntheticSPAdes outputs four files of interest: + +- Scaffolds: contains DNA sequences from putative biosynthetic gene clusters (BGC). Since each sample may contain multiple BGCs and biosyntheticSPAdes can output several putative DNA sequences for each cluster, for each contig name we append the suffix _cluster_X_candidate_Y, where X is the id of the BGC and Y is the id of the candidate from the BGC. +- Raw_scaffolds: SPAdes scaffolds generated without domain-graph related algorithms. Very close to regular scaffolds.fasta file. +- HMM statistics: contains statistics about BGC composition in the sample. First, it outputs the number of domain hits in the sample. Then, for each BGC candidate we output the domain order with positions on the corresponding DNA sequence from scaffolds.fasta. +- Domain graphs: contains domain graph structure that can be used to assess complexity of the sample and structure of BGCs. + +A detailed description can be found in the `output section `_ of the manual. + +.. class:: infomark + +**References** + +More information can be found on `github `_ and on the `project website `_. + ]]> + + 10.1101/gr.243477.118 + +
diff -r 000000000000 -r 42a39792aaae macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sun Jan 23 21:35:21 2022 +0000 @@ -0,0 +1,749 @@ + + 3.15.3 + 0 + + + spades + zip + + + + + + + + + + + + &1 | awk -F 'v' '{print $2}']]> + + + + + + + 10.1093/bioinformatics/btv688 + 10.1093/bioinformatics/btu266 + 10.1093/bioinformatics/btv337 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + '$out_cs' || echo 'No contigs.fasta.' +#end if +#if 'ss' in $optional_output + && test -f 'output/scaffolds.fasta' && python '$__tool_directory__/write_tsv_script.py' < 'output/scaffolds.fasta' > '$out_ss' || echo 'No scaffolds.fasta.' +#end if +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [0-9,]+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 'ag' in optional_output + operation_mode != '--only-error-correction' + + + + + 'ags' in optional_output + operation_mode != '--only-error-correction' + + + + + 'cn' in optional_output + operation_mode != '--only-error-correction' + + + + + 'cp' in optional_output + operation_mode != '--only-error-correction' + + + + + + 'cr' in optional_output + operation_mode != '--only-assembler' + + + + + + + + + + 'cs' in optional_output + operation_mode != '--only-error-correction' + + + + + 'l' in optional_output + + + + + 'sc' in optional_output + operation_mode != '--only-error-correction' + + + + + 'sp' in optional_output + operation_mode != '--only-error-correction' + + + + + + + + 'ss' in optional_output + operation_mode != '--only-error-correction' + + + + + 'rs' in optional_output + + + + + 'b' in optional_output + + + + + 'dg' in optional_output + 
+ + + + `_ of the manual. + ]]> + +- Assembly graph + + +- Assembly graph with scaffolds + + +- Contigs + + +- Contigs paths in the assembly graph + + +- Contigs stats + + +- Corrected reads by BayesHammer + + +- Log file + + +- Scaffolds (recommended for use as resulting sequences) + + +- Scaffolds paths in the assembly graph + + +- Scaffolds stats + + +SPAdes - St. Petersburg genome assembler - is an assembly toolkit containing various assembly pipelines. + + `_ (e.g. assemble using k-mer lengths 21,33,55,77,99,127). However, due to the increased error rate some changes of k-mer lengths (e.g. selection of shorter ones) may be required. For example, if you ran SPAdes with k-mer lengths 21,33,55,77 and then decided to assemble the same data set using more iterations and larger values of K, you can run SPAdes once again specifying the same output folder and the following options: --restart-from k77 -k 21,33,55,77,99,127 --mismatch-correction -o . Do not forget to copy contigs and scaffolds from the previous run. We're planning to tackle the issue of selecting k-mer lengths for IonTorrent reads in the next versions. + +You may need no error correction for Hi-Q enzyme at all. However, we suggest trying to assemble your data with and without error correction and selecting the best variant. + +For non-trivial datasets (e.g. with high GC, low or uneven coverage) we suggest enabling single-cell mode (setting the --sc option) and using k-mer lengths of 21,33,55. 
+ + ]]> + + diff -r 000000000000 -r 42a39792aaae test-data/A_R1.fastq.gz Binary file test-data/A_R1.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/A_R2.fastq.gz Binary file test-data/A_R2.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/B_R1.fastq.gz Binary file test-data/B_R1.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/B_R2.fastq.gz Binary file test-data/B_R2.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/corona_scaffold.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/corona_scaffold.fasta Sun Jan 23 21:35:21 2022 +0000 @@ -0,0 +1,18 @@ +>NODE_1_length_1009_cluster_1_candidate_1_domains_2 +GTTCAAGCTGAGGCAAAACGCCTTTTTCAACTTCTACTAAGCCACAAGTGCCATCTTTAG +GATGTTGACGTGCCTCTGATAAGACCGCCTCCACTGGAGGATACACAGGTTTAAAGGTTT +ATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAAC +GAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATA +ATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCT +GCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGT +GACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAAC +TCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGG +AGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAG +TTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATG +CTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTC +AGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAG +TGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTT +ACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAG +ATTTAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAA +CAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCCTGGTCATGTTGAG +CTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACAC diff -r 000000000000 -r 42a39792aaae test-data/covid.fastq.gz Binary file test-data/covid.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/ecoli_1K.fasta.gz Binary file test-data/ecoli_1K.fasta.gz has changed diff -r 000000000000 -r 
42a39792aaae test-data/ecoli_1K.fastq.gz Binary file test-data/ecoli_1K.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/ecoli_1K_1.fasta.gz Binary file test-data/ecoli_1K_1.fasta.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/ecoli_1K_1.fastq.gz Binary file test-data/ecoli_1K_1.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/ecoli_1K_2.fasta.gz Binary file test-data/ecoli_1K_2.fasta.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/ecoli_1K_2.fastq.gz Binary file test-data/ecoli_1K_2.fastq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/pl1.fq.gz Binary file test-data/pl1.fq.gz has changed diff -r 000000000000 -r 42a39792aaae test-data/pl2.fq.gz Binary file test-data/pl2.fq.gz has changed