comparison nextalign.xml @ 0:d1dd7d1b07f6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nextclade commit 66df2726d24d9f37eaaa31fea967a0553cc5c3e6"
author iuc
date Thu, 08 Apr 2021 07:52:26 +0000
parents
children 74d1e42a87e0
comparison
equal deleted inserted replaced
-1:000000000000 0:d1dd7d1b07f6
1 <tool id="nextalign" name="NextAlign" version="@TOOL_VERSION@+galaxy0" profile="20.01">
2 <macros>
3 <import>macros.xml</import>
4 </macros>
5 <expand macro="requirements">
6 <requirement type="package" version="@TOOL_VERSION@">nextalign</requirement>
7 </expand>
8 <version_command>nextalign --version-detailed</version_command>
9 <command detect_errors="exit_code"><![CDATA[
10 @REF_FASTA@
11 ln -s '$sequences' sequences.fasta &&
12 nextalign
13 --sequences sequences.fasta
14 --reference reference.fa
15 --output-fasta '$output_fasta'
16 #if $output_insertions:
17 --output-insertions '$output_csv'
18 #end if
19 #if $translation.translation_select == "yes":
20 --genes '${translation.genes}'
21 --genemap '${translation.genemap}'
22 #end if
23 --min-length '${min_length}'
24 --penalty-gap-extend '${penalty_gap_extend}'
25 --penalty-gap-open '${penalty_gap_open}'
26 --penalty-gap-open-in-frame '${penalty_gap_open_in_frame}'
27 --penalty-gap-open-out-of-frame '${penalty_gap_open_out_of_frame}'
28 --penalty-mismatch '${penalty_mismatch}'
29 --score-match '${score_match}'
30 --max-indel '${max_indel}'
31 --nuc-seed-length '${nuc_seed_length}'
32 --nuc-min-seeds '${nuc_min_seeds}'
33 --nuc-seed-spacing '${nuc_seed_spacing}'
34 --nuc-mismatches-allowed '${nuc_mismatches_allowed}'
35 --aa-seed-length '${aa_seed_length}'
36 --aa-min-seeds '${aa_min_seeds}'
37 --aa-seed-spacing '${aa_seed_spacing}'
38 --aa-mismatches-allowed '${aa_mismatches_allowed}'
39 ]]></command>
40 <inputs>
41 <expand macro="reference"/>
42 <param argument="--sequences" type="data" format="fasta" label="FASTA file with input sequences"/>
43 <param argument="--output-insertions" type="boolean" checked="true" label="Output insertion sequences?" help="Outputs stripped insertions relative to reference as CSV"/>
44 <conditional name="translation">
45 <param name="translation_select" type="select" label="Translate annotated genes based on GFF and gene list?">
46 <option value="yes">Translate annotated genes</option>
47 <option value="no">Don't translate genes</option>
48 </param>
49 <when value="yes">
50 <param argument="--genes" type="text" label="Comma separated list of genes to translate.">
51 <sanitizer invalid_char="">
52 <valid initial="string.ascii_letters,string.digits">
53 <add value="-" />
54 <add value="." />
55 <add value="," />
56 </valid>
57 </sanitizer>
58 </param>
59 <param argument="--genemap" type="data" format="gtf" label="GTF file containing custom gene map"/> <!-- TODO: confirm which annotation format nextalign expects for the genemap argument (GTF, GFF or GFF3); this param currently accepts GTF only -->
60 </when>
61 <when value="no"/>
62 </conditional>
63 <param argument="--min-length" type="integer" value="100" min="0"
64 label="Minimum length of nucleotide sequence to consider for alignment"
65 help="If a sequence is shorter than that, alignment will not be attempted and a warning will be emitted. When adjusting this parameter, note that alignment of short sequences can be unreliable." />
66 <param argument="--penalty-gap-extend" type="integer" value="0"
67 label="Penalty for extending a gap."
68 help="If zero, all gaps regardless of length incur the same penalty." />
69 <param argument="--penalty-gap-open" type="integer" value="6"
70 label="Penalty for opening of a gap."
71 help="A higher penalty results in fewer gaps and more mismatches. Should be less than the penalty value of opening a gap in frame to avoid gaps in genes." />
72 <param argument="--penalty-gap-open-in-frame" type="integer" value="7" min="1"
73 label="Penalty for opening gaps at the beginning of a codon."
74 help="Should be greater than the penalty of opening a gap and less than the penalty of opening a gap out of frame, to avoid gaps in genes, but favor gaps that align with codons." />
75 <param argument="--penalty-gap-open-out-of-frame" type="integer" value="8" min="1"
76 label="Penalty for opening gaps in the body of a codon."
77 help="Should be greater than the penalty for opening gaps in-frame to favor gaps that align with codons." />
78 <param argument="--penalty-mismatch" type="integer" value="1" min="1"
79 label="Penalty for aligned nucleotides or aminoacids that differ in state during alignment"
80 help="Note that this is redundantly parameterized with score match." />
81 <param argument="--score-match" type="integer" value="3" min="1"
82 label="Score for encouraging aligned nucleotides or aminoacids with matching state."
83 help="Note that this is redundantly parameterized with mismatch penalty." />
84 <param argument="--max-indel" type="integer" value="400" min="0"
85 label="Maximum length of insertions or deletions allowed to proceed with alignment."
86 help="Alignments with long indels are slow to compute and require substantial memory in the current implementation. Alignment of sequences with indels that are longer than this value will not be attempted and a warning will be emitted." />
87 <param argument="--nuc-seed-length" type="integer" value="21" min="1"
88 label="Seed length for nucleotide alignment."
89 help="Seeds should be long enough to be unique, but short enough to match with high probability." />
90 <param argument="--nuc-min-seeds" type="integer" value="10" min="1"
91 label="Minimum number of seeds to search for during nucleotide alignment."
92 help="Relevant for short sequences. In long sequences, the number of seeds is determined by nucleotide seed spacing. This should be a positive integer." />
93 <param argument="--nuc-seed-spacing" type="integer" value="100" min="0"
94 label="Spacing between seeds during nucleotide alignment." />
95 <param argument="--nuc-mismatches-allowed" type="integer" value="3" min="0"
96 label="Maximum number of mismatching nucleotides."
97 help="Maximum number of mismatching nucleotides allowed for a seed to be considered a match." />
98 <param argument="--aa-seed-length" type="integer" value="12" min="1"
99 label="Seed length for aminoacid alignment" />
100 <param argument="--aa-min-seeds" type="integer" value="10" min="1"
101 label="Minimum number of seeds to search for during aminoacid alignment."
102 help="Relevant for short sequences. In long sequences, the number of seeds is determined by the aminoacid seed spacing." />
103 <param argument="--aa-seed-spacing" type="integer" value="100" min="0"
104 label="Spacing between seeds during aminoacid alignment." />
105 <param argument="--aa-mismatches-allowed" type="integer" value="2" min="0"
106 label="Maximum number of mismatching aminoacids."
107 help="Maximum number of mismatching aminoacids allowed for a seed to be considered a match." />
108 </inputs>
109 <outputs>
110 <data name="output_fasta" format="fasta" label="${tool.name} on ${on_string} (FASTA)"/>
111 <data name="output_csv" format="csv" label="${tool.name} on ${on_string} (CSV)">
112 <filter>output_insertions</filter>
113 </data>
114 <collection name="translations" type="list" label="${tool.name} on ${on_string} - Translations (FASTA)">
115 <discover_datasets pattern="sequences\.gene\.(?P&lt;designation&gt;[a-zA-Z0-9.\-]+)\.fasta" format="fasta"/> <!-- character class mirrors the gene name characters that the "genes" sanitizer lets through (letters, digits, "-" and "."), so every requested gene file can be discovered -->
116 <filter>translation['translation_select'] == 'yes'</filter>
117 </collection>
118 </outputs>
119 <tests>
120 <!--
121 Defaults, all outputs, reference from history
122 -->
123 <test expect_num_outputs="3">
124 <conditional name="reference_source">
125 <param name="reference_source_selector" value="history"/>
126 <param name="ref_file" value="reference.fasta"/>
127 </conditional>
128 <param name="output_insertions" value="true"/>
129 <param name="sequences" value="subsampled.fasta"/>
130 <conditional name="translation">
131 <param name="translation_select" value="yes"/>
132 <param name="genes" value="E,M,N,ORF10,ORF14,ORF1a,ORF1b,ORF3a,ORF6,ORF7a,ORF7b,ORF8,ORF9b,S"/>
133 <param name="genemap" value="genemap.gtf" ftype="gtf"/>
134 </conditional>
135 <output name="output_fasta" file="output.fasta" sort="true"/>
136 <output name="output_csv" file="insertions.csv" sort="true"/>
137 <output_collection name="translations" type="list" count="14">
138 <element name="E" file="subsampled.gene.E.fasta" ftype="fasta" sort="true"/>
139 <element name="M" file="subsampled.gene.M.fasta" ftype="fasta" sort="true"/>
140 <element name="N" file="subsampled.gene.N.fasta" ftype="fasta" sort="true"/>
141 <element name="ORF10" file="subsampled.gene.ORF10.fasta" ftype="fasta" sort="true"/>
142 <element name="ORF14" file="subsampled.gene.ORF14.fasta" ftype="fasta" sort="true"/>
143 <element name="ORF1a" file="subsampled.gene.ORF1a.fasta" ftype="fasta" sort="true"/>
144 <element name="ORF1b" file="subsampled.gene.ORF1b.fasta" ftype="fasta" sort="true"/>
145 <element name="ORF3a" file="subsampled.gene.ORF3a.fasta" ftype="fasta" sort="true"/>
146 <element name="ORF6" file="subsampled.gene.ORF6.fasta" ftype="fasta" sort="true"/>
147 <element name="ORF7a" file="subsampled.gene.ORF7a.fasta" ftype="fasta" sort="true"/>
148 <element name="ORF7b" file="subsampled.gene.ORF7b.fasta" ftype="fasta" sort="true"/>
149 <element name="ORF8" file="subsampled.gene.ORF8.fasta" ftype="fasta" sort="true"/>
150 <element name="ORF9b" file="subsampled.gene.ORF9b.fasta" ftype="fasta" sort="true"/>
151 <element name="S" file="subsampled.gene.S.fasta" ftype="fasta" sort="true"/>
152 </output_collection>
153 </test>
154 <!--
155 Defaults, only fasta output, reference from history
156 -->
157 <test expect_num_outputs="1">
158 <conditional name="reference_source">
159 <param name="reference_source_selector" value="history"/>
160 <param name="ref_file" value="reference.fasta"/>
161 </conditional>
162 <param name="output_insertions" value="false"/>
163 <param name="sequences" value="subsampled.fasta"/>
164 <conditional name="translation">
165 <param name="translation_select" value="no"/>
166 </conditional>
167 <output name="output_fasta" file="output.fasta" sort="true"/>
168 </test>
169 <!--
170 Defaults, only fasta output, reference from cache
171 -->
172 <test expect_num_outputs="1">
173 <conditional name="reference_source">
174 <param name="reference_source_selector" value="cached"/>
175 <param name="ref_file" value="reference_fasta"/>
176 </conditional>
177 <param name="output_insertions" value="false"/>
178 <param name="sequences" value="subsampled.fasta"/>
179 <conditional name="translation">
180 <param name="translation_select" value="no"/>
181 </conditional>
182 <output name="output_fasta" file="output.fasta" sort="true"/>
183 </test>
184 </tests>
185 <help><![CDATA[
186
187 **What it does**
188
189 Nextalign is a viral genome sequence alignment algorithm used in Nextclade.
190
191 It will perform a pairwise alignment of the provided sequences against a given reference sequence using a banded local alignment algorithm with
192 an affine gap cost. Band width and rough relative positions are determined through seed matching.
193
194 Nextalign will strip insertions relative to the reference and output them in a separate CSV file.
195
196 Optionally, when provided with a gene map and a list of genes, Nextalign can perform translation of these genes.
197
198 Currently Nextalign primarily focuses on the SARS-CoV-2 genome, but it can be used on any virus, given a sufficiently similar
199 reference sequence (less than 5% divergence).
200
201 ]]></help>
202 <expand macro="citations" />
203 </tool>