comparison macros.xml @ 0:14307de7bbab draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/phabox commit 3c8e23e1dea22f625fb6f77ae61e1f4b605aaae2
author ufz
date Tue, 12 Nov 2024 09:31:26 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:14307de7bbab
1 <macros>
<!-- Version tokens. Presumably the upstream tool version and the wrapper
     revision suffix; confirm in the tool XML files that consume them. -->
<token name="@TOOL_VERSION@">2.1.5</token>
<token name="@VERSION_SUFFIX@">0</token>
<!-- Citation macro: contributes one DOI citation; the yield slot lets the
     including tool append further citation elements. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1093/bioadv/vbad101</citation>
        <yield />
    </citations>
</xml>
10
<!-- General inputs: database selection (from the "phabox" data table), the
     contig FASTA, an optional protein FASTA and the minimum contig length. -->
<xml name="general">
    <param argument="--dbdir" type="select" label="Phabox2 database">
        <options from_data_table="phabox" />
    </param>
    <param argument="--contigs"
           type="data"
           format="fasta"
           optional="false"
           label="Contig sequences" />
    <param argument="--proteins"
           type="data"
           format="fasta"
           optional="true"
           label="Predicted proteins" />
    <param argument="--len"
           type="integer"
           min="0"
           value="3000"
           label="Minimum contig length"
           help="Contigs with length smaller than this value will not proceed" />
</xml>
<!-- Command line fragment matching the parameters of the "general" macro. -->
<token name="@GENERAL@"><![CDATA[
    ## database path is taken from the selected data table entry
    --dbdir '$dbdir.fields.path'
    --outpth output/
    --contigs '$contigs'
    ## the protein FASTA is optional and only passed when a dataset was chosen
    #if $proteins
        --proteins '$proteins'
    #end if
    --midfolder intermediate/
    --len $len
    ## fall back to a single thread when GALAXY_SLOTS is not set
    --threads "\${GALAXY_SLOTS:-1}"
]]></token>
30
31
<!-- Section with the single virus identification option (reject threshold). -->
<xml name="phamer">
    <section name="phamer" title="Options for virus identification" help="">
        <param argument="--reject"
               type="float"
               min="0"
               max="20"
               value="10"
               label="Minimum known proteins percentage"
               help="Reject sequences in which the percent proteins aligned to known phages is smaller than the value" />
    </section>
</xml>
<!-- Command line fragment for the "phamer" section. -->
<token name="@PHAMER@"><![CDATA[
    ## threshold chosen in the phamer section
    --reject $phamer.reject
]]></token>
40
41
<!-- Section with the thresholds used to build the virus-virus network. -->
<xml name="network">
    <section name="network"
             title="Options for virus-virus connections"
             help="The options below are used to generate a network for virus-virus connections. The current parameters are optimized for the ICTV 2024 and are highly accurate for grouping genus-level vOTUs. When making changes, make sure you understand what they are.">
        <param argument="--aai"
               type="float"
               min="0"
               max="100"
               value="75"
               label="Average amino acids identity" />
        <param argument="--share"
               type="float"
               min="0"
               max="100"
               value="15"
               label="Minimum shared number of proteins" />
        <param argument="--pcov"
               type="float"
               min="0"
               max="100"
               value="80"
               label="Protein-based coverage" />
        <!-- \-\-draw is not exposed: the CLI help does not recommend using it -->
    </section>
</xml>
<!-- Command line fragment for the "network" section. -->
<token name="@NETWORK@"><![CDATA[
    ## thresholds chosen in the network section
    --aai $network.aai
    --share $network.share
    --pcov $network.pcov
]]></token>
55
<!-- Section with the options used to predict CRISPRs from user supplied MAGs. -->
<xml name="crispr">
    <section name="crispr" title="Options used to predict CRISPRs based on MAGs" help="">
        <!-- "true" is not a datatype: the MAGs are FASTA datasets. Several of
             them may be selected, because the CRISPR_PRE command fragment
             iterates over this parameter to stage one link per dataset. -->
        <param argument="--bfolder" type="data" format="fasta" multiple="true" optional="true" label="MAGS"/>
        <param argument="--cpident" type="float" value="90" min="90" max="100" label="Alignment identity for CRISPRs"/>
        <param argument="--ccov" type="float" value="90" min="0" max="100" label="Alignment coverage for CRISPRs"/>
        <param argument="--blast" type="select" label="BLAST program for CRISPRs" help="blastn-short will lead to more sensitive results but require more time to execute the program">
            <option value="blastn">blastn</option>
            <option value="blastn-short">blastn-short</option>
        </param>
    </section>
</xml>
<!-- Shell prelude that stages the optional MAG datasets for the CRISPR options. -->
<token name="@CRISPR_PRE@"><![CDATA[
    ## Stage every MAG dataset as a symlink inside bfolder/, the directory
    ## that the CRISPR command fragment passes via --bfolder. Link names are
    ## the element identifiers with all characters outside [A-Za-z0-9_.-]
    ## replaced by underscores.
    ## NOTE(review): relies on the "re" module being imported by the
    ## including tool command - confirm.
    #if $crispr.bfolder
        mkdir bfolder &&
        #for b in $crispr.bfolder
            #set bname = re.sub('[^\w\-_\.]', '_', $b.element_identifier)
            ## the link must be created inside bfolder/, otherwise the
            ## directory given to --bfolder stays empty
            ln -s '$b' 'bfolder/$bname' &&
        #end for
    #end if
]]></token>
<!-- Command line fragment for the "crispr" section. -->
<token name="@CRISPR@"><![CDATA[
    ## the MAG folder is only passed when MAG datasets were selected
    #if $crispr.bfolder
        --bfolder bfolder
    #end if
    --cpident $crispr.cpident
    ## coverage must come from its own parameter, not from the identity one
    --ccov $crispr.ccov
    --blast $crispr.blast
]]></token>
84
<!-- Section with the contamination detection switch. The boolean renders the
     complete command line option, for both the checked and unchecked state. -->
<xml name="contamination">
    <section name="contamination" title="Options for contamination detection" help="">
        <param argument="--sensitive"
               type="boolean"
               truevalue="--sensitive Y"
               falsevalue="--sensitive N"
               label="Sensitive search for prokaryotic genes"
               help="Enabling this will lead to more sensitive results but require more time to execute the program" />
    </section>
</xml>
<!-- Command line fragment for the "contamination" section. -->
<token name="@CONTAMINATION@"><![CDATA[
    ## the boolean parameter already expands to "--sensitive Y" or "--sensitive N"
    $contamination.sensitive
]]></token>
93
<!-- Stand-alone AAI threshold parameter (not wrapped in a section). -->
<xml name="aai">
    <param argument="--aai"
           type="float"
           min="0"
           max="100"
           value="75"
           label="Average amino acids identity for AAI based genus grouping" />
</xml>
97
<!-- Section with the vOTU grouping options. The clustering mode selects which
     set of thresholds (ANI based or AAI based) is offered. -->
<xml name="votu">
    <!-- title aligned with the "Options for ..." wording of the other sections -->
    <section name="votu" title="Options for vOTU grouping" help="">
        <conditional name="mode_cond">
            <param argument="--mode" type="select" label="Clustering mode">
                <option value="ANI">ANI</option>
                <option value="AAI">AAI</option>
            </param>
            <when value="ANI">
                <param argument="--ani" type="float" value="95" min="0" max="100" label="Alignment identity for ANI-based clustering"/>
                <param argument="--tcov" type="float" value="85" min="0" max="100" label="Alignment coverage for ANI-based clustering"/>
            </when>
            <when value="AAI">
                <param argument="--aai" type="float" value="75" min="0" max="100" label="Average amino acids identity for AAI based genus grouping"/>
                <param argument="--pcov" type="float" value="80" min="0" max="100" label="Protein-level coverage for AAI based genus grouping"/>
                <param argument="--share" type="float" value="15" min="0" max="100" label="Minimum shared number of proteins for AAI based genus grouping"/>
            </when>
        </conditional>
    </section>
</xml>
<!-- Command line fragment for the "votu" section. -->
<token name="@VOTU@"><![CDATA[
    --mode $votu.mode_cond.mode
    ## only the thresholds of the selected clustering mode are passed
    #if $votu.mode_cond.mode == "ANI"
        --ani $votu.mode_cond.ani
        --tcov $votu.mode_cond.tcov
    #elif $votu.mode_cond.mode == "AAI"
        --aai $votu.mode_cond.aai
        --pcov $votu.mode_cond.pcov
        --share $votu.mode_cond.share
    #end if
]]></token>
128
<!-- Section with the tree building options: marker genes and the alignment
     thresholds used to match them. -->
<xml name="tree">
    <section name="tree" title="Options for tree building" help="">
        <param argument="--marker" type="select" multiple="true" label="Markers used to generate tree" help="Using combinations of these markers can improve the accuracy of the tree. But will decrease the number of sequences in the tree. Numbers in parentheses give the percentage of prokaryotic viruses that have the corresponding protein.">
            <option value="endolysin">endolysin (91)</option>
            <option value="holin">holin (75)</option>
            <!-- label typo fixed: "marjor" -> "major" -->
            <option value="head">major head (77)</option>
            <option value="portal" selected="true">portal (84) </option>
            <option value="terl" selected="true">terminase large subunit (92)</option>
        </param>
        <param argument="--mcov" type="float" value="50" min="0" max="100" label="Alignment coverage for matching marker genes"/>
        <!-- label typo fixed: "identitiy" -> "identity" -->
        <param argument="--mpident" type="float" value="25" min="0" max="100" label="Alignment identity for matching marker genes"/>
    </section>
</xml>
<!-- Command line fragment for the "tree" section. -->
<token name="@TREE@"><![CDATA[
    ## every selected marker becomes one value of the --marker option
    --marker
    #for m in $tree.marker
        $m
    #end for
    --mcov $tree.mcov
    --mpident $tree.mpident
    ## Constructing the MSA and building the tree (for which the program
    ## would use mafft and fasttree) can be done more flexibly in Galaxy.
    ## The two options are kept here, commented out, to ensure that they
    ## will not be implemented:
    ## --msa Y
    ## --tree Y
]]></token>
156
<!-- Output macro parameterised by the "task" token: a list collection that
     gathers the .fa, .tsv and .tab files found in the supplementary folder of
     the task. The filter keeps it only if the task is among the requested
     supplements. -->
<xml name="supp_out" tokens="task">
    <collection name="@TASK@_supp_out"
                type="list"
                label="${tool.name} on ${on_string}: @TASK@ supplement">
        <discover_datasets pattern="(?P&lt;designation&gt;.+).fa"
                           format="fasta"
                           directory="output/final_prediction/@TASK@_supplementary" />
        <discover_datasets pattern="(?P&lt;designation&gt;.+).tsv"
                           format="tabular"
                           directory="output/final_prediction/@TASK@_supplementary" />
        <discover_datasets pattern="(?P&lt;designation&gt;.+).tab"
                           format="tabular"
                           directory="output/final_prediction/@TASK@_supplementary" />
        <filter>supplements and "@TASK@" in supplements</filter>
    </collection>
</xml>
165
<!-- Help text fragment: introduction of the output table and its first two columns. -->
<token name="@COMMON_OUTPUT_DOC@"><![CDATA[
A tabular dataset with the following columns:

- Accession: the accession or the name of the input contigs.
- Length: the length of input contigs.
]]></token>
<!-- Help text fragment: output columns of the virus identification (PhaMer).
     The default quoted for the reject option matches the value of the
     corresponding parameter in the "phamer" macro (10, a percentage). -->
<token name="@PHAMER_OUTPUT_DOC@"><![CDATA[
- Pred: virus or non-virus.
- Proportion: the proportion of the proteins that can be aligned to the virus database (from 0 to 1).
- PhaMerScore: the prediction score given by the deep learning model.
- PhaMerConfidence: the confidence of prediction, determined by both Proportion and PhaMerScore (high-confidence, medium-confidence, low-confidence, lower than reject threshold (according to the --reject parameter, default: 10)).
  For viruses with low confidence or below the reject threshold, we recommend running the contamination task to check their sequence quality.
]]></token>
<!-- Help text fragment: output columns of the taxonomy prediction (PhaGCN). -->
<token name="@PHAGCN_OUTPUT_DOC@"><![CDATA[
- Lineage: the predicted taxonomy lineage (NCBI version) of the contigs. Each rank is separated by the ';'.
- PhaGCNScore: the predicted score for each rank in the lineage. Each rank is separated by the ';'.
- Genus: whether the contig has a genus level name ('-' means unknown).
- GenusCluster: if the Genus is '-', the program will assign a genus-level grouping result: group_idx (idx = 1, 2, 3, ...) or singleton. This can be viewed as genus-level OTUs based on the average shared protein identities between sequences.
]]></token>
<!-- Help text fragment: output columns of the lifestyle prediction (PhaTYP). -->
<token name="@PHATYP_OUTPUT_DOC@"><![CDATA[
- TYPE: virulent or temperate (virus).
- PhaTYPScore: the prediction score given by the deep learning model.
]]></token>
<!-- Help text fragment: output columns of the host prediction (CHERRY).
     The three prediction methods are rendered as a nested list below
     "Method". NOTE(review): the description of the AAI based method was cut
     off after "virus-simil" and has been completed as "virus similarity";
     confirm the wording against the upstream documentation. -->
<token name="@CHERRY_OUTPUT_DOC@"><![CDATA[
- Host: the predicted host (NCBI taxonomy) of the contigs. '-' means unknown host.
- CHERRYScore: the predicted score from the model.
- Method:

  - CRISPR-based(MAG): CRISPRs alignment results from provided MAG (if any)
  - CRISPR-based(DB): CRISPRs alignment results from database.
  - AAI-based: predicting host based on virus similarity

- Host_NCBI_lineage
- Host_GTDB_lineage
]]></token>
199
<!-- Help text fragment: description of the inputs (contigs, optional proteins). -->
<token name="@COMMON_INPUT_DOC@"><![CDATA[
**Input**

- Contig sequences in FASTA format
- Optionally own predicted protein sequences can be given (by default the tool will use prodigal and diamond blastp for the prediction)
]]></token>
206
207 </macros>