comparison prot-scriber.xml @ 3:863ab6ebcafc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prot-scriber commit bac22f562727babce8e0f456c82408c3063a683d
author iuc
date Sat, 18 May 2024 20:36:38 +0000
parents 4d4df9779b7b
children
comparison
equal deleted inserted replaced
2:4d4df9779b7b 3:863ab6ebcafc
1 <tool id="prot_scriber" name="prot-scriber" version="@TOOL_VERSION@" profile="21.05"> 1 <tool id="prot_scriber" name="prot-scriber" version="@TOOL_VERSION@" profile="21.05">
2 <description>Protein annotation of short human readable descriptions</description> 2 <description>Protein annotation of short human readable descriptions</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">0.1.4</token> 4 <token name="@TOOL_VERSION@">0.1.5</token>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
7 <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement> 7 <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement>
8 </requirements> 8 </requirements>
9 <stdio> 9 <stdio>
10 <regex match="panicked" level="fatal" source="stderr" /> 10 <regex match="panicked" level="fatal" source="stderr"/>
11 </stdio> 11 </stdio>
12 <command> 12 <command>
13 <![CDATA['prot-scriber' 13 <![CDATA['prot-scriber'
14 #if str($input_config.input_config_selector) == "basic" 14 #if str($input_config.input_config_selector) == "basic"
15 #for $sst in $input_config.seq_sim_table 15 #for $sst in $input_config.seq_sim_table
16 -s '$sst' 16 -s '$sst'
17 #end for 17 #end for
73 -x 73 -x
74 #end if 74 #end if
75 -o '$output' 75 -o '$output'
76 ]]> 76 ]]>
77 </command> 77 </command>
78 <inputs> 78 <inputs>
79 <conditional name="input_config"> 79 <conditional name="input_config">
80 <param type="select" name="input_config_selector" label="Choose input configuration options"> 80 <param name="input_config_selector" type="select" label="Choose input configuration options">
81 <option value="basic" selected="true">Basic</option> 81 <option value="basic" selected="true">Basic</option>
82 <option value="advanced">Advanced</option> 82 <option value="advanced">Advanced</option>
83 </param> 83 </param>
84 <when value="basic"> 84 <when value="basic">
85 <param type="data" multiple="true" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them. 85 <param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them. Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." multiple="true"/>
86 Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." /> 86 </when>
87 </when> 87 <when value="advanced">
88 <when value="advanced"> 88 <repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1">
89 <repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1"> 89 <param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them. Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)."/>
90 <param type="data" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them. 90 <param name="field_separator" argument="-p" type="text" optional="true" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default">
91 Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." /> 91 <sanitizer>
92 <param type="text" optional="true" name="field_separator" argument="-p" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default"> 92 <valid initial="default">
93 <sanitizer> 93 <add preset="string.printable"/>
94 <valid initial="default"> 94 </valid>
95 <add preset="string.printable" /> 95 </sanitizer>
96 </valid> 96 </param>
97 </sanitizer> 97 <param name="header" argument="-e" type="text" optional="true" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table. Separated by space (' ') the names of the columns in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default"/>
98 </param> 98 <param name="blacklist_regexs" argument="-b" type="data" format="tabular" optional="true" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default"/>
99 <param type="text" optional="true" name="header" argument="-e" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table. Separated by space (' ') the names of the 99 <param name="capture_replace_pairs" argument="-c" type="data" format="tabular" optional="true" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expression defining one or more capture groups. The second line of a pair is the string used to replace the match in the regular expression with. Set to 'default' to use the hard coded default"/>
100 in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default" /> 100 <param name="filter_regexs" argument="-l" type="data" format="tabular" optional="true" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default"/>
101 <param type="data" optional="true" name="blacklist_regexs" argument="-b" format="tabular" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these 101 </repeat>
102 regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default" /> 102 <section title="Expert options" name="expert_options">
103 <param type="data" optional="true" name="capture_replace_pairs" argument="-c" format="tabular" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expression 103 <param name="non_informative_words_regexs" argument="-w" type="data" format="tabular" optional="true" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description."/>
104 defining one or more capture groups. The second line of a pair is the string used to replace the match in the regular expression with. Set to 'default' to use the hard coded default" /> 104 <param name="description_split_regex" argument="-r" type="text" optional="true" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast terminology) into words. Default is '([~_\-/|\;,':.\s]+)'.">
105 <param type="data" optional="true" name="filter_regexs" argument="-l" format="tabular" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these 105 <sanitizer>
106 regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default" /> 106 <valid initial="default">
107 </repeat> 107 <add preset="string.printable"/>
108 <section title="Expert options" name="expert_options"> 108 </valid>
109 <param type="data" optional="true" name="non_informative_words_regexs" argument="-w" format="tabular" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These 109 </sanitizer>
110 regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description." /> 110 </param>
111 <param type="text" optional="true" name="description_split_regex" argument="-r" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast 111 <param name="center_inverse_word_information_content_at_quantile" argument="-q" type="integer" optional="true" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information content to center these values. Value between 0 and 1."/>
112 terminology) into words. Default is '([~_\-/|\;,':.\s]+)'."> 112 <param name="polish_capture_replace_pairs" argument="-d" type="data" format="txt" optional="true" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace pairs for post polishing of annotation results. Set to 'none' or provide an empty file to suppress polishing."/>
113 <sanitizer> 113 </section>
114 <valid initial="default"> 114 </when>
115 <add preset="string.printable" /> 115 </conditional>
116 </valid> 116 <section title="Sequence family annotation" name="seq_family">
117 </sanitizer> 117 <param name="seq_families" argument="-f" type="data" format="tabular" optional="true" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in more than one family."/>
118 </param> 118 <param name="annotate_non_family_queries" argument="-a" type="boolean" optional="true" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences that are not members of a sequence family."/>
119 <param type="integer" optional="true" name="center_inverse_word_information_content_at_quantile" argument="-q" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information 119 <param name="seq_family_gene_ids_separator" argument="-g" type="text" optional="true" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'.">
120 content to center these values. Value between 0 and 1." /> 120 <sanitizer>
121 <param type="data" optional="true" name="polish_capture_replace_pairs" argument="-d" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace 121 <valid initial="default">
122 pairs for post polishing of annotation results. Set to 'none' or provide an empty file to suppress polishing."/> 122 <add preset="string.printable"/>
123 </valid>
124 </sanitizer>
125 </param>
126 <param name="seq_family_id_genes_separator" argument="-i" type="text" optional="true" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'.">
127 <sanitizer>
128 <valid initial="default">
129 <add preset="string.printable"/>
130 </valid>
131 </sanitizer>
132 </param>
123 </section> 133 </section>
124 </when> 134 <param name="exclude_not_annotated_queries" argument="-x" type="boolean" optional="true" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/>
125 </conditional> 135 </inputs>
126 <section title="Sequence family annotation" name="seq_family"> 136 <outputs>
127 <param type="data" optional="true" name="seq_families" argument="-f" format="tabular" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each 137 <data format="tabular" name="output"/>
128 line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in 138 </outputs>
129 more than one family." /> 139 <tests>
130 <param type="boolean" optional="true" name="annotate_non_family_queries" argument="-a" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences that are not members of a sequence family." /> 140 <test>
131 <param type="text" optional="true" name="seq_family_gene_ids_separator" argument="-g" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the 141 <param name="input_config_selector" value="basic"/>
132 argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'."> 142 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt"/>
133 <sanitizer> 143 <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
134 <valid initial="default"> 144 </test>
135 <add preset="string.printable" /> 145 <test>
136 </valid> 146 <param name="input_config_selector" value="advanced"/>
137 </sanitizer> 147 <repeat name="advanced_input_repeat">
138 </param> 148 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
139 <param type="text" optional="true" name="seq_family_id_genes_separator" argument="-i" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This 149 <param name="field_separator" value="default"/>
140 string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'."> 150 <param name="header" value="qacc sacc stitle"/>
141 <sanitizer> 151 </repeat>
142 <valid initial="default"> 152 <repeat name="advanced_input_repeat">
143 <add preset="string.printable" /> 153 <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
144 </valid> 154 <param name="field_separator" value="default"/>
145 </sanitizer> 155 <param name="header" value="qacc sacc stitle"/>
146 </param> 156 </repeat>
147 </section> 157 <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
148 <param type="boolean" optional="true" name="exclude_not_annotated_queries" argument="-x" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/> 158 </test>
149 </inputs> 159 <test>
150 <outputs> 160 <param name="input_config_selector" value="advanced"/>
151 <data format="tabular" name="output" /> 161 <repeat name="advanced_input_repeat">
152 </outputs> 162 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
153 <tests> 163 <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
154 <test> 164 </repeat>
155 <param name="input_config_selector" value="basic"/> 165 <repeat name="advanced_input_repeat">
156 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt" /> 166 <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
157 <output name="output" file="8_Proteins_prot-scriber.out" sort="true" /> 167 <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
158 </test> 168 </repeat>
159 <test> 169 <param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)"/>
160 <param name="input_config_selector" value="advanced" /> 170 <param name="center_inverse_word_information_content_at_quantile" value="50"/>
161 <repeat name="advanced_input_repeat"> 171 <output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
162 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" /> 172 </test>
163 <param name="field_separator" value="default" /> 173 </tests>
164 <param name="header" value="qacc sacc stitle" /> 174 <help>
165 </repeat>
166 <repeat name="advanced_input_repeat">
167 <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
168 <param name="field_separator" value="default" />
169 <param name="header" value="qacc sacc stitle" />
170 </repeat>
171 <output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
172 </test>
173 <test>
174 <param name="input_config_selector" value="advanced" />
175 <repeat name="advanced_input_repeat">
176 <param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" />
177 <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
178 </repeat>
179 <repeat name="advanced_input_repeat">
180 <param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
181 <param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
182 </repeat>
183 <param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)" />
184 <param name="center_inverse_word_information_content_at_quantile" value="50" />
185 <output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
186 </test>
187 </tests>
188 <help>
189 <![CDATA[ 175 <![CDATA[
190 176
191 **What it does** 177 **What it does**
192 178
193 prot-scriber_ assigns short human readable descriptions (HRD) to query biological sequences using reference candidate descriptions. 179 prot-scriber_ assigns short human readable descriptions (HRD) to query biological sequences using reference candidate descriptions.
341 Exclude results from the output table that could not be annotated, i.e. 'unknown 327 Exclude results from the output table that could not be annotated, i.e. 'unknown
342 protein' or 'unknown sequence family', respectively. 328 protein' or 'unknown sequence family', respectively.
343 329
344 ]]> 330 ]]>
345 </help> 331 </help>
332 <citations>
333 <citation type="bibtex">
334 @misc{githubprot-scriber,
335 author = {Asis Hallab},
336 year = {2024},
337 title = {prot-scriber},
338 publisher = {Github},
339 journal = {Github repository},
340 url = {https://github.com/usadellab/prot-scriber},
341 }</citation>
342 </citations>
346 </tool> 343 </tool>