prot_scriber: prot-scriber.xml comparison

comparison prot-scriber.xml @ 3:863ab6ebcafc draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prot-scriber commit bac22f562727babce8e0f456c82408c3063a683d

author	iuc
date	Sat, 18 May 2024 20:36:38 +0000
parents	4d4df9779b7b
children

comparison

equal deleted inserted replaced

-:4d4df9779b7b
+:863ab6ebcafc
 <tool id="prot_scriber" name="prot-scriber" version="@TOOL_VERSION@" profile="21.05">
 <description>Protein annotation of short human readable descriptions</description>
 <macros>
-<token name="@TOOL_VERSION@">0.1.4</token>
+<token name="@TOOL_VERSION@">0.1.5</token>
 </macros>
 <requirements>
 <requirement type="package" version="@TOOL_VERSION@">prot-scriber</requirement>
 </requirements>
 <stdio>
-<regex match="panicked" level="fatal" source="stderr" />
+<regex match="panicked" level="fatal" source="stderr"/>
 </stdio>
 <command>
 <![CDATA['prot-scriber'
 #if str($input_config.input_config_selector) == "basic"
 #for $sst in $input_config.seq_sim_table
 -s '$sst'
 #end for
 -x
 #end if
 -o '$output'
 ]]>
 </command>
 <inputs>
 <conditional name="input_config">
-<param type="select" name="input_config_selector" label="Choose input configuration options">
+<param name="input_config_selector" type="select" label="Choose input configuration options">
 <option value="basic" selected="true">Basic</option>
 <option value="advanced">Advanced</option>
 </param>
 <when value="basic">
-<param type="data" multiple="true" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.
+<param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search results in tabular format (-s)" help="Files in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.         Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." multiple="true"/>
-Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." />
+</when>
-</when>
+<when value="advanced">
-<when value="advanced">
+<repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1">
-<repeat name="advanced_input_repeat" title="Sequence similarity table" min="1" default="1">
+<param name="seq_sim_table" argument="-s" type="data" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.           Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)."/>
-<param type="data" name="seq_sim_table" argument="-s" format="tabular" label="Sequence similarity search result in tabular format (-s)" help="File in which to find sequence similarity search results in tabular format (SSST). Use e.g. Blast or Diamond to produce them.
+<param name="field_separator" argument="-p" type="text" optional="true" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default">
-Required columns are: 'qacc sacc stitle' (Blast) or 'qseqid sseqid stitle' (Diamond)." />
+<sanitizer>
-<param type="text" optional="true" name="field_separator" argument="-p" label="Field separator (-p)" help="Field-Separator of the (-s) sequence similarity table. The default value is the 'TAB' character. Set to 'default' to use the hard coded default">
+<valid initial="default">
-<sanitizer>
+<add preset="string.printable"/>
-<valid initial="default">
+</valid>
-<add preset="string.printable" />
+</sanitizer>
-</valid>
+</param>
-</sanitizer>
+<param name="header" argument="-e" type="text" optional="true" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table: the column names, separated by spaces (' '), in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default"/>
-</param>
+<param name="blacklist_regexs" argument="-b" type="data" format="tabular" optional="true" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these             regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default"/>
-<param type="text" optional="true" name="header" argument="-e" label="Header of the sequence similarity tables (-e)" help="Header of the (-s) sequence similarity table. Separated by space (' ') the names of the
+<param name="capture_replace_pairs" argument="-c" type="data" format="tabular" optional="true" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expression defining one or more capture groups. The second line of a pair is the string that replaces the match of that regular expression. Set to 'default' to use the hard coded default"/>
-in order of appearance in the respective table. Required and default columns are 'qacc sacc stitle'. Set to 'default' to use the hard coded default" />
+<param name="filter_regexs" argument="-l" type="data" format="tabular" optional="true" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these             regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default"/>
-<param type="data" optional="true" name="blacklist_regexs" argument="-b" format="tabular" label="Blacklist Regexs (-b)" help="A file with regular expressions, one per line. Any match to any of these
+</repeat>
-regular expressions causes sequence similarity search result descriptions ('stitle' in Blast terminology) to be discarded from the prot-scriber annotation process. Set to 'default' to use the hard coded default" />
+<section title="Expert options" name="expert_options">
-<param type="data" optional="true" name="capture_replace_pairs" argument="-c" format="tabular" label="Capture replace pairs (-c)" help="A file with pairs of lines. Within each pair the first line is a regular expressions
+<param name="non_informative_words_regexs" argument="-w" type="data" format="tabular" optional="true" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These             regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description."/>
-defining one or more capture groups. The second line of a pair is the string used to replace the match in the regular expression with. Set to 'default' to use the hard coded default" />
+<param name="description_split_regex" argument="-r" type="text" optional="true" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast             terminology) into words. Default is '([~_\-/|\;,':.\s]+)'.">
-<param type="data" optional="true" name="filter_regexs" argument="-l" format="tabular" label="Filter regexs (-l)" help="A file with regular expressions, one per line. Any match to any of these
+<sanitizer>
-regular expressions causes the matched sub-string to be deleted, i.e. filtered out. Set to 'default' to use the hard coded default" />
+<valid initial="default">
-</repeat>
+<add preset="string.printable"/>
-<section title="Expert options" name="expert_options">
+</valid>
-<param type="data" optional="true" name="non_informative_words_regexs" argument="-w" format="tabular" label="Non informative words regexs (-w)" help="A file in which regular expressions (regexs) are stored, one per line. These
+</sanitizer>
-regexs are used to recognize non-informative words, which will only receive a minimum score in the prot-scriber process that generates human readable description." />
+</param>
-<param type="text" optional="true" name="description_split_regex" argument="-r" label="Description split regex (-r)" help="A regular expression to be used to split descriptions (`stitle` in Blast
+<param name="center_inverse_word_information_content_at_quantile" argument="-q" type="integer" optional="true" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information content to center these values. Integer percentile between 0 and 100, e.g. 50 to center at the median."/>
-terminology) into words. Default is '([~_\-/|\;,':.\s]+)'.">
+<param name="polish_capture_replace_pairs" argument="-d" type="data" format="txt" optional="true" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace pairs for post polishing of annotation results. Set to 'none' or provide an empty file to suppress polishing."/>
-<sanitizer>
+</section>
-<valid initial="default">
+</when>
-<add preset="string.printable" />
+</conditional>
-</valid>
+<section title="Sequence family annotation" name="seq_family">
-</sanitizer>
+<param name="seq_families" argument="-f" type="data" format="tabular" optional="true" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each         line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in         more than one family."/>
-</param>
+<param name="annotate_non_family_queries" argument="-a" type="boolean" optional="true" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences that are not members of a sequence family."/>
-<param type="integer" optional="true" name="center_inverse_word_information_content_at_quantile" argument="-q" label="Center inverse word-information-content at quantile (-q)" help="The quantile (percentile) to be subtracted from calculated inverse word information
+<param name="seq_family_gene_ids_separator" argument="-g" type="text" optional="true" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the         argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'.">
-content to center these values. Value between 0 and 1." />
+<sanitizer>
-<param type="data" optional="true" name="polish_capture_replace_pairs" argument="-d" label="Polishing capture replace pairs (-d)" help="A file with pairs of lines. Defines pairs of regex / replace
+<valid initial="default">
-pairs for post polishing of annotation results. Set to 'none' or provide an empty file to supress polishing."/>
+<add preset="string.printable"/>
+</valid>
+</sanitizer>
+</param>
+<param name="seq_family_id_genes_separator" argument="-i" type="text" optional="true" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This         string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'.">
+<sanitizer>
+<valid initial="default">
+<add preset="string.printable"/>
+</valid>
+</sanitizer>
+</param>
 </section>
-</when>
+<param name="exclude_not_annotated_queries" argument="-x" type="boolean" optional="true" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/>
-</conditional>
+</inputs>
-<section title="Sequence family annotation" name="seq_family">
+<outputs>
-<param type="data" optional="true" name="seq_families" argument="-f" format="tabular" label="Families of biological sequences (-f)" help="A file in which families of biological sequences are stored, one family per line. Each
+<data format="tabular" name="output"/>
-line must have format 'fam_name TAB gene1,gene2,gene3'. Make sure no gene appears in
+</outputs>
-more than one family." />
+<tests>
-<param type="boolean" optional="true" name="annotate_non_family_queries" argument="-a" label="Annotate non family query sequences (-a)" help="Set this to true to also annotate sequences are not member of a sequence family." />
+<test>
-<param type="text" optional="true" name="seq_family_gene_ids_separator" argument="-g" label="Sequence family file gene-id separator (-g)" help=" A regular expression used to split the list of gene_identifiers in the
+<param name="input_config_selector" value="basic"/>
-argument --seq-families (-f) gene families file. Default is '(\s*,\s*|\s+)'.">
+<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt"/>
-<sanitizer>
+<output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
-<valid initial="default">
+</test>
-<add preset="string.printable" />
+<test>
-</valid>
+<param name="input_config_selector" value="advanced"/>
-</sanitizer>
+<repeat name="advanced_input_repeat">
-</param>
+<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
-<param type="text" optional="true" name="seq_family_id_genes_separator" argument="-i" label="Sequence family file family - gene-id separator (-i)" help="A string used as separator in the argument --seq-families (-f) gene families file. This
+<param name="field_separator" value="default"/>
-string separates the gene_family_identifier (name) from the gene_identifier list that family comprises. Default is 'TAB'.">
+<param name="header" value="qacc sacc stitle"/>
-<sanitizer>
+</repeat>
-<valid initial="default">
+<repeat name="advanced_input_repeat">
-<add preset="string.printable" />
+<param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
-</valid>
+<param name="field_separator" value="default"/>
-</sanitizer>
+<param name="header" value="qacc sacc stitle"/>
-</param>
+</repeat>
-</section>
+<output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
-<param type="boolean" optional="true" name="exclude_not_annotated_queries" argument="-x" label="Exclude not annotated query sequences (-x)" help="Use this option to exclude results from the output table that could not be annotated."/>
+</test>
-</inputs>
+<test>
-<outputs>
+<param name="input_config_selector" value="advanced"/>
-<data format="tabular" name="output" />
+<repeat name="advanced_input_repeat">
-</outputs>
+<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt"/>
-<tests>
+<param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
-<test>
+</repeat>
-<param name="input_config_selector" value="basic"/>
+<repeat name="advanced_input_repeat">
-<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt,8_Proteins_vs_Trembl_blastp.txt" />
+<param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt"/>
-<output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
+<param name="blacklist_regexs" value="blacklist_stitle_regexs.txt"/>
-</test>
+</repeat>
-<test>
+<param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)"/>
-<param name="input_config_selector" value="advanced" />
+<param name="center_inverse_word_information_content_at_quantile" value="50"/>
-<repeat name="advanced_input_repeat">
+<output name="output" file="8_Proteins_prot-scriber.out" sort="true"/>
-<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" />
+</test>
-<param name="field_separator" value="default" />
+</tests>
-<param name="header" value="qacc sacc stitle" />
+<help>
-</repeat>
-<repeat name="advanced_input_repeat">
-<param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
-<param name="field_separator" value="default" />
-<param name="header" value="qacc sacc stitle" />
-</repeat>
-<output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
-</test>
-<test>
-<param name="input_config_selector" value="advanced" />
-<repeat name="advanced_input_repeat">
-<param name="seq_sim_table" value="8_Proteins_vs_Swissprot_blastp.txt" />
-<param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
-</repeat>
-<repeat name="advanced_input_repeat">
-<param name="seq_sim_table" value="8_Proteins_vs_Trembl_blastp.txt" />
-<param name="blacklist_regexs" value="blacklist_stitle_regexs.txt" />
-</repeat>
-<param name="description_split_regex" value="([~_\-/|;,'\'':.\s]+)" />
-<param name="center_inverse_word_information_content_at_quantile" value="50" />
-<output name="output" file="8_Proteins_prot-scriber.out" sort="true" />
-</test>
-</tests>
-<help>
 <![CDATA[
 **What it does**
 prot-scriber_ assigns short human readable descriptions (HRD) to query biological sequences using reference candidate descriptions.
 Exclude results from the output table that could not be annotated, i.e. 'unknown
 protein' or 'unknown sequence family', respectively.
 ]]>
 </help>
+<citations>
+<citation type="bibtex">
+@misc{githubprot-scriber,
+author = {Asis Hallab},
+year = {2024},
+title = {prot-scriber},
+publisher = {Github},
+journal = {Github repository},
+url = {https://github.com/usadellab/prot-scriber},
+}</citation>
+</citations>
 </tool>

Mercurial > repos > iuc > prot_scriber

comparison prot-scriber.xml @ 3:863ab6ebcafc draft default tip