Mercurial > repos > galaxyp > unipept
view unipept.xml @ 9:21a560af5913 draft default tip
planemo upload for repository https://unipept.ugent.be/apidocs commit 19735e85caae264d98562f6fdb3b213841087fc7
author | galaxyp |
---|---|
date | Tue, 12 Mar 2024 11:44:08 +0000 |
parents | 7863f1abcdda |
children |
line wrap: on
line source
<tool id="unipept" name="Unipept" version="4.5.1" profile="22.05">
<description>retrieve taxonomy for peptides</description>
<!--
    Reusable parameter definitions, pulled in with <expand macro="..."/>.
    Macros containing <yield/> receive the child elements of the expanding
    tag at that position (used below to inject an API specific <help>).
-->
<macros>
    <!-- Boolean flag: emits "-e" when checked (default), nothing otherwise. -->
    <xml name="equate_il">
        <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="true" label="Equate isoleucine and leucine">
            <help>isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records</help>
        </param>
    </xml>
    <!-- Boolean flag "-x", unchecked by default; the caller supplies the help text. -->
    <xml name="extra">
        <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="false" label="retrieve extra information">
            <yield/>
        </param>
    </xml>
    <!-- Same parameter as "extra" but checked by default. -->
    <xml name="extra_true">
        <param name="extra" type="boolean" truevalue="-x" falsevalue="" checked="true" label="retrieve extra information">
            <yield/>
        </param>
    </xml>
    <!-- Two lineage related flags: "-n" (checked by default) and "-A" (unchecked by default). -->
    <xml name="names">
        <param name="names" type="boolean" truevalue="-n" falsevalue="" checked="true" label="names">
            <help>return the names in complete taxonomic lineage</help>
        </param>
        <param name="allfields" type="boolean" truevalue="-A" falsevalue="" checked="false" label="allfields">
            <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help>
        </param>
    </xml>
    <!-- Boolean flag "-D", unchecked by default; the caller may supply extra child elements. -->
    <xml name="domains">
        <param name="domains" type="boolean" truevalue="-D" falsevalue="" checked="false" label="group responses by GO namespace (biological process, molecular function, cellular component)">
            <yield/>
        </param>
    </xml>
    <!--
        Multi-select of result files. Callers can add API specific options
        through <yield/>, which are placed before the "unmatched" option.
        NOTE(review): no <expand> of this macro is visible in this file; the
        inputs section declares its own "selected_outputs" parameter.
    -->
    <xml name="selected_outputs">
        <param name="selected_outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
            <option value="tsv" selected="true">Tabular with one line per peptide</option>
            <option value="csv">Comma Separated Values (.csv) with one line per peptide</option>
            <option value="json">JSON Taxonomy Tree (for pept2lca, pept2taxa, and peptinfo)</option>
            <yield/>
            <option value="unmatched">Unmatched peptides</option>
        </param>
    </xml>
</macros>
<xrefs>
    <xref type="bio.tools">unipept</xref>
</xrefs>
<requirements>
    <requirement type="package" version="3">python</requirement>
</requirements>
<!--
    Command line for unipept.py. The Cheetah template below selects the API,
    passes the API specific flags, picks the input option that matches the
    chosen peptide source and adds one output option per selected result file.
    All dataset paths and select values are single quoted for the shell.
-->
<command detect_errors="exit_code"><![CDATA[
## Values reused by several conditions below, normalised to plain strings once.
#set $api_name = str($unipept.api)
#set $chosen_outputs = str($selected_outputs).split(',')
#set $src_fmt = str($peptide_src.fmt)
python '$__tool_directory__/unipept.py'
## NOTE(review): disabled options; assumed to be a single comment line ending after "-M 1" - confirm.
## --url 'http://morty.ugent.be/api/v1' -g -M 1
--api='$api_name'
$unipept.equate_il $unipept.extra
#if $api_name in ['pept2lca', 'pept2taxa', 'peptinfo']:
    $unipept.names $unipept.allfields
#end if
## NOTE(review): pept2interpro also shows the "domains" checkbox but the flag is not passed for it; confirm against unipept.py.
#if $api_name in ['pept2go', 'pept2funct', 'peptinfo']:
    $unipept.domains
#end if
$strict
#if $src_fmt == 'proteomic':
    ## Generic input: choose the option from the datatype of the supplied dataset.
    #if $peptide_src.input.is_of_type('fasta'):
        --fasta='$peptide_src.input'
    #elif $peptide_src.input.is_of_type('mzid'):
        --mzid='$peptide_src.input'
    #elif $peptide_src.input.is_of_type('pepxml'):
        --pepxml='$peptide_src.input'
    #end if
#elif $src_fmt == 'tabular':
    --tabular='$peptide_src.input_tsv'
    ## The form value is 1-based; one is subtracted before it is passed on.
    #set $col = int(str($peptide_src.column)) - 1
    --column=$col
#elif $src_fmt == 'fasta':
    --fasta='$peptide_src.input_fasta'
#elif $src_fmt == 'mzid':
    --mzid='$peptide_src.input_mzid'
#elif $src_fmt == 'pepxml':
    --pepxml='$peptide_src.input_pepxml'
#end if
--peptide_match '$peptide_match'
#if str($peptide_match) == 'report' and str($unmatched_aa) != 'default':
    --unmatched_aa '$unmatched_aa'
#end if
## Each output option mirrors the <filter> of the corresponding <data> element.
#if 'json' in $chosen_outputs and $api_name in ['pept2lca', 'pept2taxa', 'peptinfo']:
    --json '$output_json'
#end if
#if 'ec_json' in $chosen_outputs and $api_name in ['pept2ec', 'pept2funct', 'peptinfo']:
    --ec_json '$output_ec_json'
#end if
#if 'tsv' in $chosen_outputs:
    --tsv '$output_tsv'
#end if
#if 'csv' in $chosen_outputs:
    --csv '$output_csv'
#end if
#if 'ec_tsv' in $chosen_outputs and $api_name in ['pept2ec', 'pept2funct', 'peptinfo']:
    --ec_tsv '$output_ec_tsv'
#end if
#if 'go_tsv' in $chosen_outputs and $api_name in ['pept2go', 'pept2funct', 'peptinfo']:
    --go_tsv '$output_go_tsv'
#end if
#if 'ipr_tsv' in $chosen_outputs and $api_name in ['pept2interpro', 'pept2funct', 'peptinfo']:
    --ipr_tsv '$output_ipr_tsv'
#end if
#if 'unmatched' in $chosen_outputs:
    --unmatched '$output_unmatched'
#end if
]]></command>
<inputs>
    <!-- API selection; each branch exposes only the flags referenced for that API. -->
    <conditional name="unipept">
        <param name="api" type="select" label="Unipept application">
            <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option>
            <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>
            <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>
            <option value="pept2ec">pept2ec: Tryptic peptides and associated EC terms</option>
            <option value="pept2go">pept2go: Tryptic peptides and associated GO terms</option>
            <option value="pept2interpro">pept2interpro: Tryptic peptides and associated InterPro entries</option>
            <option value="pept2funct">pept2funct: Tryptic peptides and associated EC terms, GO terms and InterPro entries</option>
            <option value="peptinfo">peptinfo: Tryptic peptides and associated EC terms, GO terms, InterPro entries and lowest common ancestor taxonomy</option>
        </param>
        <when value="pept2lca">
            <expand macro="equate_il"/>
            <expand macro="extra">
                <help>Return the complete lineage of the taxonomic lowest common ancestor, and include ID fields.</help>
            </expand>
            <expand macro="names"/>
        </when>
        <when value="pept2taxa">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the complete lineage of each organism, and include ID fields.</help>
            </expand>
            <expand macro="names"/>
        </when>
        <when value="pept2prot">
            <expand macro="equate_il"/>
            <expand macro="extra">
                <help>Return additional information fields: taxon_name, ec_references, go_references, refseq_ids, refseq_protein_ids, insdc_ids, insdc_protein_ids. WARNING: Huge performance penalty! Only use for small number of peptides when the extra information is required.</help>
            </expand>
        </when>
        <when value="pept2ec">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the name of the EC-number.</help>
            </expand>
        </when>
        <when value="pept2go">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the name of the GO-term.</help>
            </expand>
            <expand macro="domains"/>
        </when>
        <when value="pept2interpro">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the type and name of the InterPro entry.</help>
            </expand>
            <expand macro="domains"/>
        </when>
        <when value="pept2funct">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the names of the EC-numbers, GO-terms and InterPro entries.</help>
            </expand>
            <expand macro="domains"/>
        </when>
        <when value="peptinfo">
            <expand macro="equate_il"/>
            <expand macro="extra_true">
                <help>Return the names of the EC-numbers, GO-terms and InterPro entries.</help>
            </expand>
            <expand macro="domains"/>
            <expand macro="names"/>
        </when>
    </conditional>
    <!-- Peptide source; the parameter name differs per branch and is matched in the command template. -->
    <conditional name="peptide_src">
        <param name="fmt" type="select" label="Peptides input format">
            <option value="proteomic">proteomics formats: mzid, pepxml, fasta</option>
            <option value="tabular">tabular</option>
            <option value="fasta">fasta</option>
            <option value="mzid">mzid</option>
            <option value="pepxml">pepxml</option>
        </param>
        <when value="proteomic">
            <param name="input" type="data" format="mzid,pepxml,fasta" label="Peptide Input"/>
        </when>
        <when value="tabular">
            <param name="input_tsv" type="data" format="tabular" label="Tabular Input Containing Peptide column"/>
            <param name="column" label="Select column with peptides" type="data_column" numerical="false" data_ref="input_tsv"/>
        </when>
        <when value="fasta">
            <param name="input_fasta" type="data" format="fasta" label="Peptide Fasta Input"/>
        </when>
        <when value="mzid">
            <param name="input_mzid" type="data" format="mzid" label="mzIdentML Input"/>
        </when>
        <when value="pepxml">
            <param name="input_pepxml" type="data" format="pepxml" label="pepXML Input"/>
        </when>
    </conditional>
    <param name="peptide_match" type="select" label="Match input peptides by">
        <option value="full">Match to the full input peptide</option>
        <option value="best" selected="true">Best match to tryptic parts of the input peptide</option>
        <option value="report">Best match to tryptic parts, add tryptic_match column</option>
    </param>
    <!-- Only forwarded to the script when "report" matching is chosen and a non-default value is picked. -->
    <param name="unmatched_aa" type="select" label="Show tryptic_match as:">
        <option value="default">List of matched tryptic parts</option>
        <option value=".">peptide with unmatched AAs as .</option>
        <option value="x">peptide with unmatched AAs as x</option>
    </param>
    <!-- API specific choices are ignored for APIs not named in their label (see output filters). -->
    <param name="selected_outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">
        <option value="tsv" selected="true">Tabular with one line per peptide</option>
        <option value="csv">Comma Separated Values (.csv) with one line per peptide</option>
        <option value="json">JSON Taxonomy Tree (for pept2lca, pept2taxa, and peptinfo)</option>
        <option value="go_tsv">Peptide GO terms in normalized tabular (for pept2go, pept2funct, and peptinfo)</option>
        <option value="ipr_tsv">Peptide InterPro entries in normalized tabular (for pept2interpro, pept2funct, and peptinfo)</option>
        <option value="ec_tsv">Peptide EC terms in normalized tabular (for pept2ec, pept2funct, and peptinfo)</option>
        <option value="ec_json">JSON EC Coverage Tree (for pept2ec, pept2funct, and peptinfo)</option>
        <option value="unmatched">Unmatched peptides</option>
    </param>
    <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/>
</inputs>
<!-- One dataset per selectable result; every filter matches a condition in the command template. -->
<outputs>
    <data name="output_json" format="d3_hierarchy" label="${tool.name} ${unipept.api} on ${on_string} Taxonomy json">
        <filter>'json' in selected_outputs and unipept['api'] in ('pept2lca', 'pept2taxa', 'peptinfo')</filter>
        <!-- NOTE(review): the filter above never admits pept2prot, so this rule looks unreachable; confirm before removing. -->
        <change_format>
            <when input="api" value="pept2prot" format="json"/>
        </change_format>
    </data>
    <data name="output_ec_json" format="d3_hierarchy" label="${tool.name} ${unipept.api} on ${on_string} EC json">
        <filter>'ec_json' in selected_outputs and unipept['api'] in ('pept2ec', 'pept2funct', 'peptinfo')</filter>
    </data>
    <data name="output_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} tsv">
        <filter>'tsv' in selected_outputs</filter>
        <actions>
            <action name="comment_lines" type="metadata" default="1"/>
            <!--
            <conditional name="unipept.api">
                <when value="pept2funct">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,ec_numbers,ec_protein_counts,ec_names,go_terms,go_protein_counts,go_names" />
                </when>
                <when value="pept2go">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,go_terms,go_protein_counts,go_names" />
                </when>
                <when value="pept2ec">
                    <action name="column_names" type="metadata" default="peptide,total_protein_count,ec_numbers,ec_protein_counts,ec_names" />
                </when>
            </conditional>
            -->
        </actions>
    </data>
    <data name="output_csv" format="csv" label="${tool.name} ${unipept.api} on ${on_string} csv">
        <filter>'csv' in selected_outputs</filter>
    </data>
    <data name="output_ec_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} EC tsv">
        <filter>'ec_tsv' in selected_outputs and unipept['api'] in ('pept2ec', 'pept2funct', 'peptinfo')</filter>
        <actions>
            <action name="column_names" type="metadata" default="Peptide,Total Protein Count,EC Number,Protein Count,EC Name"/>
        </actions>
    </data>
    <data name="output_go_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} GO tsv">
        <filter>'go_tsv' in selected_outputs and unipept['api'] in ('pept2go', 'pept2funct', 'peptinfo')</filter>
        <actions>
            <action name="column_names" type="metadata" default="Peptide,Total Protein Count,GO Term,Protein Count,GO Name"/>
        </actions>
    </data>
    <data name="output_ipr_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} InterPro tsv">
        <filter>'ipr_tsv' in selected_outputs and unipept['api'] in ('pept2interpro', 'pept2funct', 'peptinfo')</filter>
        <actions>
            <action name="column_names" type="metadata" default="Peptide,Total Protein Count,InterPro Code,Protein Count,InterPro Type,InterPro Name"/>
        </actions>
    </data>
    <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched">
        <filter>'unmatched' in selected_outputs</filter>
        <actions>
            <action name="column_names" type="metadata" default="Unmatched Peptide"/>
        </actions>
    </data>
</outputs>
<tests>
    <!-- Test-1 -->
    <test expect_num_outputs="2">
        <param name="api" value="pept2lca"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="tryptic.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="Homininae"/>
            </assert_contents>
        </output>
        <output name="output_unmatched">
            <assert_contents>
                <has_text text="QTAMAV"/>
            </assert_contents>
        </output>
    </test>
    <!-- Test-2 -->
    <test expect_num_outputs="2">
        <param name="api" value="pept2lca"/>
        <param name="fmt" value="fasta"/>
        <param name="input_fasta" value="peptide.fa"/>
        <param name="equate_il" value="True"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="json,tsv"/>
        <output name="output_json">
            <assert_contents>
                <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV"/>
            </assert_contents>
        </output>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="9606"/>
                <has_text text="9596"/>
            </assert_contents>
        </output>
    </test>
    <!-- Test-3 -->
    <test expect_num_outputs="1">
        <param name="api" value="pept2taxa"/>
        <param name="fmt" value="fasta"/>
        <param name="input_fasta" value="peptide.fa"/>
        <param name="equate_il" value="True"/>
        <param name="extra" value="False"/>
        <param name="names" value="False"/>
        <param name="selected_outputs" value="tsv"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="sapiens"/>
                <has_text text="Cercocebus"/>
            </assert_contents>
        </output>
    </test>
    <!-- Test-4 -->
    <test expect_num_outputs="5">
        <param name="api" value="pept2funct"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="input.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,ipr_tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
                <has_text text="2.2.1.1"/>
                <has_text text="IPR005475"/>
            </assert_contents>
        </output>
        <output name="output_ec_tsv">
            <assert_contents>
                <has_text text="2.2.1.1"/>
            </assert_contents>
        </output>
        <output name="output_go_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
            </assert_contents>
        </output>
        <output name="output_ipr_tsv">
            <assert_contents>
                <has_text text="IPR005475"/>
            </assert_contents>
        </output>
    </test>
    <!-- Test-5 -->
    <test expect_num_outputs="5">
        <param name="api" value="pept2funct"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="input.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="peptide_match" value="full"/>
        <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,ipr_tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
                <has_text text="2.2.1.1"/>
                <has_text text="IPR005475"/>
                <has_text_matching expression="FAPLLEEYKAEDWVQK\t0"/>
            </assert_contents>
        </output>
        <output name="output_ec_tsv">
            <assert_contents>
                <has_text text="2.2.1.1"/>
            </assert_contents>
        </output>
        <output name="output_go_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
            </assert_contents>
        </output>
        <output name="output_ipr_tsv">
            <assert_contents>
                <has_text text="IPR005475"/>
            </assert_contents>
        </output>
    </test>
    <!-- Test-6 -->
    <test expect_num_outputs="5">
        <param name="api" value="pept2funct"/>
        <param name="fmt" value="tabular"/>
        <param name="input_tsv" value="input.tsv"/>
        <param name="column" value="2"/>
        <param name="extra" value="True"/>
        <param name="names" value="True"/>
        <param name="peptide_match" value="report"/>
        <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,ipr_tsv,unmatched"/>
        <output name="output_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
                <has_text text="2.2.1.1"/>
                <has_text text="IPR005475"/>
                <has_text_matching expression="AAEGGLSR\tAAEGGLSR\t6"/>
            </assert_contents>
        </output>
        <output name="output_ec_tsv">
            <assert_contents>
                <has_text text="2.2.1.1"/>
            </assert_contents>
        </output>
        <output name="output_go_tsv">
            <assert_contents>
                <has_text text="GO:0004802"/>
            </assert_contents>
        </output>
        <output name="output_ipr_tsv">
            <assert_contents>
                <has_text text="IPR005475"/>
            </assert_contents>
        </output>
    </test>
</tests>
<help><![CDATA[
**Unipept**

Retrieve UniProt and taxonomic information for tryptic peptides.

Unipept API documentation - https://unipept.ugent.be/apidocs

**Input**

Input peptides can be retrieved from tabular, fasta, mzid, or pepxml datasets.

Processing details::

    The input peptides are split into tryptic peptide fragments in order to match the Unipept records.
    Only fragments that are complete tryptic peptides between 5 and 50 amino acids in length will be matched by Unipept.
    The match to the most specific tryptic fragment is reported.

**Unipept APIs**

**pept2prot** - https://unipept.ugent.be/apidocs/pept2prot

Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record::

    peptide: the peptide that matched this record
    uniprot_id: the UniProt accession number of the matching record
    taxon_id: the NCBI taxon id of the organism associated with the matching record

When the extra parameter is set to true, objects contain the following additional fields extracted from the UniProt record::

    taxon_name: the name of the organism associated with the matching UniProt record
    ec_references: a space separated list of associated EC numbers
    go_references: a space separated list of associated GO terms
    refseq_ids: a space separated list of associated RefSeq accession numbers
    refseq_protein_ids: a space separated list of associated RefSeq protein accession numbers
    insdc_ids: a space separated list of associated insdc accession numbers
    insdc_protein_ids: a space separated list of associated insdc protein accession numbers

**pept2taxa** - https://unipept.ugent.be/apidocs/pept2taxa

Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    taxon_id: the NCBI taxon id of the organism associated with the matching record
    taxon_name: the name of the organism associated with the matching record
    taxon_rank: the taxonomic rank of the organism associated with the matching record

When the extra parameter is set to true, objects contain additional information about the lineages of the organism extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::

    superkingdom_id
    kingdom_id
    subkingdom_id
    superphylum_id
    phylum_id
    subphylum_id
    superclass_id
    class_id
    subclass_id
    infraclass_id
    superorder_id
    order_id
    suborder_id
    infraorder_id
    parvorder_id
    superfamily_id
    family_id
    subfamily_id
    tribe_id
    subtribe_id
    genus_id
    subgenus_id
    species_group_id
    species_subgroup_id
    species_id
    subspecies_id
    varietas_id
    forma_id

**pept2lca** - https://unipept.ugent.be/apidocs/pept2lca

Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    taxon_id: the NCBI taxon id of the organism associated with the matching record
    taxon_name: the name of the organism associated with the matching record
    taxon_rank: the taxonomic rank of the organism associated with the matching record

When the extra parameter is set to true, objects contain additional information about the lineage of the taxonomic lowest common ancestor extracted from the NCBI taxonomy. The taxon id of each rank in the lineage is specified using the following information fields::

    superkingdom_id
    kingdom_id
    subkingdom_id
    superphylum_id
    phylum_id
    subphylum_id
    superclass_id
    class_id
    subclass_id
    infraclass_id
    superorder_id
    order_id
    suborder_id
    infraorder_id
    parvorder_id
    superfamily_id
    family_id
    subfamily_id
    tribe_id
    subtribe_id
    genus_id
    subgenus_id
    species_group_id
    species_subgroup_id
    species_id
    subspecies_id
    varietas_id
    forma_id

**pept2ec** - https://unipept.ugent.be/apidocs/pept2ec

Returns the functional EC-numbers associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    total_protein_count: Total amount of proteins matched with the given peptide
    ec_number: EC-number associated with the current tryptic peptide.
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.
    name: Optional, name of the EC-number. Included when the extra parameter is set to true.

**pept2go** - https://unipept.ugent.be/apidocs/pept2go

Returns the functional GO-terms associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    total_protein_count: Total amount of proteins matched with the given peptide
    go_term: The GO-term associated with the current tryptic peptide.
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.
    name: Optional, name of the GO-term. Included when the extra parameter is set to true.

**pept2interpro** - https://unipept.ugent.be/apidocs/pept2interpro

Returns the functional InterPro entries associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    total_protein_count: Total amount of proteins matched with the given peptide
    code: InterPro entry code associated with the current tryptic peptide
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current InterPro code.
    type: Optional, type of the InterPro entry. Included when the extra parameter is set to true.
    name: Optional, name of the InterPro entry. Included when the extra parameter is set to true.

**pept2funct** - https://unipept.ugent.be/apidocs/pept2funct

Returns the functional EC-numbers, GO-terms and InterPro entries associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.

By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::

    peptide: the peptide that matched this record
    total_protein_count: Total amount of proteins matched with the given peptide
    ec_number: EC-number associated with the current tryptic peptide.
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.
    name: Optional, name of the EC-number. Included when the extra parameter is set to true.
    go_term: The GO-term associated with the current tryptic peptide.
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.
    name: Optional, name of the GO-term. Included when the extra parameter is set to true.
    code: InterPro entry code associated with the current tryptic peptide
    protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current InterPro code.
    type: Optional, type of the InterPro entry. Included when the extra parameter is set to true.
    name: Optional, name of the InterPro entry. Included when the extra parameter is set to true.

**peptinfo** - https://unipept.ugent.be/apidocs/peptinfo

Returns the functional EC-numbers, GO-terms and InterPro entries together with the taxonomic lowest common ancestor for a given tryptic peptide. The functional fields are those listed for pept2funct and the taxonomy fields are those listed for pept2lca.

**Attributions**

The Unipept metaproteomics analysis pipeline.
Bart Mesuere, Griet Debyser, Maarten Aerts, Bart Devreese, Peter Vandamme and Peter Dawyndt.
Article first published online: 11 FEB 2015.
DOI: 10.1002/pmic.201400361
]]></help>
<citations>
    <citation type="doi">10.1002/pmic.201400361</citation>
</citations>
</tool>