Previous changeset 4:4953dcd7dd39 (2019-01-23) Next changeset 6:9aaa46d45472 (2020-06-02) |
Commit message:
"planemo upload for repository http://unipept.ugent.be/apidocs commit dd464f03c32f657fc555081117da18ba4c091af6-dirty" |
modified:
unipept.py unipept.xml |
b |
diff -r 4953dcd7dd39 -r 917fd3ebc223 unipept.py --- a/unipept.py Wed Jan 23 09:16:38 2019 -0500 +++ b/unipept.py Thu Apr 30 07:39:28 2020 -0400 |
[ |
b'@@ -1,663 +1,732 @@\n #!/usr/bin/env python\n """\n #\n-#------------------------------------------------------------------------------\n-# University of Minnesota\n-# Copyright 2015, Regents of the University of Minnesota\n-#------------------------------------------------------------------------------\n # Author:\n #\n # James E Johnson\n #\n #------------------------------------------------------------------------------\n """\n-\n import json\n-import logging\n import optparse\n-from optparse import OptionParser\n-import os\n import sys\n import re\n-import urllib\n-import urllib2\n+import urllib.error\n+import urllib.parse\n+import urllib.request\n \n-"""\n-pept2taxa\tjson\n-pept2lca\tjson\n-pept2prot\t\n-pept2ec\t\tecjson\tec\n-pept2go\t\t\tgo\n-pept2funct\tgo\tec\n-peptinfo\tjson ecjson ec go\n-\n-"""\n \n try:\n import xml.etree.cElementTree as ET\n except ImportError:\n import xml.etree.ElementTree as ET\n \n-def warn_err(msg,exit_code=1):\n+\n+def warn_err(msg, exit_code=1):\n sys.stderr.write(msg)\n if exit_code:\n- sys.exit(exit_code)\n+ sys.exit(exit_code)\n+\n \n go_types = [\'biological process\', \'molecular function\', \'cellular component\']\n+ipr_types = [\'Domain\', \'Family\', \'Homologous_superfamily\', \'Repeat\', \'Conserved_site\', \'Active_site\', \'Binding_site\', \'PTM\']\n ec_name_dict = {\n-\'1\' : \'Oxidoreductase\',\n-\'1.1\' : \'act on the CH-OH group of donors\',\n-\'1.2\' : \'act on the aldehyde or oxo group of donors\',\n-\'1.3\' : \'act on the CH-CH group of donors\',\n-\'1.4\' : \'act on the CH-NH2 group of donors\',\n-\'1.5\' : \'act on CH-NH group of donors\',\n-\'1.6\' : \'act on NADH or NADPH\',\n-\'1.7\' : \'act on other nitrogenous compounds as donors\',\n-\'1.8\' : \'act on a sulfur group of donors\',\n-\'1.9\' : \'act on a heme group of donors\',\n-\'1.10\' : \'act on diphenols and related substances as donors\',\n-\'1.11\' : \'act on peroxide as an acceptor -- peroxidases\',\n-\'1.12\' : \'act on hydrogen as a 
donor\',\n-\'1.13\' : \'act on single donors with incorporation of molecular oxygen\',\n-\'1.14\' : \'act on paired donors with incorporation of molecular oxygen\',\n-\'1.15\' : \'act on superoxide radicals as acceptors\',\n-\'1.16\' : \'oxidize metal ions\',\n-\'1.17\' : \'act on CH or CH2 groups\',\n-\'1.18\' : \'act on iron-sulfur proteins as donors\',\n-\'1.19\' : \'act on reduced flavodoxin as donor\',\n-\'1.20\' : \'act on phosphorus or arsenic as donors\',\n-\'1.21\' : \'act on X-H and Y-H to form an X-Y bond\',\n-\'1.97\' : \'other oxidoreductases\',\n-\'2\' : \'Transferase\',\n-\'2.1\' : \'transfer one-carbon groups, Methylase\',\n-\'2.2\' : \'transfer aldehyde or ketone groups\',\n-\'2.3\' : \'acyltransferases\',\n-\'2.4\' : \'glycosyltransferases\',\n-\'2.5\' : \'transfer alkyl or aryl groups, other than methyl groups\',\n-\'2.6\' : \'transfer nitrogenous groups\',\n-\'2.7\' : \'transfer phosphorus-containing groups\',\n-\'2.8\' : \'transfer sulfur-containing groups\',\n-\'2.9\' : \'transfer selenium-containing groups\',\n-\'3\' : \'Hydrolase\',\n-\'3.1\' : \'act on ester bonds\',\n-\'3.2\' : \'act on sugars - glycosylases\',\n-\'3.3\' : \'act on ether bonds\',\n-\'3.4\' : \'act on peptide bonds - Peptidase\',\n-\'3.5\' : \'act on carbon-nitrogen bonds, other than peptide bonds\',\n-\'3.6\' : \'act on acid anhydrides\',\n-\'3.7\' : \'act on carbon-carbon bonds\',\n-\'3.8\' : \'act on halide bonds\',\n-\'3.9\' : \'act on phosphorus-nitrogen bonds\',\n-\'3.10\' : \'act on sulfur-nitrogen bonds\',\n-\'3.11\' : \'act on carbon-phosphorus bonds\',\n-\'3.12\' : \'act on sulfur-sulfur bonds\',\n-\'3.13\' : \'act on carbon-sulfur bonds\',\n-\'4\' : \'Lyase\',\n-\'4.1\' : \'carbon-carbon lyases\',\n-\'4.2\' : \'carbon-oxygen lyases\',\n-\'4.3\' : \'carbon-nitrogen lyases\',\n-\'4.4\' : \'carbon-sulfur lyases\',\n-\'4.5\' : \'carbon-halide lyases\',\n-\'4.6\' : \'phosphorus-oxygen lyases\',\n-\'5\' : \'Isomerase\',\n-\'5.1\' : \'racemases and 
epimerases\',\n-\'5.2\' : \'cis-trans-isomerases\',\n-\'5.3\' : \'intramolecular oxidoreductases\',\n-\'5.4\' : \'intramolecular transferases -- mutases\',\n-\'5.5\' : \'intramolecular lyases\',\n-\'5.99\' : \'other isomerases\',\n-\'6\' : \'Ligase\',\n-\'6.1\' : \'form carbon-oxygen bonds\',\n-\'6.2\' : \'form carbon-sulfur bonds\',\n-\'6.3\' : \'form carbon-nitrogen bonds\',\n-\'6.'..b')\n+ elif options.unipept in [\'pept2prot\', \'pept2ec\', \'pept2go\', \'pept2interpro\', \'pept2funct\']:\n+ with open(options.json, \'w\') as outputFile:\n+ outputFile.write(str(resp))\n+ if options.ec_json:\n+ if options.unipept in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ root = get_ec_json(resp)\n+ with open(options.ec_json, \'w\') as outputFile:\n+ outputFile.write(json.dumps(root))\n+ if options.tsv or options.csv:\n+ rows = []\n+ column_names = None\n+ if options.unipept in [\'pept2ec\', \'pept2go\', \'pept2interpro\', \'pept2funct\', \'peptinfo\']:\n+ taxa = None\n+ ec_dict = None\n+ go_dict = None\n+ ipr_dict = None\n+ if options.unipept in [\'peptinfo\']:\n+ (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)\n+ if options.unipept in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ (ec_dict, ec_cols) = get_ec_dict(resp, extra=options.extra)\n+ if options.unipept in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ (go_dict, go_cols) = get_go_dict(resp, extra=options.extra)\n+ if options.unipept in [\'pept2interpro\', \'pept2funct\', \'peptinfo\']:\n+ (ipr_dict, ipr_cols) = get_ipr_dict(resp, extra=options.extra)\n+ for i, pdict in enumerate(resp):\n+ peptide = pdict[\'peptide\']\n+ total_protein_count = str(pdict[\'total_protein_count\']) if \'total_protein_count\' in pdict else \'0\'\n+ column_names = [\'peptide\', \'total_protein_count\']\n+ vals = [peptide, total_protein_count]\n+ if ec_dict:\n+ vals += ec_dict[peptide]\n+ column_names += ec_cols\n+ if go_dict:\n+ vals += go_dict[peptide]\n+ column_names += go_cols\n+ 
if ipr_dict:\n+ vals += ipr_dict[peptide]\n+ column_names += ipr_cols\n+ if taxa:\n+ vals += taxa[peptide][1:]\n+ column_names += taxon_cols[1:]\n+ rows.append(vals)\n+ elif options.unipept in [\'pept2lca\', \'pept2taxa\', \'pept2prot\']:\n+ (taxa, taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)\n+ column_names = taxon_cols\n+ rows = list(taxa.values())\n+ for peptide, vals in taxa.items():\n+ rows.append(vals)\n+ if options.tsv:\n+ with open(options.tsv, \'w\') as outputFile:\n+ if column_names:\n+ outputFile.write("#%s\\n" % \'\\t\'.join(column_names))\n+ for vals in rows:\n+ outputFile.write("%s\\n" % \'\\t\'.join(vals))\n+ if options.csv:\n+ with open(options.csv, \'w\') as outputFile:\n+ if column_names:\n+ outputFile.write("%s\\n" % \',\'.join(column_names))\n+ for vals in rows:\n+ outputFile.write("%s\\n" % \',\'.join([\'"%s"\' % (v if v else \'\') for v in vals]))\n+ if options.ec_tsv and options.unipept in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order\n+ write_ec_table(options.ec_tsv, resp, column_order)\n+ if options.go_tsv and options.unipept in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ column_order = pept2go_extra_column_order if options.extra else pept2go_column_order\n+ write_go_table(options.go_tsv, resp, column_order)\n+ if options.ipr_tsv and options.unipept in [\'pept2interpro\', \'pept2funct\', \'peptinfo\']:\n+ column_order = pept2interpro_extra_column_order if options.extra else pept2interpro_column_order\n+ write_ipr_table(options.ipr_tsv, resp, column_order)\n \n-if __name__ == "__main__" : __main__()\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 4953dcd7dd39 -r 917fd3ebc223 unipept.xml --- a/unipept.xml Wed Jan 23 09:16:38 2019 -0500 +++ b/unipept.xml Thu Apr 30 07:39:28 2020 -0400 |
[ |
@@ -1,4 +1,4 @@ -<tool id="unipept" name="Unipept" version="4.0.0"> +<tool id="unipept" name="Unipept" version="4.3.0"> <description>retrieve taxonomy for peptides</description> <macros> <xml name="equate_il"> @@ -40,7 +40,7 @@ </xml> </macros> <requirements> - <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="3">python</requirement> </requirements> <stdio> <exit_code range="1:" /> @@ -94,6 +94,9 @@ #if 'go_tsv' in str($selected_outputs).split(',') and str($unipept.api) in ['pept2go', 'pept2funct', 'peptinfo']: --go_tsv $output_go_tsv #end if + #if 'ipr_tsv' in str($selected_outputs).split(',') and str($unipept.api) in ['pept2interpro', 'pept2funct', 'peptinfo']: + --ipr_tsv $output_ipr_tsv + #end if #if 'unmatched' in str($selected_outputs).split(','): --unmatched $output_unmatched #end if @@ -106,6 +109,7 @@ <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option> <option value="pept2ec">pept2ec: Tryptic peptides and associated EC terms</option> <option value="pept2go">pept2go: Tryptic peptides and associated GO terms</option> + <option value="pept2interpro">pept2interpro: Tryptic peptides and associated InterPro entries</option> <option value="pept2funct">pept2funct: Tryptic peptides and associated EC terms, GO terms, and InterPro entries</option> <option value="peptinfo">peptinfo: Tryptic peptides and associated EC terms, GO terms, InterPro entries, and lowest common ancestor taxonomy</option> </param> @@ -146,6 +150,14 @@ </expand> <expand macro="domains" /> </when> + <when value="pept2interpro"> + <expand macro="equate_il" /> + <expand macro="extra_true"> + <help>Return the type and name of the InterPro entry. 
+ </help> + </expand> + <expand macro="domains" /> + </when> <when value="pept2funct"> <expand macro="equate_il" /> <expand macro="extra_true"> @@ -194,6 +206,7 @@ <option value="csv">Comma Separated Values (.csv) with one line per peptide</option> <option value="json">JSON Taxonomy Tree (for pept2lca, pept2taxa, and peptinfo)</option> <option value="go_tsv">Peptide GO terms in normalized tabular (for pept2go, pept2funct, and peptinfo)</option> + <option value="ipr_tsv">Peptide InterPro entries in normalized tabular (for pept2interpro, pept2funct, and peptinfo)</option> <option value="ec_tsv">Peptide EC terms in normalized tabular (for pept2ec, pept2funct, and peptinfo)</option> <option value="ec_json">JSON EC Coverage Tree (for pept2ec, pept2funct, and peptinfo)</option> <option value="unmatched">Unmatched peptides</option> @@ -244,6 +257,13 @@ <action name="column_names" type="metadata" default="Peptide,Total Protein Count,GO Term,Protein Count,GO Name" /> </actions> </data> + <data name="output_ipr_tsv" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} InterPro tsv"> + <filter>'ipr_tsv' in selected_outputs and unipept['api'] in ('pept2interpro', 'pept2funct', 'peptinfo')</filter> + <actions> + <action name="column_names" type="metadata" default="Peptide,Total Protein Count,InterPro Code,Protein Count,InterPro Type,InterPro Name" /> + </actions> + </data> + <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched"> <filter>'unmatched' in selected_outputs</filter> <actions> @@ -279,6 +299,8 @@ <param name="extra" value="True"/> <param name="names" value="True"/> <param name="selected_outputs" value="json,tsv"/> +<!-- +--> <output name="output_json"> <assert_contents> <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV" /> @@ -303,7 +325,6 @@ <assert_contents> <has_text text="sapiens" /> <has_text text="paniscus" /> - <has_text 
text="Gorilla" /> </assert_contents> </output> </test> @@ -314,11 +335,12 @@ <param name="column" value="2"/> <param name="extra" value="True"/> <param name="names" value="True"/> - <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,unmatched"/> + <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,ipr_tsv,unmatched"/> <output name="output_tsv"> <assert_contents> <has_text text="GO:0004802" /> <has_text text="2.2.1.1" /> + <has_text text="IPR005475" /> </assert_contents> </output> <output name="output_ec_tsv"> @@ -331,6 +353,11 @@ <has_text text="GO:0004802" /> </assert_contents> </output> + <output name="output_ipr_tsv"> + <assert_contents> + <has_text text="IPR005475" /> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ @@ -485,6 +512,19 @@ name: Optional, name of the GO-term. Included when the extra parameter is set to true. + **pept2interpro** - http://unipept.ugent.be/apidocs/pept2interpro + + Returns the functional InterPro entries associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface. + + By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy:: + + peptide: the peptide that matched this record + total_protein_count: Total number of proteins matched with the given peptide + code: InterPro entry code associated with the current tryptic peptide. + protein_count: number of proteins matched with the given tryptic peptide that are labeled with the current InterPro code. + type: Optional, type of the InterPro entry. Included when the extra parameter is set to true. + name: Optional, name of the InterPro entry. Included when the extra parameter is set to true. + **pept2funct** - http://unipept.ugent.be/apidocs/pept2funct Returns the functional EC-numbers, GO-terms and InterPro entries associated with a given tryptic peptide. 
This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface. @@ -499,6 +539,10 @@ go_term: The GO-term associated with the current tryptic peptide. protein_count: number of proteins matched with the given tryptic peptide that are labeled with the current GO-term. name: Optional, name of the GO-term. Included when the extra parameter is set to true. + code: InterPro entry code associated with the current tryptic peptide. + protein_count: number of proteins matched with the given tryptic peptide that are labeled with the current InterPro code. + type: Optional, type of the InterPro entry. Included when the extra parameter is set to true. + name: Optional, name of the InterPro entry. Included when the extra parameter is set to true. **Attributions** |