Previous changeset 3:34758ab8aaa4 (2017-02-20) Next changeset 5:917fd3ebc223 (2020-04-30) |
Commit message:
planemo upload for repository http://unipept.ugent.be/apidocs commit e91b0fe16bf468b34884508652359b91847d1f95-dirty |
modified:
unipept.py unipept.xml |
b |
diff -r 34758ab8aaa4 -r 4953dcd7dd39 unipept.py --- a/unipept.py Mon Feb 20 10:32:03 2017 -0500 +++ b/unipept.py Wed Jan 23 09:16:38 2019 -0500 |
[ |
b'@@ -21,6 +21,18 @@\n import re\n import urllib\n import urllib2\n+\n+"""\n+pept2taxa\tjson\n+pept2lca\tjson\n+pept2prot\t\n+pept2ec\t\tecjson\tec\n+pept2go\t\t\tgo\n+pept2funct\tgo\tec\n+peptinfo\tjson ecjson ec go\n+\n+"""\n+\n try:\n import xml.etree.cElementTree as ET\n except ImportError:\n@@ -31,11 +43,87 @@\n if exit_code:\n sys.exit(exit_code)\n \n+go_types = [\'biological process\', \'molecular function\', \'cellular component\']\n+ec_name_dict = {\n+\'1\' : \'Oxidoreductase\',\n+\'1.1\' : \'act on the CH-OH group of donors\',\n+\'1.2\' : \'act on the aldehyde or oxo group of donors\',\n+\'1.3\' : \'act on the CH-CH group of donors\',\n+\'1.4\' : \'act on the CH-NH2 group of donors\',\n+\'1.5\' : \'act on CH-NH group of donors\',\n+\'1.6\' : \'act on NADH or NADPH\',\n+\'1.7\' : \'act on other nitrogenous compounds as donors\',\n+\'1.8\' : \'act on a sulfur group of donors\',\n+\'1.9\' : \'act on a heme group of donors\',\n+\'1.10\' : \'act on diphenols and related substances as donors\',\n+\'1.11\' : \'act on peroxide as an acceptor -- peroxidases\',\n+\'1.12\' : \'act on hydrogen as a donor\',\n+\'1.13\' : \'act on single donors with incorporation of molecular oxygen\',\n+\'1.14\' : \'act on paired donors with incorporation of molecular oxygen\',\n+\'1.15\' : \'act on superoxide radicals as acceptors\',\n+\'1.16\' : \'oxidize metal ions\',\n+\'1.17\' : \'act on CH or CH2 groups\',\n+\'1.18\' : \'act on iron-sulfur proteins as donors\',\n+\'1.19\' : \'act on reduced flavodoxin as donor\',\n+\'1.20\' : \'act on phosphorus or arsenic as donors\',\n+\'1.21\' : \'act on X-H and Y-H to form an X-Y bond\',\n+\'1.97\' : \'other oxidoreductases\',\n+\'2\' : \'Transferase\',\n+\'2.1\' : \'transfer one-carbon groups, Methylase\',\n+\'2.2\' : \'transfer aldehyde or ketone groups\',\n+\'2.3\' : \'acyltransferases\',\n+\'2.4\' : \'glycosyltransferases\',\n+\'2.5\' : \'transfer alkyl or aryl groups, other than methyl groups\',\n+\'2.6\' : \'transfer nitrogenous groups\',\n+\'2.7\' : \'transfer phosphorus-containing groups\',\n+\'2.8\' : \'transfer sulfur-containing groups\',\n+\'2.9\' : \'transfer selenium-containing groups\',\n+\'3\' : \'Hydrolase\',\n+\'3.1\' : \'act on ester bonds\',\n+\'3.2\' : \'act on sugars - glycosylases\',\n+\'3.3\' : \'act on ether bonds\',\n+\'3.4\' : \'act on peptide bonds - Peptidase\',\n+\'3.5\' : \'act on carbon-nitrogen bonds, other than peptide bonds\',\n+\'3.6\' : \'act on acid anhydrides\',\n+\'3.7\' : \'act on carbon-carbon bonds\',\n+\'3.8\' : \'act on halide bonds\',\n+\'3.9\' : \'act on phosphorus-nitrogen bonds\',\n+\'3.10\' : \'act on sulfur-nitrogen bonds\',\n+\'3.11\' : \'act on carbon-phosphorus bonds\',\n+\'3.12\' : \'act on sulfur-sulfur bonds\',\n+\'3.13\' : \'act on carbon-sulfur bonds\',\n+\'4\' : \'Lyase\',\n+\'4.1\' : \'carbon-carbon lyases\',\n+\'4.2\' : \'carbon-oxygen lyases\',\n+\'4.3\' : \'carbon-nitrogen lyases\',\n+\'4.4\' : \'carbon-sulfur lyases\',\n+\'4.5\' : \'carbon-halide lyases\',\n+\'4.6\' : \'phosphorus-oxygen lyases\',\n+\'5\' : \'Isomerase\',\n+\'5.1\' : \'racemases and epimerases\',\n+\'5.2\' : \'cis-trans-isomerases\',\n+\'5.3\' : \'intramolecular oxidoreductases\',\n+\'5.4\' : \'intramolecular transferases -- mutases\',\n+\'5.5\' : \'intramolecular lyases\',\n+\'5.99\' : \'other isomerases\',\n+\'6\' : \'Ligase\',\n+\'6.1\' : \'form carbon-oxygen bonds\',\n+\'6.2\' : \'form carbon-sulfur bonds\',\n+\'6.3\' : \'form carbon-nitrogen bonds\',\n+\'6.4\' : \'form carbon-carbon bonds\',\n+\'6.5\' : \'form phosphoric ester bonds\',\n+\'6.6\' : \'form nitrogen-metal bonds\',\n+}\n pept2lca_column_order = [\'peptide\',\'taxon_rank\',\'taxon_id\',\'taxon_name\']\n pept2lca_extra_column_order = [\'peptide\',\'superkingdom\',\'kingdom\',\'subkingdom\',\'superphylum\',\'phylum\',\'subphylum\',\'superclass\',\'class\',\'subclass\',\'infraclass\',\'superorder\',\'order\',\'suborder\',\'infraorder\',\'parvorder\',\'superfamily\',\'family\',\'subfamily\',\'tribe\',\'subtribe\',\'genus\',\'subgenus\',\'species_group\',\'species_subgroup\',\'species\',\'subspecies\',\'varietas\',\'forma\' ]\n pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[1:]\n pept2prot_column_order = [\'peptide\',\'uniprot_id\',\'taxon_id\']\n pept2prot_extra_column_order = pept2prot_column_order + [\'taxon_name\',\'ec_references\',\'go_references\',\'refseq_ids\',\'refseq_protein_ids\',\'insdc_ids\',\'insdc_protein_ids\']\n+pep'..b',\'pept2taxa\',\'pept2prot\'\n- found_keys = set()\n- results = []\n- for i,pdict in enumerate(resp):\n- results.append(pdict)\n- found_keys |= set(pdict.keys())\n- # print >> sys.stderr, "%s\\n%s" % (pdict.keys(),found_keys)\n- column_names = []\n- column_keys = []\n- for col in column_order:\n- if col in found_keys:\n- column_names.append(col)\n- column_keys.append(col)\n- elif options.extra or options.names:\n- col_id = col+\'_id\'\n- col_name = col+\'_name\'\n- if options.extra:\n- if col_id in found_keys:\n- column_names.append(col_id)\n- column_keys.append(col_id)\n- if options.names:\n- if col_name in found_keys:\n- column_names.append(col)\n- column_keys.append(col_name)\n- else:\n- if col+\'_name\' in found_keys:\n- column_names.append(col)\n- column_keys.append(col+\'_name\')\n- elif col+\'_id\' in found_keys:\n- column_names.append(col)\n- column_keys.append(col+\'_id\')\n- # print >> sys.stderr, "%s\\n%s" % (column_names,column_keys)\n- taxa = []\n- for i,pdict in enumerate(results):\n- vals = [str(pdict[x]) if x in pdict and pdict[x] else \'\' for x in column_keys]\n- if vals not in taxa:\n- taxa.append(vals)\n+ rows = []\n+ column_names = None\n+ if options.unipept in [\'pept2ec\', \'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ taxa = None\n+ ec_dict = None\n+ go_dict = None\n+ if options.unipept in [\'peptinfo\']:\n+ (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)\n+ if options.unipept in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ (ec_dict,ec_cols) = get_ec_dict(resp, extra=options.extra)\n+ if options.unipept in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ (go_dict,go_cols) = get_go_dict(resp, extra=options.extra)\n+ for i,pdict in enumerate(resp):\n+ peptide = pdict[\'peptide\'] \n+ total_protein_count = str(pdict[\'total_protein_count\']) if \'total_protein_count\' in pdict else \'0\'\n+ column_names = [\'peptide\', \'total_protein_count\']\n+ vals = [peptide,total_protein_count] \n+ if ec_dict:\n+ vals += ec_dict[peptide]\n+ column_names += ec_cols\n+ if go_dict:\n+ vals += go_dict[peptide]\n+ column_names += go_cols\n+ if taxa:\n+ vals += taxa[peptide][1:]\n+ column_names += taxon_cols[1:]\n+ rows.append(vals)\n+ elif options.unipept in [\'pept2lca\', \'pept2taxa\', \'pept2prot\']:\n+ (taxa,taxon_cols) = get_taxon_dict(resp, column_order, extra=options.extra, names=options.names)\n+ column_names = taxon_cols\n+ rows = taxa.values()\n+ for peptide,vals in taxa.iteritems():\n+ rows.append(vals)\n if options.tsv:\n with open(options.tsv,\'w\') as outputFile:\n- outputFile.write("#%s\\n"% \'\\t\'.join(column_names))\n- for vals in taxa:\n+ if column_names:\n+ outputFile.write("#%s\\n"% \'\\t\'.join(column_names))\n+ for vals in rows:\n outputFile.write("%s\\n"% \'\\t\'.join(vals))\n if options.csv:\n with open(options.csv,\'w\') as outputFile:\n- outputFile.write("%s\\n"% \',\'.join(column_names))\n- for vals in taxa:\n+ if column_names:\n+ outputFile.write("%s\\n"% \',\'.join(column_names))\n+ for vals in rows:\n outputFile.write("%s\\n"% \',\'.join([\'"%s"\' % (v if v else \'\') for v in vals]))\n+ if options.ec_tsv and options.unipept in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ column_order = pept2ec_extra_column_order if options.extra else pept2ec_column_order\n+ write_ec_table(options.ec_tsv, resp, column_order)\n+ if options.go_tsv and options.unipept in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ column_order = pept2go_extra_column_order if options.extra else pept2go_column_order\n+ write_go_table(options.go_tsv, resp, column_order)\n \n if __name__ == "__main__" : __main__()\n' |
b |
diff -r 34758ab8aaa4 -r 4953dcd7dd39 unipept.xml --- a/unipept.xml Mon Feb 20 10:32:03 2017 -0500 +++ b/unipept.xml Wed Jan 23 09:16:38 2019 -0500 |
[ |
b'@@ -1,4 +1,4 @@\n-<tool id="unipept" name="Unipept" version="2.0.1">\n+<tool id="unipept" name="Unipept" version="4.0.0">\n <description>retrieve taxonomy for peptides</description>\n <macros>\n <xml name="equate_il">\n@@ -24,6 +24,20 @@\n <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help>\n </param>\n </xml>\n+ <xml name="domains">\n+ <param name="domains" type="boolean" truevalue="-D" falsevalue="" checked="false" label="group responses by GO namespace (biological process, molecular function, cellular component)">\n+ <yield/>\n+ </param>\n+ </xml>\n+ <xml name="selected_outputs">\n+ <param name="selected_outputs" type="select" multiple="true" display="checkboxes" label="Choose outputs">\n+ <option value="tsv" selected="true">Tabular with one line per peptide</option>\n+ <option value="csv">Comma Separated Values (.csv) with one line per peptide</option>\n+ <option value="json">JSON Taxomony Tree (for pept2lca, pep2taxa, and peptinfo)</option>\n+ <yield/>\n+ <option value="unmatched">Unmatched peptides</option>\n+ </param>\n+ </xml>\n </macros>\n <requirements>\n <requirement type="package" version="2.7">python</requirement>\n@@ -33,11 +47,15 @@\n </stdio>\n <command><![CDATA[\n python \'$__tool_directory__/unipept.py\' \n+ ## --url \'http://morty.ugent.be/api/v1\' -g -M 1 \n --api=$unipept.api\n $unipept.equate_il $unipept.extra \n- #if $unipept.api != \'pept2prot\':\n+ #if $unipept.api in [\'pept2lca\', \'pept2taxa\', \'peptinfo\']:\n $unipept.names $unipept.allfields\n #end if\n+ #if $unipept.api in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ $unipept.domains\n+ #end if\n $strict\n #if str($peptide_src.fmt) == \'proteomic\':\n #if $peptide_src.input.datatype.file_ext == \'fasta\':\n@@ -58,16 +76,25 @@\n #elif str($peptide_src.fmt) == \'pepxml\':\n --pepxml="$peptide_src.input_pepxml"\n #end if\n- #if \'json\' in str($outputs).split(\',\') and str($unipept.api) != \'pept2prot\':\n+ #if \'json\' in str($selected_outputs).split(\',\') and str($unipept.api) in [\'pept2lca\', \'pept2taxa\', \'peptinfo\']:\n --json $output_json\n #end if\n- #if \'tsv\' in str($outputs).split(\',\'):\n+ #if \'ec_json\' in str($selected_outputs).split(\',\') and str($unipept.api) in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ --ec_json $output_ec_json\n+ #end if\n+ #if \'tsv\' in str($selected_outputs).split(\',\'):\n --tsv $output_tsv\n #end if\n- #if \'csv\' in str($outputs).split(\',\'):\n+ #if \'csv\' in str($selected_outputs).split(\',\'):\n --csv $output_csv\n #end if\n- #if \'unmatched\' in str($outputs).split(\',\'):\n+ #if \'ec_tsv\' in str($selected_outputs).split(\',\') and str($unipept.api) in [\'pept2ec\', \'pept2funct\', \'peptinfo\']:\n+ --ec_tsv $output_ec_tsv\n+ #end if\n+ #if \'go_tsv\' in str($selected_outputs).split(\',\') and str($unipept.api) in [\'pept2go\', \'pept2funct\', \'peptinfo\']:\n+ --go_tsv $output_go_tsv\n+ #end if\n+ #if \'unmatched\' in str($selected_outputs).split(\',\'):\n --unmatched $output_unmatched\n #end if\n ]]></command>\n@@ -77,6 +104,10 @@\n <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option>\n <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option>\n <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option>\n+ <option value="pept2ec">pept2ec: Tryptic peptides and associated EC terms</option>\n+ <option value="pept2go">pept2go: Tryptic peptides and associated GO terms</option>\n+ <option value="'..b'ext="paniscus" />\n <has_text text="Gorilla" />\n- <has_text text="Macaca" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <test>\n+ <param name="api" value="pept2funct"/>\n+ <param name="fmt" value="tabular"/>\n+ <param name="input_tsv" value="input.tsv"/>\n+ <param name="column" value="2"/>\n+ <param name="extra" value="True"/>\n+ <param name="names" value="True"/>\n+ <param name="selected_outputs" value="tsv,ec_tsv,go_tsv,unmatched"/>\n+ <output name="output_tsv">\n+ <assert_contents>\n+ <has_text text="GO:0004802" />\n+ <has_text text="2.2.1.1" />\n+ </assert_contents>\n+ </output>\n+ <output name="output_ec_tsv">\n+ <assert_contents>\n+ <has_text text="2.2.1.1" />\n+ </assert_contents>\n+ </output>\n+ <output name="output_go_tsv">\n+ <assert_contents>\n+ <has_text text="GO:0004802" />\n </assert_contents>\n </output>\n </test>\n@@ -335,6 +459,47 @@\n varietas_id\n forma_id\n \n+ **pept2ec** - http://unipept.ugent.be/apidocs/pept2ec\n+\n+ Returns the functional EC-numbers associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.\n+\n+ By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::\n+\n+ peptide: the peptide that matched this record\n+ total_protein_count: Total amount of proteins matched with the given peptide\n+ ec_number: EC-number associated with the current tryptic peptide.\n+ protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.\n+ name: Optional, name of the EC-number. Included when the extra parameter is set to true.\n+\n+\n+ **pept2go** - http://unipept.ugent.be/apidocs/pept2go\n+\n+ Returns the functional GO-terms associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.\n+\n+ By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::\n+\n+ peptide: the peptide that matched this record\n+ total_protein_count: Total amount of proteins matched with the given peptide\n+ go_term: The GO-term associated with the current tryptic peptide.\n+ protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.\n+ name: Optional, name of the GO-term. Included when the extra parameter is set to true.\n+\n+\n+ **pept2funct** - http://unipept.ugent.be/apidocs/pept2funct\n+\n+ Returns the functional EC-numbers and GO-terms associated with a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface.\n+\n+ By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy::\n+\n+ peptide: the peptide that matched this record\n+ total_protein_count: Total amount of proteins matched with the given peptide\n+ ec_number: EC-number associated with the current tryptic peptide.\n+ protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current EC-number.\n+ name: Optional, name of the EC-number. Included when the extra parameter is set to true.\n+ go_term: The GO-term associated with the current tryptic peptide.\n+ protein_count: amount of proteins matched with the given tryptic peptide that are labeled with the current GO-term.\n+ name: Optional, name of the GO-term. Included when the extra parameter is set to true.\n+\n \n **Attributions**\n \n' |