Mercurial > repos > galaxyp > unipept
changeset 1:0c1ee95282fa draft
Uploaded
author | galaxyp |
---|---|
date | Tue, 14 Apr 2015 16:44:22 -0400 |
parents | 6430407e5869 |
children | 503ab8a39006 |
files | .shed.yml test-data/peptide.fa test-data/tryptic.fa test-data/tryptic.tsv unipept.py unipept.xml |
diffstat | 6 files changed, 268 insertions(+), 108 deletions(-) [+] |
line wrap: on
line diff
--- a/.shed.yml Fri Apr 03 14:55:49 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -categories: - - Proteomics - - Metaproteomics -description: Unipept retrieves metaproteomics information -homepage_url: https://github.com/galaxyproteomics/tools-galaxyp -long_description: 'Unipept retrieves taxonomy for tryptic peptides using the unipept API. - http://unipept.ugent.be - http://unipept.ugent.be/apidocs - - The Unipept metaproteomics analysis pipeline - Bart Mesuere1,*, Griet Debyser2, Maarten Aerts3, Bart Devreese2, Peter Vandamme3 andPeter Dawyndt1 - Article first published online: 11 FEB 2015 - DOI: 10.1002/pmic.201400361 - http://onlinelibrary.wiley.com/doi/10.1002/pmic.201400361/abstract;jsessionid=BFF1994E4C14DA73D7C907EB208AD710.f04t04 - ' -name: unipept -owner: galaxyp -remote_repository_url: http://unipept.ugent.be/apidocs
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/peptide.fa Tue Apr 14 16:44:22 2015 -0400 @@ -0,0 +1,9 @@ +>tr|G3RWV1|G3RWV1_GORGO +VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKG +SNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV +>sp|Q9BYE9|CDHR2_HUMAN +VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPRIPIDDLTMVVYDPDKG +SNGTFLLSLGGPDAEAFSVSPERAVGSASVQVLVRVSALVDYERQTAMAV +>tr|H2QS28|H2QS28_PANTR +VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPRIPIDDLTMVVYDPDKG +SNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSGLVDYERQTAMAV
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tryptic.fa Tue Apr 14 16:44:22 2015 -0400 @@ -0,0 +1,19 @@ +>trypticQTAMAV +QTAMAV +>trypticAAGSASVQVLVR +AAGSASVQVLVR +>trypticAVGSASVQVLVR +AVGSASVQVLVR +>trypticIPIDDLTMVVYDPDK +IPIDDLTMVVYDPDK +>trypticVSGLVDYER +VSGLVDYER +>trypticVSALVDYER +VSALVDYER +>trypticGSNGTFLLSLGGPDAEAFSVSPER +GSNGTFLLSLGGPDAEAFSVSPER +>trypticVMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPR +VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPR + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tryptic.tsv Tue Apr 14 16:44:22 2015 -0400 @@ -0,0 +1,8 @@ +1 QTAMAV QTAMAV +2 AAGSASVQVLVR AAGSASJQVLVR +3 AVGSASVQVLVR AVGSASVQVLVR +4 IPIDDLTMVVYDPDK IPIDDLTMVVYDPDK +5 GSNGTFLLSLGGPDAEAFSVSPER GSNGTFLLSLGGPDAEAFSVSPE +6 VSGLVDYER VSGLVDYER +7 VSALVDYER VSALVDYER +8 VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPR VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPR
--- a/unipept.py Fri Apr 03 14:55:49 2015 -0400 +++ b/unipept.py Tue Apr 14 16:44:22 2015 -0400 @@ -31,39 +31,86 @@ if exit_code: sys.exit(exit_code) -def read_fasta(fp): +pept2lca_column_order = ['peptide','taxon_rank','taxon_id','taxon_name'] +pept2lca_extra_column_order = ['peptide','superkingdom','kingdom','subkingdom','superphylum','phylum','subphylum','superclass','class','subclass','infraclass','superorder','order','suborder','infraorder','parvorder','superfamily','family','subfamily','tribe','subtribe','genus','subgenus','species_group','species_subgroup','species','subspecies','varietas','forma' ] +pept2lca_all_column_order = pept2lca_column_order + pept2lca_extra_column_order[1:] +pept2prot_column_order = ['peptide','uniprot_id','taxon_id'] +pept2prot_extra_column_order = pept2prot_column_order + ['taxon_name','ec_references','go_references','refseq_ids','refseq_protein_ids','insdc_ids','insdc_protein_ids'] + +def __main__(): + version = '1.1' + pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$' + + def read_tabular(filepath,col): + peptides = [] + with open(filepath) as fp: + for i,line in enumerate(fp): + if line.strip() == '' or line.startswith('#'): + continue + fields = line.rstrip('\n').split('\t') + peptide = fields[col] + if not re.match(pep_pat,peptide): + warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide,i,col,filepath),exit_code=invalid_ec) + peptides.append(peptide) + return peptides + + def get_fasta_entries(fp): name, seq = None, [] for line in fp: - line = line.rstrip() - if line.startswith(">"): - if name: yield (name, ''.join(seq)) - name, seq = line, [] - else: - seq.append(line) + line = line.rstrip() + if line.startswith(">"): + if name: yield (name, ''.join(seq)) + name, seq = line, [] + else: + seq.append(line) if name: yield (name, ''.join(seq)) -def read_mzid(fp): - peptides = [] - for event, elem in ET.iterparse(fp): - if event == 'end': - if re.search('PeptideSequence',elem.tag): - peptides.append(elem.text) - return peptides + def read_fasta(filepath): + peptides = [] + with open(filepath) as fp: + for id, peptide in get_fasta_entries(fp): + if not re.match(pep_pat,peptide): + warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (peptide,id,filepath),exit_code=invalid_ec) + peptides.append(peptide) + return peptides + + def read_mzid(fp): + peptides = [] + for event, elem in ET.iterparse(fp): + if event == 'end': + if re.search('PeptideSequence',elem.tag): + peptides.append(elem.text) + return peptides -def read_pepxml(fp): - peptides = [] - for event, elem in ET.iterparse(fp): - if event == 'end': - if re.search('search_hit',elem.tag): - peptides.append(elem.get('peptide')) - return peptides + def read_pepxml(fp): + peptides = [] + for event, elem in ET.iterparse(fp): + if event == 'end': + if re.search('search_hit',elem.tag): + peptides.append(elem.get('peptide')) + return peptides -def __main__(): + def best_match(peptide,matches): + if not matches: + return None + elif len(matches) == 1: + return matches[0].copy() + else: + # find the most specific match (peptide is always the first column order field) + for col in reversed(pept2lca_extra_column_order[1:]): + col_id = col+"_id" if options.extra else col + for match in matches: + if 'taxon_rank' in match and match['taxon_rank'] == col: + return match.copy() + if col_id in match and match[col_id]: + return match.copy() + return None + #Parse Command Line parser = optparse.OptionParser() - # unipept API - parser.add_option( '-A', '--api', dest='unipept', default='pept2lca', choices=['pept2lca','pept2taxa','pept2prot'], help='The unipept application: pept2lca, pept2taxa, or pept2prot' ) - # files + # unipept API choice + parser.add_option( '-a', '--api', dest='unipept', default='pept2lca', choices=['pept2lca','pept2taxa','pept2prot'], help='The unipept application: pept2lca, pept2taxa, or pept2prot' ) + # input files parser.add_option( '-t', '--tabular', dest='tabular', default=None, help='A tabular file that contains a peptide column' ) parser.add_option( '-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains peptide sequences' ) parser.add_option( '-f', '--fasta', dest='fasta', default=None, help='A fasta file containing peptide sequences' ) @@ -73,38 +120,33 @@ parser.add_option( '-e', '--equate_il', dest='equate_il', action='store_true', default=False, help='isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records' ) parser.add_option( '-x', '--extra', dest='extra', action='store_true', default=False, help='return the complete lineage of the taxonomic lowest common ancestor' ) parser.add_option( '-n', '--names', dest='names', action='store_true', default=False, help='return the names of all ranks in the lineage of the taxonomic lowest common ancestor' ) + # output fields + parser.add_option( '-A', '--allfields', dest='allfields', action='store_true', default=False, help='inlcude fields: taxon_rank,taxon_id,taxon_name csv and tsv outputs' ) # Warn vs Error Flag parser.add_option( '-S', '--strict', dest='strict', action='store_true', default=False, help='Print exit on invalid peptide' ) - # outputs + # output files parser.add_option( '-J', '--json', dest='json', default=None, help='Output file path for json formatted results') parser.add_option( '-T', '--tsv', dest='tsv', default=None, help='Output file path for TAB-separated-values (.tsv) formatted results') parser.add_option( '-C', '--csv', dest='csv', default=None, help='Output file path for Comma-separated-values (.csv) formatted results') - parser.add_option( '-M', '--mismatch', dest='mismatch', default=None, help='Output file path for peptide with no matches' ) + parser.add_option( '-U', '--unmatched', dest='unmatched', default=None, help='Output file path for peptide with no matches' ) + # debug + parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turning on debugging' ) + parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='pring version and exit' ) (options, args) = parser.parse_args() + if options.version: + print >> sys.stdout,"%s" % version + sys.exit(0) invalid_ec = 2 if options.strict else None peptides = [] - pep_pat = '^([ABCDEFGHIKLMNPQRSTVWXYZ]+)$' ## Get peptide sequences if options.mzid: peptides += read_mzid(options.mzid) if options.pepxml: peptides += read_pepxml(options.pepxml) if options.tabular: - with open(options.tabular) as fp: - for i,line in enumerate(fp): - if line.strip() == '' or line.startswith('#'): - continue - fields = line.rstrip('\n').split('\t') - peptide = fields[options.column] - if not re.match(pep_pat,peptide): - warn_err('"%s" is not a peptide (line %d column %d of tabular file: %s)\n' % (peptide,i,options.column,options.tabular),exit_code=invalid_ec) - peptides.append(peptide) + peptides += read_tabular(options.tabular,options.column) if options.fasta: - with open(options.fasta) as fp: - for id, peptide in read_fasta(fp): - if not re.match(pep_pat,peptide): - warn_err('"%s" is not a peptide (id %s of fasta file: %s)\n' % (peptide,id,options.fasta),exit_code=invalid_ec) - peptides.append(peptide) + peptides += read_fasta(options.fasta) if args and len(args) > 0: for i,peptide in enumerate(args): if not re.match(pep_pat,peptide): @@ -112,6 +154,25 @@ peptides.append(peptide) if len(peptides) < 1: warn_err("No peptides input!",exit_code=1) + column_order = pept2lca_column_order + if options.unipept == 'pept2prot': + column_order = pept2prot_extra_column_order if options.extra else pept2prot_column_order + else: + if options.extra or options.names: + column_order = pept2lca_all_column_order if options.allfields else pept2lca_extra_column_order + else: + column_order = pept2lca_column_order + ## map to tryptic peptides + pepToParts = {p: re.split("\n", re.sub(r'(?<=[RK])(?=[^P])','\n', p)) for p in peptides} + partToPeps = {} + for peptide, parts in pepToParts.iteritems(): + if options.debug: print >> sys.stdout, "peptide: %s\ttryptic: %s\n" % (peptide, parts) + for part in parts: + if len(part) > 50: + warn_err("peptide: %s tryptic fragment len %d > 50 for %s\n" % (peptide,len(part),part),exit_code=None) + if 5 <= len(part) <= 50: + partToPeps.setdefault(part,[]).append(peptide) + trypticPeptides = partToPeps.keys() ## unipept post_data = [] if options.equate_il: @@ -121,30 +182,72 @@ post_data.append(("names","true")) elif options.extra: post_data.append(("extra","true")) - post_data += [('input[]', x) for x in peptides] + post_data += [('input[]', x) for x in trypticPeptides] headers = {'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json'} url = 'http://api.unipept.ugent.be/api/v1/%s' % options.unipept req = urllib2.Request( url, headers = headers, data = urllib.urlencode(post_data) ) - resp = json.loads( urllib2.urlopen( req ).read() ) + unipept_resp = json.loads( urllib2.urlopen( req ).read() ) + unmatched_peptides = [] + peptideMatches = [] + if options.debug: print >> sys.stdout,"unipept response: %s\n" % str(unipept_resp) + if options.unipept == 'pept2prot' or options.unipept == 'pept2taxa': + dupkey = 'uniprot_id' if options.unipept == 'pept2prot' else 'taxon_id' ## should only keep one of these per input peptide + ## multiple entries per trypticPeptide for pep2prot or pep2taxa + mapping = {} + for match in unipept_resp: + mapping.setdefault(match['peptide'],[]).append(match) + for peptide in peptides: + # Get the intersection of matches to the tryptic parts + keyToMatch = None + for part in pepToParts[peptide]: + if part in mapping: + temp = {match[dupkey] : match for match in mapping[part]} + if keyToMatch: + dkeys = set(keyToMatch.keys()) - set(temp.keys()) + for k in dkeys: + del keyToMatch[k] + else: + keyToMatch = temp + ## keyToMatch = keyToMatch.fromkeys([x for x in keyToMatch if x in temp]) if keyToMatch else temp + if not keyToMatch: + unmatched_peptides.append(peptide) + else: + for key,match in keyToMatch.iteritems(): + match['tryptic_peptide'] = match['peptide'] + match['peptide'] = peptide + peptideMatches.append(match) + else: + ## should be one response per trypticPeptide for pep2lca + respMap = {v['peptide']:v for v in unipept_resp} + ## map resp back to peptides + for peptide in peptides: + matches = list() + for part in pepToParts[peptide]: + if part in respMap: + matches.append(respMap[part]) + match = best_match(peptide,matches) + if not match: + unmatched_peptides.append(peptide) + longest_tryptic_peptide = sorted(pepToParts[peptide], key=lambda x: len(x))[-1] + match = {'peptide' : longest_tryptic_peptide} + match['tryptic_peptide'] = match['peptide'] + match['peptide'] = peptide + peptideMatches.append(match) + resp = peptideMatches + if options.debug: print >> sys.stdout,"\nmapped response: %s\n" % str(resp) ## output results - if not (options.mismatch or options.json or options.tsv or options.csv): + if not (options.unmatched or options.json or options.tsv or options.csv): print >> sys.stdout, str(resp) - if options.mismatch: - peptides_matched = [] - for i,pdict in enumerate(resp): - peptides_matched.append(pdict['peptide']) - with open(options.mismatch,'w') as outputFile: + if options.unmatched: + with open(options.unmatched,'w') as outputFile: for peptide in peptides: - if not peptide in peptides_matched: + if peptide in unmatched_peptides: outputFile.write("%s\n" % peptide) if options.json: with open(options.json,'w') as outputFile: outputFile.write(str(resp)) if options.tsv or options.csv: # 'pept2lca','pept2taxa','pept2prot' - pept2lca_column_order = [ 'peptide','superkingdom','kingdom','subkingdom','superphylum','phylum','subphylum','superclass','class_','subclass','infraclass','superorder','order','suborder','infraorder','parvorder','superfamily','family','subfamily','tribe','subtribe','genus','subgenus','species_group','species_subgroup','species','subspecies','varietas','forma' ] - pept2prot_column_order = [ 'peptide','uniprot_id','taxon_id','taxon_name','ec_references','go_references','refseq_ids','refseq_protein_ids','insdc_ids','insdc_protein_ids'] - column_order = pept2prot_column_order if options.unipept == 'pept2prot' else pept2lca_column_order found_keys = set() results = [] for i,pdict in enumerate(resp): @@ -179,7 +282,8 @@ taxa = [] for i,pdict in enumerate(results): vals = [str(pdict[x]) if x in pdict and pdict[x] else '' for x in column_keys] - taxa.append(vals) + if vals not in taxa: + taxa.append(vals) if options.tsv: with open(options.tsv,'w') as outputFile: outputFile.write("#%s\n"% '\t'.join(column_names))
--- a/unipept.xml Fri Apr 03 14:55:49 2015 -0400 +++ b/unipept.xml Tue Apr 14 16:44:22 2015 -0400 @@ -1,8 +1,8 @@ -<tool id="unipept" name="Unipept" version="0.1.0"> +<tool id="unipept" name="Unipept" version="1.1.0"> <description>retrieve taxonomy for peptides</description> <macros> <xml name="equate_il"> - <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="false" label="Equate isoleucine and leucine"> + <param name="equate_il" type="boolean" truevalue="-e" falsevalue="" checked="true" label="Equate isoleucine and leucine"> <help>isoleucine (I) and leucine (L) are equated when matching tryptic peptides to UniProt records</help> </param > </xml> @@ -13,7 +13,10 @@ </xml> <xml name="names"> <param name="names" type="boolean" truevalue="-n" falsevalue="" checked="true" label="names" > - <help>return the names of taxons</help> + <help>return the names in complete taxonomic lineage</help> + </param > + <param name="allfields" type="boolean" truevalue="-A" falsevalue="" checked="false" label="allfields" > + <help>include fields for most specific taxonomic classification: taxon_rank,taxon_id,taxon_name before lineage</help> </param > </xml> </macros> @@ -27,7 +30,7 @@ --api=$unipept.api $unipept.equate_il $unipept.extra #if $unipept.api != 'pept2prot': - $unipept.names + $unipept.names $unipept.allfields #end if $strict #if str($peptide_src.fmt) == 'proteomic': @@ -58,29 +61,29 @@ #if 'csv' in str($outputs).split(','): --csv $output_csv #end if - #if 'mismatch' in str($outputs).split(','): - --mismatch $output_mismatch + #if 'unmatched' in str($outputs).split(','): + --unmatched $output_unmatched #end if ]]></command> <inputs> <conditional name="unipept"> <param name="api" type="select" label="Unipept application" > - <option value="pept2taxa" selected="true">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option> - <option value="pept2lca">pept2lca: lowest common ancestor</option> + <option value="pept2lca" selected="true">pept2lca: lowest common ancestor</option> + <option value="pept2taxa">pept2taxa: organisms associated with the UniProt entries containing a given tryptic peptide</option> <option value="pept2prot">pept2prot: UniProt entries containing a given tryptic peptide</option> </param> + <when value="pept2lca"> + <expand macro="equate_il" /> + <expand macro="extra"> + <help>Return the complete lineage of the taxonomic lowest common ancestor, and include ID fields.</help> + </expand> + <expand macro="names" /> + </when> <when value="pept2taxa"> <expand macro="equate_il" /> <expand macro="extra"> <checked>true</checked> - <help>Return the complete lineage of each organism.</help> - </expand> - <expand macro="names" /> - </when> - <when value="pept2lca"> - <expand macro="equate_il" /> - <expand macro="extra"> - <help>Return the complete lineage of the taxonomic lowest common ancestor.</help> + <help>Return the complete lineage of each organism, and include ID fields.</help> </expand> <expand macro="names" /> </when> @@ -122,7 +125,7 @@ <option value="tsv" selected="true">tabular</option> <option value="csv">Comma Separated Values (.csv)</option> <option value="json">JSON</option> - <option value="mismatch">Mismatches</option> + <option value="unmatched">Unmatched peptides</option> </param> <param name="strict" type="boolean" truevalue="--strict" falsevalue="" checked="false" label="Exit with error on invalid peptides, otherwise ignore them"/> </inputs> @@ -136,44 +139,64 @@ <data name="output_csv" format="csv" label="${tool.name} ${unipept.api} on ${on_string} csv"> <filter>'csv' in outputs</filter> </data> - <data name="output_mismatch" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} mismatch"> - <filter>'mismatch' in outputs</filter> + <data name="output_unmatched" format="tabular" label="${tool.name} ${unipept.api} on ${on_string} unmatched"> + <filter>'unmatched' in outputs</filter> </data> </outputs> <tests> <test> + <param name="api" value="pept2lca"/> <param name="fmt" value="tabular"/> - <param name="input_tsv" value="input.tsv"/> + <param name="input_tsv" value="tryptic.tsv"/> <param name="column" value="2"/> <param name="extra" value="True"/> <param name="names" value="True"/> - <param name="outputs" value="tsv,mismatch"/> + <param name="outputs" value="tsv,unmatched"/> <output name="output_tsv"> <assert_contents> - <has_text text="AIPQLEVARPADAYETAEAYR" /> + <has_text text="Homininae" /> </assert_contents> </output> - <output name="output_mismatch"> + <output name="output_unmatched"> <assert_contents> - <has_text text="DQIAHEGK" /> + <has_text text="QTAMAV" /> </assert_contents> </output> </test> <test> + <param name="api" value="pept2lca"/> <param name="fmt" value="fasta"/> - <param name="input_tsv" value="input.fasta"/> + <param name="input_fasta" value="peptide.fa"/> <param name="equate_il" value="True"/> <param name="extra" value="True"/> <param name="names" value="True"/> - <param name="outputs" value="json,mismatch"/> + <param name="outputs" value="json,tsv"/> <output name="output_json"> <assert_contents> - <has_text text="AIPQLEVARPADAYETAEAYR" /> + <has_text text="VMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPHIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAAGSASVQVLVRVSALVDYERQTAMAV" /> + </assert_contents> + </output> + <output name="output_tsv"> + <assert_contents> + <has_text text="9606" /> + <has_text text="9598" /> </assert_contents> </output> - <output name="output_mismatch"> + </test> + <test> + <param name="api" value="pept2taxa"/> + <param name="fmt" value="fasta"/> + <param name="input_fasta" value="peptide.fa"/> + <param name="equate_il" value="True"/> + <param name="extra" value="False"/> + <param name="names" value="False"/> + <param name="outputs" value="tsv"/> + <output name="output_tsv"> <assert_contents> - <has_text text="DQIAHEGK" /> + <has_text text="sapiens" /> + <has_text text="troglodytes" /> + <has_text text="Gorilla" /> + <has_text text="Macaca" /> </assert_contents> </output> </test> @@ -182,8 +205,24 @@ **Unipept** Retrieve Uniprot and taxanomic information for trypic peptides. + + Unipept API documentation - http://unipept.ugent.be/apidocs - **pept2prot** + **Input** + + Input peptides can be retrieved from tabular, fasta, mzid, or pepxml datasets. + + Processing deatils:: + + The input peptides are split into typtic peptide fragments in order to match the Unipept records. + Only fragments that are complete tryptic peptides between 5 and 50 animo acid in length will be matched by Unipept. + The match to the most specific tryptic fragment is reported. + + + **Unipept APIs** + + **pept2prot** - http://unipept.ugent.be/apidocs/pept2prot + Returns the list of UniProt entries containing a given tryptic peptide. This is the same information as provided on the Protein matches tab when performing a search with the Tryptic Peptide Analysis in the web interface. By default, each object contains the following information fields extracted from the UniProt record:: @@ -202,9 +241,9 @@ insdc_ids: a space separated list of associated insdc accession numbers insdc_protein_ids: a space separated list of associated insdc protein accession numbers - http://unipept.ugent.be/apidocs/pept2prot - **pept2taxa** + **pept2taxa** - http://unipept.ugent.be/apidocs/pept2taxa + Returns the set of organisms associated with the UniProt entries containing a given tryptic peptide. This is the same information as provided on the Lineage table tab when performing a search with the Tryptic Peptide Analysis in the web interface. By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy:: @@ -245,9 +284,9 @@ varietas_id forma_id - http://unipept.ugent.be/apidocs/pept2taxa - **pept2lca** + **pept2lca** - http://unipept.ugent.be/apidocs/pept2lca + Returns the taxonomic lowest common ancestor for a given tryptic peptide. This is the same information as provided when performing a search with the Tryptic Peptide Analysis in the web interface. By default, each object contains the following information fields extracted from the UniProt record and NCBI taxonomy:: @@ -288,7 +327,6 @@ varietas_id forma_id - http://unipept.ugent.be/apidocs/pept2lca **Attributions**