annotate metaphlan2krona.py @ 6:e951f9d38339 default tip

Added metaphlan2krona
author Dannon Baker <dannonbaker@me.com>
date Tue, 08 Apr 2014 14:16:46 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
1 #!/usr/bin/env python
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
2
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
3 # ==============================================================================
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
4 # Conversion script: from MetaPhlAn output to Krona text input file
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
5 # Author: Daniel Brami (daniel.brami@gmail.com)
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
6 # ==============================================================================
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
7
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
8 import sys
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
9 import optparse
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
10 import re
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
11
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
def main():
    """Convert a MetaPhlAn profile into a Krona text input file.

    Command-line options:
        -p / --profile  Path of the MetaPhlAn result file to read (required).
        -k / --krona    Path of the Krona text file to write
                        (defaults to ``krona.out``).

    Only input lines that contain ``s__`` or ``unclassified`` followed by a
    tab are converted. For each such line every ``<char>__`` rank prefix is
    turned into a tab, the ``|`` separators are dropped, and the line is
    written out as the abundance (last tab-separated field, parsed as a
    float) followed by a tab and the remaining fields joined by tabs.

    If no profile (or an empty output name) is given, the usage text is
    printed and the process exits via ``sys.exit()``.

    Raises:
        ValueError: if the last field of a selected line is not a number.
        IOError/OSError: if the input or output file cannot be opened.
    """
    # Parse command line
    parser = optparse.OptionParser()
    parser.add_option('-p', '--profile', dest='profile', default='', action='store', help='The input file is the MetaPhlAn standard result file')
    parser.add_option('-k', '--krona', dest='krona', default='krona.out', action='store', help='the Krona output file name')
    options, _spillover = parser.parse_args()

    if not options.profile or not options.krona:
        parser.print_help()
        sys.exit()

    # Lines worth converting: anything carrying a species-level tag, plus the
    # "unclassified" row.
    re_candidates = re.compile(r"s__|unclassified\t")
    # Rank prefixes such as "k__", "p__", "s__".
    re_replace = re.compile(r"\w__")

    # Read the whole profile before the output is opened, so the output is
    # not created/truncated when the input is unreadable, and so that using
    # the same path for input and output keeps working.
    with open(options.profile, 'r') as profile_fh:
        profile_lines = profile_fh.readlines()

    # The context manager guarantees the output is flushed and closed even
    # when a malformed abundance makes float() raise part-way through.
    with open(options.krona, 'w') as krona_fh:
        for aline in profile_lines:
            if not re_candidates.search(aline):
                continue

            # Every rank prefix becomes a tab; the '|' separators are dropped.
            x = re_replace.sub('\t', aline).replace('|', '')

            x_cells = x.split('\t')
            lineage = '\t'.join(x_cells[:-1])
            abundance = float(x_cells[-1].rstrip('\n'))

            # Krona text input: quantity first, then the lineage columns.
            krona_fh.write('%s\t%s\n' % (abundance, lineage))
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
47
e951f9d38339 Added metaphlan2krona
Dannon Baker <dannonbaker@me.com>
parents:
diff changeset
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()