Mercurial > repos > george-weingart > lefse
comparison qiime2lefse.py @ 0:e7cd19afda2e draft
Lefse
author | george-weingart |
---|---|
date | Tue, 13 May 2014 21:57:00 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e7cd19afda2e |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 | |
def read_params(args):
    """Parse the command-line options of the QIIME-to-LEfSe converter.

    ``args`` is kept for interface compatibility only: ``parse_args()`` is
    called without arguments, so argparse reads ``sys.argv[1:]`` itself.

    Returns a dict with the keys 'in', 'md', 'out', 'c', 's' and 'u'.
    'in' and 'out' default to the ``sys.stdin`` / ``sys.stdout`` objects,
    every other option defaults to None.
    """
    import argparse as ap

    p = ap.ArgumentParser(
        description="Convert a Qiime OTU table (and optional metadata "
                    "file) into the LEfSe input format")

    p.add_argument('--in', metavar='INPUT_FILE', type=str,
                   nargs='?', default=sys.stdin,
                   help="the Qiime OTU table file "
                        "[ stdin if not present ]")
    # The help text used to be a copy of the --in one and described the
    # metadata file as the OTU table.
    p.add_argument('--md', metavar='METADATA_FILE', type=str,
                   nargs='?', default=None,
                   help="the Qiime metadata file "
                        "[ only OTU table without metadata if not present ]")
    p.add_argument('--out', metavar='OUTPUT_FILE', type=str,
                   nargs='?', default=sys.stdout,
                   help="the output file "
                        "[stdout if not present]")

    p.add_argument('-c', metavar="class attribute",
                   type=str,
                   help="the attribute to use as class")
    p.add_argument('-s', metavar="subclass attribute",
                   type=str,
                   help="the attribute to use as subclass")
    p.add_argument('-u', metavar="subject attribute",
                   type=str,
                   help="the attribute to use as subject")

    return vars(p.parse_args())
37 | |
38 | |
39 | |
def qiime2lefse( fin, fmd, fout, all_md, sel_md ):
    """Convert a Qiime OTU table (plus optional metadata) to LEfSe input.

    fin    -- path of the tab-separated OTU table, or the sys.stdin object
    fmd    -- path of the tab-separated metadata file, or None
    fout   -- path of the output file, or the sys.stdout object
    all_md -- if true, emit every metadata field named in the header of fmd
    sel_md -- metadata field names to emit when all_md is false; entries
              that are None or empty are ignored

    The first line of the OTU table is skipped; the next line is the header.
    The first column holds the OTU ids and the last column must be headed
    'Consensus Lineage'. Each output line is one feature: first the selected
    metadata fields, then one line per OTU labelled with its lineage
    (';' replaced by '|', double quotes removed) followed by '|<OTU id>'.
    The remaining cells are the per-sample values, in input column order.

    Raises KeyError when there is no 'Consensus Lineage' column or when a
    sample has no entry for a selected metadata field.
    """
    # Local import: works on Python 2.7 and 3 and keeps column order stable.
    from collections import OrderedDict

    # Do not wrap sys.stdin in a ``with``: that would close the process's
    # standard input as a side effect.
    if fin is sys.stdin:
        raw = fin.readlines()
    else:
        with open(fin) as inpf:
            raw = inpf.readlines()

    # Blank lines are dropped; otherwise zip() below would truncate the
    # whole table to a single column.
    rows = [l.strip().split('\t') for l in raw[1:] if l.strip()]
    # Transpose: one list per column, header cell first.
    columns = [list(col) for col in zip(*rows)]

    # Append the OTU id to every lineage so that feature names are unique.
    for i, (otu_id, lineage) in enumerate(zip(columns[0], columns[-1])):
        if not lineage == 'Consensus Lineage':
            columns[-1][i] = lineage + "|" + otu_id

    data = OrderedDict((col[0], col[1:]) for col in columns[1:])

    md = {}
    mdf = []
    if fmd:
        with open(fmd) as inpf:
            mdlines = [l.strip().split('\t') for l in inpf if l.strip()]

        mdf = mdlines[0][1:]

        for l in mdlines:
            md[l[0]] = dict(zip(mdf, l[1:]))

    if all_md:
        # Header order, duplicates removed. The former
        # ``md.values()[0].keys()`` fails on Python 3, where dict views
        # are neither indexable nor concatenable with a list.
        selected_md = list(OrderedDict.fromkeys(mdf))
    else:
        selected_md = [s for s in sel_md if s]

    out_m = [ selected_md +
              [d.replace(";","|").replace("\"","")
               for d in data[ 'Consensus Lineage' ]] ]
    for k, v in data.items():
        if k == 'Consensus Lineage':
            continue
        out_m.append( [md[k][kmd] for kmd in selected_md] + list(v) )

    # Transpose back: one output line per metadata field / OTU.
    text = "".join( "\t".join(l) + "\n" for l in zip(*out_m) )

    # As with stdin, leave sys.stdout open for the caller.
    if fout is sys.stdout:
        fout.write( text )
    else:
        with open( fout, "w" ) as outf:
            outf.write( text )
77 | |
if __name__ == '__main__':
    # Parse the command line, then hand the options to the converter.
    params = read_params( sys.argv )

    # Class, subclass and subject attributes, in that order.
    chosen = [params['c'], params['s'], params['u']]

    # With none of the three given, every metadata field is exported.
    qiime2lefse( fin = params['in'],
                 fmd = params['md'],
                 fout = params['out'],
                 all_md = not any(chosen),
                 sel_md = chosen )