1
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4
|
|
def read_params(args):
    """Parse the command-line options of the converter.

    Args:
        args: argv-style sequence whose first element is the program name
            (normally ``sys.argv``); that first element is skipped.
            ``None`` makes argparse fall back to ``sys.argv`` itself.

    Returns:
        dict keyed by option name ('in', 'md', 'out', 'c', 's', 'u').
        'in' and 'out' hold a path string, or ``sys.stdin`` / ``sys.stdout``
        when the option is absent or given without a value. The other
        entries are strings, or ``None`` when not supplied.
    """
    import argparse as ap

    p = ap.ArgumentParser(description="TBA")

    # nargs='?' lets the flag appear without a value; `const` makes that
    # case behave like an absent flag instead of producing None.
    p.add_argument('--in', metavar='INPUT_FILE', type=str,
                   nargs='?', default=sys.stdin, const=sys.stdin,
                   help="the Qiime OTU table file "
                        "[ stdin if not present ]")
    p.add_argument('--md', metavar='METADATA_FILE', type=str,
                   nargs='?', default=None,
                   help="the tab-separated sample metadata file "
                        "[ only OTU table without metadata if not present ]")
    p.add_argument('--out', metavar='OUTPUT_FILE', type=str,
                   nargs='?', default=sys.stdout, const=sys.stdout,
                   help="the output file "
                        "[stdout if not present]")

    p.add_argument('-c', metavar="class attribute",
                   type=str,
                   help="the attribute to use as class")
    p.add_argument('-s', metavar="subclass attribute",
                   type=str,
                   help="the attribute to use as subclass")
    p.add_argument('-u', metavar="subject attribute",
                   type=str,
                   help="the attribute to use as subject")

    # Honour the caller-supplied argument list; element 0 is the program
    # name, exactly as in sys.argv, so it is not parsed.
    return vars(p.parse_args(None if args is None else args[1:]))
|
|
37
|
|
38
|
|
39
|
|
def qiime2lefse(fin, fmd, fout, all_md, sel_md):
    """Convert a tab-separated QIIME OTU table into a LEfSe-style table.

    The first line of the input is discarded; the following line supplies
    the column names. The first column holds the OTU ids and the table must
    contain a column named 'Consensus Lineage' (looked up by name, and taken
    to be the last column when the OTU id is appended to it).

    The output has one row per selected metadata field (field name followed
    by the value for each sample column) and then one row per OTU: the
    lineage with ';' replaced by '|', double quotes removed and '|<OTU id>'
    appended, followed by that OTU's value in each sample column.

    Args:
        fin: path of the OTU table, or a readable stream such as sys.stdin.
        fmd: path of the tab-separated metadata file (header row first,
            sample id in the first column), or a false value for none.
        fout: path of the output file, or a writable stream such as
            sys.stdout.
        all_md: when true, every metadata field is emitted.
        sel_md: metadata field names to emit when ``all_md`` is false;
            false entries (e.g. None) are ignored.

    Raises:
        KeyError: if the 'Consensus Lineage' column is missing, or a
            selected metadata field / sample is absent from the metadata.
        IndexError: if the OTU table has no rows after the first line.
    """
    # Streams handed in by the caller are used as-is and left open; only
    # files opened here are closed.
    owns_input = not hasattr(fin, "read")
    inpf = open(fin) if owns_input else fin
    try:
        # Blank lines are skipped: zip() below truncates to the shortest
        # row, so a single empty line would wipe out every column but one.
        rows = [l.strip().split('\t')
                for l in inpf.readlines()[1:] if l.strip()]
    finally:
        if owns_input:
            inpf.close()

    # Transpose: each entry of `columns` is one column of the table,
    # headed by its column name.
    columns = [list(col) for col in zip(*rows)]
    otu_ids, lineages = columns[0], columns[-1]
    for i, (otu_id, lineage) in enumerate(zip(otu_ids, lineages)):
        # Leave the header cell alone; tag every lineage with its OTU id
        # so that rows sharing a lineage stay distinguishable.
        if lineage != 'Consensus Lineage':
            lineages[i] = lineage + "|" + otu_id

    data = {col[0]: col[1:] for col in columns[1:]}

    md = {}
    md_fields = []
    if fmd:
        with open(fmd) as mdfile:
            mdlines = [l.strip().split('\t')
                       for l in mdfile.readlines() if l.strip()]
        if mdlines:
            md_fields = mdlines[0][1:]
            # The header row only names the fields; it is not a sample.
            for row in mdlines[1:]:
                md[row[0]] = dict(zip(md_fields, row[1:]))

    if all_md:
        # Header order; a plain list so it can be concatenated below
        # (dict views are neither indexable nor addable on Python 3).
        selected_md = list(md_fields)
    else:
        selected_md = [s for s in sel_md if s]

    out_m = [selected_md +
             [d.replace(";", "|").replace("\"", "")
              for d in data['Consensus Lineage']]]
    for k, v in data.items():
        if k == 'Consensus Lineage':
            continue
        out_m.append([md[k][kmd] for kmd in selected_md] + list(v))

    owns_output = not hasattr(fout, "write")
    outf = open(fout, "w") if owns_output else fout
    try:
        # Transpose back: one output row per metadata field / OTU.
        for l in zip(*out_m):
            outf.write("\t".join(l) + "\n")
    finally:
        if owns_output:
            outf.close()
|
|
77
|
|
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the conversion.
    pars = read_params(sys.argv)

    # Class, subclass and subject attribute names, in that order; an entry
    # is None when the corresponding option was not given.
    selectors = [pars[key] for key in ('c', 's', 'u')]

    # With no attribute selected at all, every metadata field is requested.
    qiime2lefse(fin=pars['in'],
                fmd=pars['md'],
                fout=pars['out'],
                all_md=not any(selectors),
                sel_md=selectors)
|