Mercurial > repos > george-weingart > lefse
view home/ubuntu/lefse_to_export/qiime2lefse.py @ 2:a31c10fe09c8 draft default tip
Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
author | george-weingart |
---|---|
date | Tue, 07 Jul 2015 13:52:29 -0400 |
parents | db64b6287cd6 |
children |
line wrap: on
line source
#!/usr/bin/env python import sys def read_params(args): import argparse as ap import textwrap p = ap.ArgumentParser( description= "TBA" ) p.add_argument( '--in', metavar='INPUT_FILE', type=str, nargs='?', default=sys.stdin, help= "the Qiime OTU table file " "[ stdin if not present ]" ) p.add_argument( '--md', metavar='METADATA_FILE', type=str, nargs='?', default=None, help= "the Qiime OTU table file " "[ only OTU table without metadata if not present ]" ) p.add_argument( '--out', metavar='OUTPUT_FILE', type=str, nargs = '?', default=sys.stdout, help= "the output file " "[stdout if not present]") p.add_argument( '-c', metavar="class attribute", type=str, help = "the attribute to use as class" ) p.add_argument( '-s', metavar="subclass attribute", type=str, help = "the attribute to use as subclass" ) p.add_argument( '-u', metavar="subject attribute", type=str, help = "the attribute to use as subject" ) return vars(p.parse_args()) def qiime2lefse( fin, fmd, fout, all_md, sel_md ): with (fin if fin==sys.stdin else open(fin)) as inpf : lines = [list(ll) for ll in (zip(*[l.strip().split('\t') for l in inpf.readlines()[1:]]) ) ] for i,(l1,l2) in enumerate(zip( lines[0], lines[-1] )): if not l2 == 'Consensus Lineage': lines[-1][i] = l2+"|"+l1 data = dict([(l[0],l[1:]) for l in lines[1:]]) md = {} if fmd: with open(fmd) as inpf: mdlines = [l.strip().split('\t') for l in inpf.readlines()] mdf = mdlines[0][1:] for l in mdlines: mdd = dict(zip(mdf,l[1:])) md[l[0]] = mdd selected_md = md.values()[0].keys() if md else [] if not all_md: selected_md = [s for s in sel_md if s] out_m = [ selected_md + list([d.replace(";","|").replace("\"","") for d in data[ 'Consensus Lineage' ]]) ] for k,v in data.items(): if k == 'Consensus Lineage': continue out_m.append( [md[k][kmd] for kmd in selected_md] + list(v) ) with (fout if fout == sys.stdout else open( fout, "w" )) as outf: for l in zip(*out_m): outf.write( "\t".join(l) + "\n" ) if __name__ == '__main__': pars = read_params( sys.argv ) qiime2lefse( fin = pars['in'], fmd = pars['md'], fout = pars['out'], all_md = not pars['c'] and not pars['s'] and not pars['u'], sel_md = [pars['c'],pars['s'],pars['u']])