view home/ubuntu/lefse_to_export/lefse2circlader.py @ 2:a31c10fe09c8 draft default tip

Fixed bug due to numerical approximation after normalization affecting root-level clades (e.g. "Bacteria" or "Archaea")
author george-weingart
date Tue, 07 Jul 2015 13:52:29 -0400
parents db64b6287cd6
children
line wrap: on
line source

#!/usr/bin/env python

from __future__ import with_statement

import sys
import os
import argparse

def read_params(args):
    """Parse the command line and return the options as a plain dict.

    Parameters
    ----------
    args : sequence of str or None
        An argv-style list whose first element is the program name (the
        script passes ``sys.argv``).  The first element is skipped and the
        remainder is parsed.  ``None`` makes argparse fall back to
        ``sys.argv[1:]``.

    Returns
    -------
    dict
        ``'inp_f'``: input path, or None when omitted (stdin is meant);
        ``'out_f'``: output path, or None when omitted (stdout is meant);
        ``'l'``: integer given with ``-l`` (default 0).

    Raises
    ------
    SystemExit
        Raised by argparse on invalid arguments or when ``-h`` is given.
    """
    parser = argparse.ArgumentParser(description='Convert LEfSe output to '
                        'Circlader input')
    parser.add_argument('inp_f', metavar='INPUT_FILE', nargs='?',
                        default=None, type=str,
                        help="the input file [stdin if not present]")
    parser.add_argument('out_f', metavar='OUTPUT_FILE', nargs='?',
                        default=None, type=str,
                        help="the output file [stdout if not present]")
    parser.add_argument('-l', metavar='levels with label', default=0, type=int)

    # Honour the arguments that were handed in instead of always re-reading
    # the process-wide sys.argv; element 0 is the program name, not an option.
    cli_args = None if args is None else list(args)[1:]
    return vars(parser.parse_args(cli_args))

def lefse2circlader(par):
    """Rewrite tab-separated input rows as four-column output rows.

    Parameters
    ----------
    par : dict
        ``par['inp_f']``: path of the input file; a falsy value means read
        from ``sys.stdin``.
        ``par['out_f']``: path of the output file; a falsy value means write
        to ``sys.stdout``.
        ``par['l']``: integer; a row gets a non-empty label only when its
        first field contains at most this many ``'.'`` characters.

    Every input line is stripped and split on tabs; only rows with more than
    two fields are kept.  For each kept row one output line is written with
    these tab-separated columns:

    1. the first field, unchanged;
    2. the label: the part of the first field after its last ``'.'``, or an
       empty string when the field has more than ``par['l']`` dots;
    3. the third field;
    4. the third field followed by ``"_col"``.

    Only files opened here are closed.  ``sys.stdin`` and ``sys.stdout`` are
    left open so the caller can keep using them afterwards.
    """
    inp_path, out_path = par['inp_f'], par['out_f']

    if inp_path:
        with open(inp_path) as inpf:
            raw_lines = inpf.readlines()
    else:
        # Do not wrap sys.stdin in `with`: that would close it for the
        # rest of the process.
        raw_lines = sys.stdin.readlines()

    rows = (line.strip().split('\t') for line in raw_lines)
    biomarkers = [fields for fields in rows if len(fields) > 2]

    circ = [[b[0],
             "" if b[0].count('.') > par['l'] else b[0].split('.')[-1],
             b[2],
             b[2] + "_col"] for b in biomarkers]

    out_lines = ["\t".join(c) + "\n" for c in circ]

    if out_path:
        with open(out_path, 'w') as out_file:
            out_file.writelines(out_lines)
    else:
        # Same reasoning as for stdin: write and flush, but never close the
        # interpreter's stdout.
        sys.stdout.writelines(out_lines)
        sys.stdout.flush()

if __name__ == '__main__':
    # Script entry point: parse the command line, then run the conversion.
    lefse2circlader(read_params(sys.argv))