Mercurial > repos > chmaramis > irprofiler
view public_clonotype_computation.py @ 1:acaa8e8a0b88 draft default tip
Uploaded test-data & added tool help
author | chmaramis |
---|---|
date | Mon, 30 Apr 2018 04:47:52 -0400 |
parents | 0e37e5b73273 |
children |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Sat Mar 24 17:18:09 2018 @author: chmaramis """ from __future__ import division import numpy as np from pandas import * from numpy import nan as NA import sys import time clono_def = {'CDR3': ['AA JUNCTION'], 'VCDR3': ['V-GENE','AA JUNCTION'], 'JCDR3': ['J-GENE','AA JUNCTION']} def publicClonotypeComputation(inputs, clono, thres): clono_comps = clono_def[clono] clono=DataFrame() for x in range(0,len(inputs),2): cl = DataFrame() cl = read_csv(inputs[x] , sep = '\t' , index_col = 0) #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) #cl = concat([chunk for chunk in tp]) if (thres != 'null'): cl = cl[cl['Reads'] > int(thres)] x1 = inputs[x+1].split('_') del cl['Reads'] cl.columns = [cl.columns[0], cl.columns[1], x1[0]+' '+cl.columns[2], x1[0]+' Relative '+cl.columns[3]] if clono.empty: clono = cl else: clono = clono.merge(cl, how='outer', on=clono_comps) col = clono.columns freqs = col.map(lambda x: 'Frequency' in x) reads = col.map(lambda x: 'Reads/Total' in x) clono[col[freqs]] = clono[col[freqs]].fillna(0) clono[col[reads]] = clono[col[reads]].fillna('0/*') clono['Num of Patients']= clono[col[freqs]].apply(lambda x: np.sum(x != 0), axis=1) clono = clono[clono['Num of Patients'] > 1] clono.index = range(1,len(clono)+1) return clono if __name__ == '__main__': start=time.time() # Parse input arguments arg = sys.argv[4:] clono = sys.argv[1] output = sys.argv[2] thres = sys.argv[3] # Execute basic function mer = publicClonotypeComputation(arg, clono, thres) # Save output to CSV files if not mer.empty: mer.to_csv(output , sep = '\t') # Print execution time stop=time.time() print('Runtime:' + str(stop-start))