view public_clonotype_computation.py @ 1:acaa8e8a0b88 draft default tip

Uploaded test-data & added tool help
author chmaramis
date Mon, 30 Apr 2018 04:47:52 -0400
parents 0e37e5b73273
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 17:18:09 2018

@author: chmaramis
"""

from __future__ import division
import numpy as np
from pandas import *
from numpy import nan as NA
import sys
import time

# Key columns per clonotype definition; the selected list is used as the
# merge key when the per-sample tables are combined.
clono_def = {
    'CDR3': ['AA JUNCTION'],
    'VCDR3': ['V-GENE', 'AA JUNCTION'],
    'JCDR3': ['J-GENE', 'AA JUNCTION'],
}



def publicClonotypeComputation(inputs, clono, thres):
    """Merge per-sample clonotype tables and keep the shared clonotypes.

    Each sample table is read from a tab-separated file, optionally filtered
    by read count, relabelled with the sample name and outer-merged with the
    other samples on the key columns of the chosen clonotype definition.
    Only clonotypes with a non-zero frequency in more than one sample are
    returned.

    Parameters
    ----------
    inputs : sequence of str
        Flat sequence of alternating entries: a file path followed by the
        sample name belonging to it. Only the part of the name before the
        first underscore is used as the column label.
    clono : str
        Key of ``clono_def`` selecting the merge key columns.
    thres : str or None
        ``'null'`` (or ``None``) disables filtering; otherwise a value
        convertible with ``int``. Rows are kept when ``Reads`` is strictly
        greater than the threshold.

    Returns
    -------
    DataFrame
        Merged table with an added ``'Num of Patients'`` column, restricted
        to rows where that count exceeds 1 and re-indexed from 1. An empty
        DataFrame is returned when ``inputs`` is empty.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def``.
    ValueError
        If ``inputs`` does not contain an even number of entries, or if
        ``thres`` is neither ``'null'``/``None`` nor an integer string.
    """
    key_cols = clono_def[clono]

    # Fail before touching any file instead of with an IndexError on the
    # last, unpaired entry.
    if len(inputs) % 2 != 0:
        raise ValueError('inputs must hold (file, sample name) pairs; '
                         'got %d entries' % len(inputs))

    # Parse the threshold once rather than once per file.
    if thres is None or thres == 'null':
        min_reads = None
    else:
        min_reads = int(thres)

    # None (not DataFrame.empty) marks "no sample merged yet": a sample whose
    # rows were all filtered out must still contribute its columns, exactly
    # like an empty sample that is not the first one.
    merged = None

    for path, sample_name in zip(inputs[0::2], inputs[1::2]):
        sample = read_csv(path, sep='\t', index_col=0)

        if min_reads is not None:
            sample = sample[sample['Reads'] > min_reads]

        label = sample_name.split('_')[0]

        del sample['Reads']
        # NOTE(review): this rename assumes exactly four columns remain after
        # dropping 'Reads' (two left unchanged, then the two per-sample value
        # columns) - confirm against the input format for every definition.
        sample.columns = [sample.columns[0],
                          sample.columns[1],
                          label + ' ' + sample.columns[2],
                          label + ' Relative ' + sample.columns[3]]

        if merged is None:
            merged = sample
        else:
            merged = merged.merge(sample, how='outer', on=key_cols)

    if merged is None:
        # No inputs at all: nothing to merge.
        return DataFrame()

    freq_cols = [c for c in merged.columns if 'Frequency' in c]
    read_cols = [c for c in merged.columns if 'Reads/Total' in c]

    # Clonotypes missing from a sample show up as NaN after the outer merge.
    if freq_cols:
        merged[freq_cols] = merged[freq_cols].fillna(0)
    if read_cols:
        merged[read_cols] = merged[read_cols].fillna('0/*')

    # Number of samples in which the clonotype has a non-zero frequency.
    merged['Num of Patients'] = (merged[freq_cols] != 0).sum(axis=1)

    public = merged[merged['Num of Patients'] > 1]
    public.index = range(1, len(public) + 1)

    return public


if __name__ == '__main__':

    # Reference point for the runtime report printed at the end
    t_begin = time.time()

    # Positional arguments: clonotype definition, output path, read
    # threshold, then the remaining entries forwarded as the input list
    clono = sys.argv[1]
    output = sys.argv[2]
    thres = sys.argv[3]
    pairs = sys.argv[4:]

    # Compute the merged table of shared clonotypes
    public = publicClonotypeComputation(pairs, clono, thres)

    # Write the tab-separated result only when there is something to write
    if not public.empty:
        public.to_csv(output, sep='\t')

    # Report elapsed wall-clock time
    elapsed = time.time() - t_begin
    print('Runtime:' + str(elapsed))