view exclusive_clonotype_computation.py @ 1:acaa8e8a0b88 draft default tip

Uploaded test-data & added tool help
author chmaramis
date Mon, 30 Apr 2018 04:47:52 -0400
parents 0e37e5b73273
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 17:31:38 2018

@author: chmaramis
"""

from __future__ import division
import numpy as np
from pandas import *
from numpy import nan as NA
import sys
import time

# Maps each supported clonotype definition name to the list of columns
# that are used together as the merge key when comparing two files.
clono_def = {'CDR3': ['AA JUNCTION'],
             'VCDR3': ['V-GENE','AA JUNCTION'],
             'JCDR3': ['J-GENE','AA JUNCTION']}


def _loadClonotypes(path, thres):
    """Read a tab-separated clonotype table and apply the reads threshold.

    The first column of the file is used as the index.  Unless ``thres`` is
    the string ``'null'``, only rows whose ``Reads`` value is strictly
    greater than ``int(thres)`` are kept.
    """
    table = read_csv(path, sep='\t', index_col=0)
    if thres != 'null':
        table = table[table['Reads'] > int(thres)]
    return table


def exclusiveClonotypeComputation(inputs, clono, thres):
    """Return the clonotypes of file A that do not occur in file B.

    Parameters
    ----------
    inputs : sequence
        ``inputs[0]`` is the path of file A and ``inputs[2]`` the path of
        file B.  The other positions are not read by this function
        (presumably sample labels -- confirm against the caller).
    clono : str
        Key of ``clono_def`` selecting the columns that identify a
        clonotype ('CDR3', 'VCDR3' or 'JCDR3').
    thres : str
        Either ``'null'`` (no filtering) or an integer given as text; rows
        with ``Reads`` not strictly above it are dropped from both files
        before the comparison.

    Returns
    -------
    DataFrame
        The rows of file A (all of its columns) whose key has no match in
        file B, or whose matched B row has 0 reads.  The index is
        renumbered 1..n.

    Raises
    ------
    KeyError
        If ``clono`` is not a key of ``clono_def`` or a required column is
        missing from an input file.
    ValueError
        If ``thres`` is neither ``'null'`` nor parseable by ``int``.
    """
    clono_comps = clono_def[clono]

    # File A
    tableA = _loadClonotypes(inputs[0], thres)

    # File B -- its read counts are renamed so they can sit next to A's
    # 'Reads' column after the merge.
    tableB = _loadClonotypes(inputs[2], thres).rename(columns={'Reads': 'ReadsB'})

    merged = tableA.merge(tableB[clono_comps + ['ReadsB']],
                          how='left', on=clono_comps)

    # Rows of A without a partner in B come out of the left merge with
    # ReadsB = NaN.  The mask is built from the *returned* series: calling
    # fillna(inplace=True) on a column selection does not update the parent
    # frame under pandas Copy-on-Write, which would leave every row
    # unmatched by the == 0 test.
    absentFromB = merged['ReadsB'].fillna(0) == 0

    exclusive = merged[absentFromB].drop('ReadsB', axis=1)
    exclusive.index = range(1, len(exclusive) + 1)

    return exclusive


if __name__ == '__main__':

    # Take the starting timestamp before any work is done
    t_begin = time.time()

    # Command line layout:
    #   argv[1] clonotype definition, argv[2] output path,
    #   argv[3] reads threshold, argv[4:] input file arguments
    input_args = sys.argv[4:]
    clono_key = sys.argv[1]
    out_path = sys.argv[2]
    reads_threshold = sys.argv[3]

    # Compute the clonotypes found only in the first input file
    exclusive = exclusiveClonotypeComputation(input_args, clono_key, reads_threshold)

    # Write the tab-separated result; nothing is written for an empty result
    has_content = not exclusive.empty
    if has_content:
        exclusive.to_csv(out_path, sep='\t')

    # Report the elapsed wall-clock time in seconds
    t_end = time.time()
    elapsed = t_end - t_begin
    print('Runtime:' + str(elapsed))