view cmpb2016/exclus_clono_JCDR3.py @ 0:8be019b173e6 draft

Uploaded included tools
author chmaramis
date Sun, 18 Mar 2018 05:54:20 -0400
parents
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
Created on Mon Feb 29 17:06:09 2016

@author: chmaramis
"""

from __future__ import division
import numpy as np
from pandas import *
from numpy import nan as NA
import sys
import time


def exclusiveJclonoFunc(inputs,thres):

    jClono=DataFrame()
    
    # File A
    cl = DataFrame()
    cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
    if (thres != 'null'):
                cl = cl[cl['Reads'] > int(thres)]
    jClono = cl
    
    # File B
    cl = DataFrame()
    cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
    if (thres != 'null'):
                cl = cl[cl['Reads'] > int(thres)]
    cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
    jClono = jClono.merge(cl[['J-GENE','AA JUNCTION','ReadsB']], how='left', on=['J-GENE','AA JUNCTION'])
    
    jClono['ReadsB'].fillna(0, inplace=True)
        
    jClono = jClono[jClono['ReadsB'] == 0]
    del jClono['ReadsB']
    
    jClono.index = range(1,len(jClono)+1)
    
    return jClono  


if __name__ == '__main__':

    # Take the starting timestamp before any work is done
    t_begin = time.time()

    # Command line layout: <output file> <threshold> <input arguments...>
    out_path = sys.argv[1]
    read_threshold = sys.argv[2]
    input_args = sys.argv[3:]

    # Compute the clonotypes that are exclusive to the first input file
    exclusive = exclusiveJclonoFunc(input_args, read_threshold)

    # Write the result as a tab-separated file; an empty result writes nothing
    has_rows = not exclusive.empty
    if has_rows:
        exclusive.to_csv(out_path, sep='\t')

    # Report the elapsed wall-clock time
    t_end = time.time()
    elapsed = t_end - t_begin
    print('Runtime:' + str(elapsed))