view gene_comparison.py @ 0:0e37e5b73273 draft

Initial commit
author chmaramis
date Fri, 30 Mar 2018 07:22:29 -0400
parents
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 24 17:45:09 2018

@author: chmaramis
"""

from __future__ import division
import numpy as np
from pandas import *
from numpy import nan as NA
import sys
import time

def sw_clonos(x):
    """Return True when the column label *x* begins with 'Clonotypes'."""
    return x.startswith('Clonotypes')


def sw_freq(x):
    """Return True when the column label *x* begins with 'Freq'."""
    return x.startswith('Freq')


def sw_gene(x):
    """Return True when the column label *x* ends with 'GENE'."""
    return x.endswith('GENE')

def geneComparison(inputs):

    mer=DataFrame()
    
    for x in range(0,len(inputs),2):
        
            ini = read_csv(inputs[x] , sep = '\t' , index_col = 0)
            
            ini.drop(ini.columns[np.where(ini.columns.map(sw_clonos))[0]], axis=1, inplace=True)
            
            x1 = inputs[x+1].split('_')
            ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True)
            
            if mer.empty:
                mer = DataFrame(ini)
            else:
                mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer')
            
    mer=mer.fillna(0)
    mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1)
    fr = 'mean'

    mer=mer.sort_values(by = fr,ascending=False)
    mer[fr] = mer[fr].map('{:.4f}'.format)
    mer.index = range(1,len(mer)+1)
    
    return mer


if __name__ == '__main__':

    # Start the wall-clock timer before any work is done
    t_begin = time.time()

    # Command line layout: <output path> followed by the input arguments
    out_path = sys.argv[1]
    in_args = sys.argv[2:]

    # Build the merged gene table
    table = geneComparison(in_args)

    # Write a tab-separated file only when there is something to report
    if not table.empty:
        table.to_csv(out_path, sep='\t')

    # Report the elapsed time in seconds
    elapsed = time.time() - t_begin
    print('Runtime:' + str(elapsed))