view cmpb2016/top10_CDR3_exact_pairing.py @ 1:1319c163872c draft

Uploaded README file
author chmaramis
date Sun, 18 Mar 2018 06:26:39 -0400
parents 8be019b173e6
children
line wrap: on
line source

# -*- coding: utf-8 -*-
"""
Created on Mon Apr 18 09:48:00 2016

@author: chmaramis
"""

import pandas as pd
import numpy as np
import sys


def build_pairing_table(clonotypes, top_n=10):
    """Tabulate, per gene, the reads/frequency of the top CDR3 sequences.

    For each of the first ``top_n`` rows of ``clonotypes`` (the table is
    used in the order given), every row of the whole table carrying exactly
    the same 'AA JUNCTION' string is located, and its 'Reads' and
    'Frequency %' values are accumulated under the gene of that row.

    Rows are addressed by position, so the result does not depend on the
    index labels of ``clonotypes``.  When the table has fewer than ``top_n``
    rows, only the available rows are used.

    Parameters
    ----------
    clonotypes : pandas.DataFrame
        Must contain the columns 'AA JUNCTION', 'Reads', 'Frequency %' and
        at least one column whose upper-cased name ends with 'GENE' (the
        first such column is used).
    top_n : int, optional
        Number of leading rows to analyse (default 10).

    Returns
    -------
    pandas.DataFrame
        One row per unique gene (sorted by ``numpy.unique``) and two columns
        per analysed row: '<AA JUNCTION> Reads' and '<AA JUNCTION> Freq. %'.

    Raises
    ------
    ValueError
        If no column name ends with 'GENE'.
    """
    gene_cols = [c for c in clonotypes.columns
                 if str(c).upper().endswith('GENE')]
    if not gene_cols:
        raise ValueError("no column ending with 'GENE' found in input table")
    gene_col = gene_cols[0]

    # Plain arrays give positional access that is independent of whatever
    # labels the first column of the input file happened to contain.
    junctions = np.asarray(clonotypes['AA JUNCTION'])
    genes = np.asarray(clonotypes[gene_col])
    reads = np.asarray(clonotypes['Reads'])
    freqs = np.asarray(clonotypes['Frequency %'])

    # gene_row[k] is the output row of the gene found in input row k.
    unique_genes, gene_row = np.unique(genes, return_inverse=True)

    n_top = max(0, min(top_n, len(junctions)))
    table = np.zeros((len(unique_genes), 2 * n_top))

    for i in range(n_top):
        matches = np.flatnonzero(junctions == junctions[i])
        # The first row sharing this junction is expected to be row i itself;
        # a different gene there means the junction already occurred earlier
        # in the table paired with another gene.
        if genes[matches[0]] != genes[i]:
            print('We have a problem here!')

        for k in matches:
            table[gene_row[k], 2 * i] += reads[k]
            table[gene_row[k], 2 * i + 1] += freqs[k]

    # Interleave the two column labels of every analysed junction.
    columns = []
    for aa in junctions[:n_top]:
        columns.append(aa + ' Reads')
        columns.append(aa + ' Freq. %')

    return pd.DataFrame(table, columns=columns, index=unique_genes)


if __name__ == "__main__":

    if len(sys.argv) < 3:
        sys.exit('usage: top10_CDR3_exact_pairing.py <clonotypes.tsv> <output.tsv>')

    clonosFN = sys.argv[1]
    outFN = sys.argv[2]

    # Tab-separated clonotype table; the first column is taken as the index.
    Cl = pd.read_csv(clonosFN, sep='\t', index_col=0)

    D = build_pairing_table(Cl)
    D.to_csv(outFN, sep='\t')