Mercurial > repos > chmaramis > testirprofiler
changeset 6:6a8ecfdb9462 draft
Deleted selected files
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 07:06:18 -0400 |
| parents | e69b5e717346 |
| children | b5bb2e8e829c |
| files | cmpb2016/comp_clono_JCDR3.py cmpb2016/comp_clono_JCDR3.xml |
| diffstat | 2 files changed, 0 insertions(+), 103 deletions(-) [+] |
line wrap: on
line diff
# -*- coding: utf-8 -*-
# Source: cmpb2016/comp_clono_JCDR3.py (listed as removed in changeset 6:6a8ecfdb9462).
"""
Compute (J-gene, CDR3) clonotypes and their frequencies from a tabular file.

Created on Thu Jun 19 17:33:34 2014

@author: chmaramis
"""

from __future__ import division

import functools as ft
import sys
import time

import numpy as np  # noqa: F401  (kept: imported by the original module)
from pandas import DataFrame, concat, read_csv

# Columns that together identify one clonotype.
KEY_COLUMNS = ['J-GENE', 'AA JUNCTION']

# Row labels of the summary table returned by clonotypeComputationJ.
SUMMARY_ROWS = [
    'Dominant Clonotype (J+CDR3)',
    'Frequency',
    'Number of Clonotypes',
    'Expanding Clonotypes',
    'Singletons',
]


def frm(x, y):
    """Return the string 'x/y' (used to fill the 'Reads/Total' column)."""
    return '{r}/{l}'.format(r=x, l=y)


def clonotypeComputationJ(inp_name, out1, t10n, fname):
    """Group reads by (J-GENE, AA JUNCTION) and write the clonotype tables.

    Parameters
    ----------
    inp_name : path of the tab-separated input; its first column is used as
        the index and it must contain the 'J-GENE' and 'AA JUNCTION' columns.
    out1 : path that receives the full clonotype table, sorted by descending
        read count and indexed from 1.
    t10n : path that receives the ten most frequent clonotypes.
    fname : name whose part before the first '_' labels the first summary
        column.

    Returns
    -------
    DataFrame with the rows listed in SUMMARY_ROWS and the columns
    [<fname prefix>, '%'].  When the input holds no clonotypes, the two
    output tables are still written (header only) and an empty DataFrame is
    returned instead of raising.
    """
    # Parse in chunks, then assemble the complete table.
    reader = read_csv(inp_name, iterator=True, chunksize=5000, sep='\t',
                      index_col=0)
    chunks = list(reader)
    # concat() rejects an empty list, so fall back to an empty keyed frame.
    frame = concat(chunks) if chunks else DataFrame(columns=KEY_COLUMNS)

    counts = frame.groupby(KEY_COLUMNS).size()
    x1 = DataFrame(list(counts.index), columns=KEY_COLUMNS)
    x1['Reads'] = counts.values
    total = x1['Reads'].sum()
    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
    x1['Frequency %'] = (100 * x1['Reads'] / total).map('{:.4f}'.format)

    final = x1.sort_values(by=['Reads'], ascending=False)
    final.index = range(1, len(final) + 1)
    final.to_csv(out1, sep='\t')

    top10 = final[KEY_COLUMNS + ['Frequency %']].head(10)
    top10.to_csv(t10n, sep='\t')

    numofclono = len(final)
    if numofclono == 0:
        # No dominant clonotype exists and the percentages would divide by
        # zero; the caller skips writing a summary for an empty frame.
        return DataFrame()

    clust = int((final['Reads'] > 1).sum())
    sing = int((final['Reads'] == 1).sum())
    dominant = top10.iloc[0]

    summary = [
        ['{},{}'.format(dominant['J-GENE'], dominant['AA JUNCTION'])],
        [dominant['Frequency %']],
        [numofclono],
        [clust, '{:.4f}'.format(100 * clust / numofclono)],
        [sing, '{:.4f}'.format(100 * sing / numofclono)],
    ]
    col = [fname.split('_')[0], '%']

    return DataFrame(summary, index=SUMMARY_ROWS, columns=col)


def main(argv):
    """Run the computation for the command-line arguments in *argv*."""
    # Extra trailing arguments are tolerated, as they were before.
    if len(argv) < 6:
        sys.exit('Usage: comp_clono_JCDR3.py <input> <clonotypes_out> '
                 '<top10_out> <summary_out> <input_name>')

    start = time.time()

    # Parse input arguments
    inp_name, out1, t10n, sname, fname = argv[1:6]

    # Execute basic function
    frsum = clonotypeComputationJ(inp_name, out1, t10n, fname)

    # Save output to CSV files
    if not frsum.empty:
        frsum.to_csv(sname, sep='\t')

    # Print execution time
    stop = time.time()
    print('Runtime:' + str(stop - start))


if __name__ == '__main__':
    main(sys.argv)
<!-- Galaxy tool wrapper, originally cmpb2016/comp_clono_JCDR3.xml (listed as removed in changeset 6:6a8ecfdb9462). -->
<tool id="compClonoJCDR3" name="J+CDR3 Clonotypes Computation" version="0.9">
  <description>Compute J+CDR3 clonotypes</description>
  <!-- Positional arguments: input table, clonotype table, top-10 table,
       summary table, input dataset name. Each substitution is quoted so a
       path or dataset name containing whitespace reaches the script as a
       single argument. -->
  <command interpreter="python">comp_clono_JCDR3.py '$input' '$clonos' '$topcl' '$summ2' '${input.name}'</command>
  <inputs>
    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
  </inputs>

  <outputs>
    <data name="clonos" format="tabular" label="${input.name}_clonotypesJCDR3"/>
    <data name="topcl" format="tabular" label="${input.name}_top10clonosJCDR3"/>
    <data name="summ2" format="tabular" label="${input.name}_SummaryJCDR32"/>
  </outputs>

  <help>
This tool computes the (J-gene, CDR3) clonotypes and their frequencies.
  </help>

</tool>
