Mercurial > repos > chmaramis > testirprofiler
changeset 5:e69b5e717346 draft
Deleted selected files
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 07:05:35 -0400 |
| parents | 91cea576e152 |
| children | 6a8ecfdb9462 |
| files | cmpb2016/comp_clono_CDR3.py cmpb2016/comp_clono_CDR3.xml |
| diffstat | 2 files changed, 0 insertions(+), 97 deletions(-) [+] |
line wrap: on
line diff
--- a/cmpb2016/comp_clono_CDR3.py Sun Mar 18 07:05:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Jun 19 17:33:34 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - - -def cdr3Computation(inp_name, out1, t10n, fname): - - frame = DataFrame() - tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - frame = concat([chunk for chunk in tp]) - - grouped = frame.groupby(['AA JUNCTION']) - x=grouped.size() - x1=DataFrame(x.index, columns=['AA JUNCTION']) - x1['Reads']=x.values - total = sum(x1['Reads']) - #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']] - x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total)) - x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format) - - final = x1.sort_values(by = ['Reads'] , ascending = False) - - final.index=range(1,len(final)+1) - final.to_csv(out1 , sep = '\t') - - numofclono = len(final) - clust = len(final[final['Reads'] > 1]) - sing = len (final[final['Reads'] == 1]) - top10 = final[['AA JUNCTION','Frequency %']].head(10) - top10.to_csv(t10n , sep = '\t') - - summary = [[str(top10['AA JUNCTION'].values[0])]] - summary.append([top10['Frequency %'].values[0]]) - summary.append([numofclono]) - summary.append([clust,'{:.4f}'.format(100*clust/numofclono)]) - summary.append([sing,'{:.4f}'.format(100*sing/numofclono)]) - - ind = ['Dominant Clonotype (CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes','Singletons'] - spl = fname.split('_') - col = [spl[0],'%'] - - frsum = DataFrame(summary,index = ind, columns = col) - - return frsum - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - out1 = sys.argv[2] - t10n = sys.argv[3] - sname = sys.argv[4] - fname = sys.argv[5] - - # Execute basic function - frsum = cdr3Computation(inp_name,out1,t10n,fname) - - # Save output to CSV files - if not frsum.empty: - frsum.to_csv(sname, sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_CDR3.xml Sun Mar 18 07:05:25 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -<tool id="compClonoCDR3" name="CDR3 Clonotypes Computation" version="0.9"> - <description>Compute CDR3 clonotypes</description> - <command interpreter="python">comp_clono_CDR3.py $input $clonos $topcl $summ ${input.name}</command> - <inputs> - <param format="tabular" name="input" type="data" label="Filtered-in File"/> - </inputs> - -<outputs> - <data name="clonos" format="tabular" label="${input.name}_CDR3"/> - <data name="topcl" format="tabular" label="${input.name}_top10CDR3"/> - <data name="summ" format="tabular" label="${input.name}_Summary3"/> -</outputs> - - - <help> -This tool computes the CDR3 clonotypes. - </help> - -</tool>
