Mercurial > repos > chmaramis > testirprofiler

--- a/cmpb2016/comp_clono_JCDR3.py	Sun Mar 18 07:05:35 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jun 19 17:33:34 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputationJ(inp_name,out1,t10n,fname):
-
-    frame = DataFrame()
-    tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    frame = concat([chunk for chunk in tp])
-
-    grouped = frame.groupby(['J-GENE','AA JUNCTION'])
-    x=grouped.size()
-    x1=DataFrame(list(x.index), columns=['J-GENE','AA JUNCTION'])
-    x1['Reads']=x.values
-    total = sum(x1['Reads'])
-    #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
-    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
-    x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
-    final = x1.sort_values(by = ['Reads'] , ascending = False)
-
-    final.index=range(1,len(final)+1)
-    final.to_csv(out1 , sep = '\t')
-
-    numofclono = len(final)
-    clust = len(final[final['Reads'] > 1])
-    sing = len (final[final['Reads'] == 1])
-    top10 = final[['J-GENE','AA JUNCTION','Frequency %']].head(10)
-    top10.to_csv(t10n , sep = '\t')
-
-    summary = [[str(top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
-    summary.append([top10['Frequency %'].values[0]])
-    summary.append([numofclono])
-    summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
-    summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-
-    ind = ['Dominant Clonotype (J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
-    spl = fname.split('_')
-    col = [spl[0],'%']
-
-    frsum = DataFrame(summary,index = ind, columns = col)
-
-    return frsum
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inp_name = sys.argv[1]
-    out1 = sys.argv[2]
-    t10n = sys.argv[3]
-    sname = sys.argv[4]
-    fname = sys.argv[5]
-
-    # Execute basic function
-    frsum = clonotypeComputationJ(inp_name,out1,t10n,fname)
-
-    # Save output to CSV files
-    if not frsum.empty:
-        frsum.to_csv(sname, sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_JCDR3.xml	Sun Mar 18 07:05:35 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="compClonoJCDR3" name="J+CDR3 Clonotypes Computation" version="0.9">
-  <description>Compute J+CDR3 clonotypes</description>
-  <command interpreter="python">comp_clono_JCDR3.py $input $clonos $topcl $summ2 ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
-
-
-  </inputs>
-
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_clonotypesJCDR3"/>
-	<data name="topcl" format="tabular" 	label="${input.name}_top10clonosJCDR3"/>
-	<data name="summ2" format="tabular" 	label="${input.name}_SummaryJCDR32"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the (J-gene, CDR3) clonotypes and their frequencies.
-  </help>
-
-</tool>