Mercurial > repos > chmaramis > testirprofiler

--- a/cmpb2016/comp_clono_VCDR3.py	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jun 19 17:33:34 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputation(inp_name, out1, t10n, fname):
-
-    frame = DataFrame()
-    tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    frame = concat([chunk for chunk in tp])
-
-
-    grouped = frame.groupby(['V-GENE','AA JUNCTION'])
-    x=grouped.size()
-    x1=DataFrame(list(x.index), columns=['V-GENE','AA JUNCTION'])
-    x1['Reads']=x.values
-    total = sum(x1['Reads'])
-    #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
-    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
-    x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
-    final = x1.sort_values(by = ['Reads'] , ascending = False)
-
-    final.index=range(1,len(final)+1)
-    final.to_csv(out1 , sep = '\t')
-
-    numofclono = len(final)
-    clust = len(final[final['Reads'] > 1])
-    sing = len (final[final['Reads'] == 1])
-    top10 = final[['V-GENE','AA JUNCTION','Frequency %']].head(10)
-    top10.to_csv(t10n , sep = '\t')
-
-    summary = [[str(top10['V-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
-    summary.append([top10['Frequency %'].values[0]])
-    summary.append([numofclono])
-    summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
-    summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-    ind = ['Dominant Clonotype (V+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
-    spl = fname.split('_')
-    col = [spl[0],'%']
-
-    frsum = DataFrame(summary,index = ind, columns = col)
-
-    return frsum
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inp_name = sys.argv[1]
-    out1 = sys.argv[2]
-    t10n = sys.argv[3]
-    sname = sys.argv[4]
-    fname = sys.argv[5]
-
-    # Execute basic function
-    frsum = clonotypeComputation(inp_name,out1,t10n,fname)
-
-    # Save output to CSV files
-    if not frsum.empty:
-        frsum.to_csv(sname, sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VCDR3.xml	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="compClonoVCDR3" name="V+CDR3 Clonotypes Computation" version="0.9">
-  <description>Compute V+CDR3 clonotypes</description>
-  <command interpreter="python">comp_clono_VCDR3.py $input $clonos $topcl $summ2 ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
-
-
-  </inputs>
-
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_clonotypes"/>
-	<data name="topcl" format="tabular" 	label="${input.name}_top10clonos"/>
-	<data name="summ2" format="tabular" 	label="${input.name}_Summary2"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the (V-gene, CDR3) clonotypes and their frequencies.
-  </help>
-
-</tool>
--- a/cmpb2016/comp_clono_VDJCDR3.py	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Dec  3 14:54:00 2015
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputationVDJ(inp_name,out1,t10n,fname):
-
-    frame = DataFrame()
-    tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    frame = concat([chunk for chunk in tp])
-
-    grouped = frame.groupby(['V-GENE','D-GENE','J-GENE','AA JUNCTION'])
-    x=grouped.size()
-    x1=DataFrame(list(x.index), columns=['V-GENE','D-GENE','J-GENE','AA JUNCTION'])
-    x1['Reads']=x.values
-    total = sum(x1['Reads'])
-    #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
-    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
-    x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
-    final = x1.sort_values(by = ['Reads'] , ascending = False)
-    #final = x1.sort_values(by = ['Reads'] , ascending = False)
-
-    final.index=range(1,len(final)+1)
-    final.to_csv(out1 , sep = '\t')
-
-    numofclono = len(final)
-    clust = len(final[final['Reads'] > 1])
-    sing = len (final[final['Reads'] == 1])
-    top10 = final[['V-GENE','D-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10)
-    top10.to_csv(t10n , sep = '\t')
-
-    summary = [[str(top10['V-GENE'].values[0]+','+top10['D-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
-    summary.append([top10['Frequency %'].values[0]])
-    summary.append([numofclono])
-    summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
-    summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-
-    ind = ['Dominant Clonotype (V+D+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
-    spl = fname.split('_')
-    col = [spl[0],'%']
-
-    frsum = DataFrame(summary,index = ind, columns = col)
-
-    return frsum
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inp_name = sys.argv[1]
-    out1 = sys.argv[2]
-    t10n = sys.argv[3]
-    sname = sys.argv[4]
-    fname = sys.argv[5]
-
-    # Execute basic function
-    frsum = clonotypeComputationVDJ(inp_name,out1,t10n,fname)
-
-    # Save output to CSV files
-    if not frsum.empty:
-        frsum.to_csv(sname, sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VDJCDR3.xml	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="compClonoVDJCDR3" name="V+D+J+CDR3 Clonotypes Computation" version="0.9">
-  <description>Compute V+D+J+CDR3 clonotypes</description>
-  <command interpreter="python">comp_clono_VDJCDR3.py $input $clonos $topcl $summ2 ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
-
-
-  </inputs>
-
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_clonotypesVDJCDR3"/>
-	<data name="topcl" format="tabular" 	label="${input.name}_top10clonosVDJCDR3"/>
-	<data name="summ2" format="tabular" 	label="${input.name}_SummaryVDJCDR3"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the (V-gene, D-gene, J-gene, CDR3) clonotypes and their frequencies.
-  </help>
-
-</tool>
--- a/cmpb2016/comp_clono_VJCDR3.py	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 23 17:33:34 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputationVJ(inp_name,out1,t10n,fname):
-
-    frame = DataFrame()
-    tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    frame = concat([chunk for chunk in tp])
-
-    grouped = frame.groupby(['V-GENE','J-GENE','AA JUNCTION'])
-    x=grouped.size()
-    x1=DataFrame(list(x.index), columns=['V-GENE','J-GENE','AA JUNCTION'])
-    x1['Reads']=x.values
-    total = sum(x1['Reads'])
-    #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
-    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
-    x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
-    final = x1.sort_values(by = ['Reads'] , ascending = False)
-    #final = x1.sort_values(by = ['Reads'] , ascending = False)
-
-    final.index= range(1,len(final)+1)
-    final.to_csv(out1 , sep = '\t')
-
-    numofclono = len(final)
-    clust = len(final[final['Reads'] > 1])
-    sing = len (final[final['Reads'] == 1])
-    top10 = final[['V-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10)
-    top10.to_csv(t10n , sep = '\t')
-
-    summary = [[str(top10['V-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
-    summary.append([top10['Frequency %'].values[0]])
-    summary.append([numofclono])
-    summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
-    summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-
-    ind = ['Dominant Clonotype (V+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
-    spl = fname.split('_')
-    col = [spl[0],'%']
-
-    frsum = DataFrame(summary,index = ind, columns = col)
-
-    return frsum
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inp_name = sys.argv[1]
-    out1 = sys.argv[2]
-    t10n = sys.argv[3]
-    sname = sys.argv[4]
-    fname = sys.argv[5]
-
-    # Execute basic function
-    frsum = clonotypeComputationVJ(inp_name,out1,t10n,fname)
-
-    # Save output to CSV files
-    if not frsum.empty:
-        frsum.to_csv(sname, sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VJCDR3.xml	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="compClonoVJCDR3" name="V+J+CDR3 Clonotypes Computation" version="0.9">
-  <description>Compute V+J+CDR3 clonotypes</description>
-  <command interpreter="python">comp_clono_VJCDR3.py $input $clonos $topcl $summ2 ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
-
-
-  </inputs>
-
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_clonotypesVJCDR3"/>
-	<data name="topcl" format="tabular" 	label="${input.name}_top10clonosVJCDR3"/>
-	<data name="summ2" format="tabular" 	label="${input.name}_SummaryVJCDR3"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the (V-gene, J-gene, CDR3) clonotypes and their frequencies.
-  </help>
-
-</tool>
--- a/cmpb2016/compare_repertoire_J.py	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 10:18:39 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-sw_reads = lambda x: x.startswith('Reads')
-sw_freq = lambda x: x.startswith('Freq')
-sw_gene = lambda x: x.startswith('J')
-
-def freqtoall(inputs):
-
-    mer=DataFrame()
-
-    for x in range(0,len(inputs),2):
-
-            ini = read_csv(inputs[x] , sep = '\t' , index_col = 0)
-
-            ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True)
-
-            x1 = inputs[x+1].split('_')
-            ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True)
-
-            if mer.empty:
-                mer = DataFrame(ini)
-            else:
-                mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer')
-
-    mer=mer.fillna(0)
-    mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1)
-    fr = 'mean'
-
-    mer=mer.sort_values(by = fr,ascending=False)
-    mer[fr] = mer[fr].map('{:.4f}'.format)
-    mer.index = range(1,len(mer)+1)
-
-    return mer
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inputs = sys.argv[2:]
-    output = sys.argv[1]
-
-    # Execute basic function
-    mer = freqtoall(inputs)
-
-    # Save output to CSV files
-    if not mer.empty:
-        mer.to_csv(output , sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/compare_repertoire_J.xml	Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<tool id="compRepJ" name="J-Gene Repertoire Comparison" version="0.9">
-<description>Compare J-gene repertoires</description>
-<command interpreter="python">
-compare_repertoire_J.py  "${output1}"
-#for x in $rep_files
- "$x.rpfile"
- "$x.rpfile.name"
-#end for
-</command>
-<inputs>
-<repeat name="rep_files" title="Patient" min="2">
-<param name="rpfile" type="data" label="File of J-gene repertoire" format="tabular"/>
-</repeat>
-</inputs>
-<outputs>
-<data format="tabular" name="output1" label="File_Comparing_repertoire"/>
-</outputs>
-<help>
-This tool produces a union of all patients' J-gene repertoires and computes the mean frequency of each J-gene.
-</help>
-</tool>