# HG changeset patch
# User chmaramis
# Date 1521371194 14400
# Node ID b5bb2e8e829c1e97177dcc0b4894d8b98478234e
# Parent 6a8ecfdb9462715d7bae43ec2646b06e732d7302
Deleted selected files
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VCDR3.py
--- a/cmpb2016/comp_clono_VCDR3.py Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jun 19 17:33:34 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputation(inp_name, out1, t10n, fname):
-
- frame = DataFrame()
- tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
- frame = concat([chunk for chunk in tp])
-
-
- grouped = frame.groupby(['V-GENE','AA JUNCTION'])
- x=grouped.size()
- x1=DataFrame(list(x.index), columns=['V-GENE','AA JUNCTION'])
- x1['Reads']=x.values
- total = sum(x1['Reads'])
- #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
- x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
- x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
- final = x1.sort_values(by = ['Reads'] , ascending = False)
-
- final.index=range(1,len(final)+1)
- final.to_csv(out1 , sep = '\t')
-
- numofclono = len(final)
- clust = len(final[final['Reads'] > 1])
- sing = len (final[final['Reads'] == 1])
- top10 = final[['V-GENE','AA JUNCTION','Frequency %']].head(10)
- top10.to_csv(t10n , sep = '\t')
-
- summary = [[str(top10['V-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
- summary.append([top10['Frequency %'].values[0]])
- summary.append([numofclono])
- summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
- summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
- ind = ['Dominant Clonotype (V+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
- spl = fname.split('_')
- col = [spl[0],'%']
-
- frsum = DataFrame(summary,index = ind, columns = col)
-
- return frsum
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inp_name = sys.argv[1]
- out1 = sys.argv[2]
- t10n = sys.argv[3]
- sname = sys.argv[4]
- fname = sys.argv[5]
-
- # Execute basic function
- frsum = clonotypeComputation(inp_name,out1,t10n,fname)
-
- # Save output to CSV files
- if not frsum.empty:
- frsum.to_csv(sname, sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VCDR3.xml
--- a/cmpb2016/comp_clono_VCDR3.xml Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-
- Compute V+CDR3 clonotypes
- comp_clono_VCDR3.py $input $clonos $topcl $summ2 ${input.name}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the (V-gene, CDR3) clonotypes and their frequencies.
-
-
-
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VDJCDR3.py
--- a/cmpb2016/comp_clono_VDJCDR3.py Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Dec 3 14:54:00 2015
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputationVDJ(inp_name,out1,t10n,fname):
-
- frame = DataFrame()
- tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
- frame = concat([chunk for chunk in tp])
-
- grouped = frame.groupby(['V-GENE','D-GENE','J-GENE','AA JUNCTION'])
- x=grouped.size()
- x1=DataFrame(list(x.index), columns=['V-GENE','D-GENE','J-GENE','AA JUNCTION'])
- x1['Reads']=x.values
- total = sum(x1['Reads'])
- #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
- x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
- x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
- final = x1.sort_values(by = ['Reads'] , ascending = False)
- #final = x1.sort_values(by = ['Reads'] , ascending = False)
-
- final.index=range(1,len(final)+1)
- final.to_csv(out1 , sep = '\t')
-
- numofclono = len(final)
- clust = len(final[final['Reads'] > 1])
- sing = len (final[final['Reads'] == 1])
- top10 = final[['V-GENE','D-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10)
- top10.to_csv(t10n , sep = '\t')
-
- summary = [[str(top10['V-GENE'].values[0]+','+top10['D-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
- summary.append([top10['Frequency %'].values[0]])
- summary.append([numofclono])
- summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
- summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-
- ind = ['Dominant Clonotype (V+D+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
- spl = fname.split('_')
- col = [spl[0],'%']
-
- frsum = DataFrame(summary,index = ind, columns = col)
-
- return frsum
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inp_name = sys.argv[1]
- out1 = sys.argv[2]
- t10n = sys.argv[3]
- sname = sys.argv[4]
- fname = sys.argv[5]
-
- # Execute basic function
- frsum = clonotypeComputationVDJ(inp_name,out1,t10n,fname)
-
- # Save output to CSV files
- if not frsum.empty:
- frsum.to_csv(sname, sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VDJCDR3.xml
--- a/cmpb2016/comp_clono_VDJCDR3.xml Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-
- Compute V+D+J+CDR3 clonotypes
- comp_clono_VDJCDR3.py $input $clonos $topcl $summ2 ${input.name}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the (V-gene, D-gene, J-gene, CDR3) clonotypes and their frequencies.
-
-
-
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VJCDR3.py
--- a/cmpb2016/comp_clono_VJCDR3.py Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 23 17:33:34 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def clonotypeComputationVJ(inp_name,out1,t10n,fname):
-
- frame = DataFrame()
- tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
- frame = concat([chunk for chunk in tp])
-
- grouped = frame.groupby(['V-GENE','J-GENE','AA JUNCTION'])
- x=grouped.size()
- x1=DataFrame(list(x.index), columns=['V-GENE','J-GENE','AA JUNCTION'])
- x1['Reads']=x.values
- total = sum(x1['Reads'])
- #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
- x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
- x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
-
- final = x1.sort_values(by = ['Reads'] , ascending = False)
- #final = x1.sort_values(by = ['Reads'] , ascending = False)
-
- final.index= range(1,len(final)+1)
- final.to_csv(out1 , sep = '\t')
-
- numofclono = len(final)
- clust = len(final[final['Reads'] > 1])
- sing = len (final[final['Reads'] == 1])
- top10 = final[['V-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10)
- top10.to_csv(t10n , sep = '\t')
-
- summary = [[str(top10['V-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]]
- summary.append([top10['Frequency %'].values[0]])
- summary.append([numofclono])
- summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
- summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
-
-
- ind = ['Dominant Clonotype (V+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons']
- spl = fname.split('_')
- col = [spl[0],'%']
-
- frsum = DataFrame(summary,index = ind, columns = col)
-
- return frsum
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inp_name = sys.argv[1]
- out1 = sys.argv[2]
- t10n = sys.argv[3]
- sname = sys.argv[4]
- fname = sys.argv[5]
-
- # Execute basic function
- frsum = clonotypeComputationVJ(inp_name,out1,t10n,fname)
-
- # Save output to CSV files
- if not frsum.empty:
- frsum.to_csv(sname, sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/comp_clono_VJCDR3.xml
--- a/cmpb2016/comp_clono_VJCDR3.xml Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-
- Compute V+J+CDR3 clonotypes
- comp_clono_VJCDR3.py $input $clonos $topcl $summ2 ${input.name}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the (V-gene, J-gene, CDR3) clonotypes and their frequencies.
-
-
-
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/compare_repertoire_J.py
--- a/cmpb2016/compare_repertoire_J.py Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 10:18:39 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-sw_reads = lambda x: x.startswith('Reads')
-sw_freq = lambda x: x.startswith('Freq')
-sw_gene = lambda x: x.startswith('J')
-
-def freqtoall(inputs):
-
- mer=DataFrame()
-
- for x in range(0,len(inputs),2):
-
- ini = read_csv(inputs[x] , sep = '\t' , index_col = 0)
-
- ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True)
-
- x1 = inputs[x+1].split('_')
- ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True)
-
- if mer.empty:
- mer = DataFrame(ini)
- else:
- mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer')
-
- mer=mer.fillna(0)
- mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1)
- fr = 'mean'
-
- mer=mer.sort_values(by = fr,ascending=False)
- mer[fr] = mer[fr].map('{:.4f}'.format)
- mer.index = range(1,len(mer)+1)
-
- return mer
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inputs = sys.argv[2:]
- output = sys.argv[1]
-
- # Execute basic function
- mer = freqtoall(inputs)
-
- # Save output to CSV files
- if not mer.empty:
- mer.to_csv(output , sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r 6a8ecfdb9462 -r b5bb2e8e829c cmpb2016/compare_repertoire_J.xml
--- a/cmpb2016/compare_repertoire_J.xml Sun Mar 18 07:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-
-Compare J-gene repertoires
-
-compare_repertoire_J.py "${output1}"
-#for x in $rep_files
- "$x.rpfile"
- "$x.rpfile.name"
-#end for
-
-
-
-
-
-
-
-
-
-
-This tool produces a union of all patients' J-gene repertoires and computes the mean frequency of each J-gene.
-
-