# HG changeset patch
# User chmaramis
# Date 1521371208 14400
# Node ID 2669fa191052c95af9862b12afcc797ea462bd12
# Parent b5bb2e8e829c1e97177dcc0b4894d8b98478234e
Deleted selected files
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/compare_repertoire_V.py
--- a/cmpb2016/compare_repertoire_V.py Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Sep 16 12:50:43 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-sw_reads = lambda x: x.startswith('Reads')
-sw_freq = lambda x: x.startswith('Freq')
-sw_gene = lambda x: x.startswith('V')
-
-def freqtoall(inputs):
-
- mer=DataFrame()
-
- for x in range(0,len(inputs),2):
-
- ini = read_csv(inputs[x] , sep = '\t' , index_col = 0)
-
- ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True)
-
- x1 = inputs[x+1].split('_')
- ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True)
-
- if mer.empty:
- mer = DataFrame(ini)
- else:
- mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer')
-
- mer=mer.fillna(0)
- mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1)
- fr = 'mean'
-
- mer=mer.sort_values(by = fr,ascending=False)
- mer[fr] = mer[fr].map('{:.4f}'.format)
- mer.index = range(1,len(mer)+1)
-
- return mer
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inputs = sys.argv[2:]
- output = sys.argv[1]
-
- # Execute basic function
- mer = freqtoall(inputs)
-
- # Save output to CSV files
- if not mer.empty:
- mer.to_csv(output , sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/compare_repertoire_V.xml
--- a/cmpb2016/compare_repertoire_V.xml Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-
-Compare V-gene repertoires
-
-compare_repertoire_V.py "${output1}"
-#for x in $rep_files
- "$x.rpfile"
- "$x.rpfile.name"
-#end for
-
-
-
-
-
-
-
-
-
-
-This tool produces a union of all patients' V-gene repertoires and computes the mean frequency of each V-gene.
-
-
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_CDR3.py
--- a/cmpb2016/exclus_clono_CDR3.py Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 11:12:09 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-
-def exclusiveCDR3Func(inputs,thres):
-
- cdr3=DataFrame()
-
- # File A
- cl = DataFrame()
- cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- cdr3 = cl
-
- # File B
- cl = DataFrame()
- cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
- cdr3 = cdr3.merge(cl[['AA JUNCTION','ReadsB']], how='left', on='AA JUNCTION')
-
- cdr3['ReadsB'].fillna(0, inplace=True)
-
- cdr3 = cdr3[cdr3['ReadsB'] == 0]
- del cdr3['ReadsB']
-
- cdr3.index = range(1,len(cdr3)+1)
-
- return cdr3
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- threshold = sys.argv[2]
- arg = sys.argv[3:]
- output = sys.argv[1]
-
- # Execute basic function
- excl = exclusiveCDR3Func(arg,threshold)
-
- # Save output to CSV files
- if not excl.empty:
- excl.to_csv(output , sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_CDR3.xml
--- a/cmpb2016/exclus_clono_CDR3.xml Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-
-Compute Exclusive CDR3 Clonotypes
-
-exclus_clono_CDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the exclisive CDR3 clonotypes of patient or group A that are absent from patient or group B.
-
-
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_JCDR3.py
--- a/cmpb2016/exclus_clono_JCDR3.py Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 17:06:09 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-def exclusiveJclonoFunc(inputs,thres):
-
- jClono=DataFrame()
-
- # File A
- cl = DataFrame()
- cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- jClono = cl
-
- # File B
- cl = DataFrame()
- cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
- jClono = jClono.merge(cl[['J-GENE','AA JUNCTION','ReadsB']], how='left', on=['J-GENE','AA JUNCTION'])
-
- jClono['ReadsB'].fillna(0, inplace=True)
-
- jClono = jClono[jClono['ReadsB'] == 0]
- del jClono['ReadsB']
-
- jClono.index = range(1,len(jClono)+1)
-
- return jClono
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- threshold = sys.argv[2]
- arg = sys.argv[3:]
- output = sys.argv[1]
-
- # Execute basic function
- excl = exclusiveJclonoFunc(arg,threshold)
-
- # Save output to CSV files
- if not excl.empty:
- excl.to_csv(output , sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_JCDR3.xml
--- a/cmpb2016/exclus_clono_JCDR3.xml Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-
-Compute Exclusive J+CDR3 Clonotypes
-
-exclus_clono_JCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the exclisive (J-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B.
-
-
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_VCDR3.py
--- a/cmpb2016/exclus_clono_VCDR3.py Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 16:57:12 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-def exclusiveVclonoFunc(inputs,thres):
-
- vClono=DataFrame()
-
- # File A
- cl = DataFrame()
- cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- vClono = cl
-
- # File B
- cl = DataFrame()
- cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
- if (thres != 'null'):
- cl = cl[cl['Reads'] > int(thres)]
- cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
- vClono = vClono.merge(cl[['V-GENE','AA JUNCTION','ReadsB']], how='left', on=['V-GENE','AA JUNCTION'])
-
- vClono['ReadsB'].fillna(0, inplace=True)
-
- vClono = vClono[vClono['ReadsB'] == 0]
- del vClono['ReadsB']
-
- vClono.index = range(1,len(vClono)+1)
-
- return vClono
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- threshold = sys.argv[2]
- arg = sys.argv[3:]
- output = sys.argv[1]
-
- # Execute basic function
- excl = exclusiveVclonoFunc(arg,threshold)
-
- # Save output to CSV files
- if not excl.empty:
- excl.to_csv(output , sep = '\t')
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_VCDR3.xml
--- a/cmpb2016/exclus_clono_VCDR3.xml Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-
-Compute Exclusive V+CDR3 Clonotypes
-
-exclus_clono_VCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the exclisive (V-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B.
-
-
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/ext_repertoire_J.py
--- a/cmpb2016/ext_repertoire_J.py Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 20 14:58:08 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def repertoireJgComputation(inp_name, fname):
-
- df = DataFrame()
- df = read_csv(inp_name, sep='\t', index_col=0 )
- #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
- #df = concat([chunk for chunk in tp])
-
- vgroup = df.groupby(['J-GENE'])
- vdi = vgroup.size()
- rep = DataFrame(list(vdi.index), columns=['J-GENE'])
- rep['Reads'] = vdi.values
- #rep['Reads/Total'] = ['{r}/{l}'.format(r=p , l = len(df)) for p in vdi.values]
- rep['Reads/Total'] = rep['Reads'].map(ft.partial(frm, y=len(df)))
- rep['Frequency %'] = (100*rep['Reads']/len(df)).map('{:.4f}'.format)
-
- rep = rep.sort_values(by = ['Reads'] , ascending = False)
-
- rep.index = range(1,len(rep)+1)
-
- su = rep[['J-GENE','Frequency %']].head(10)
- spl = fname.split('_')
- summdf = DataFrame([su['J-GENE'].values[0],su['Frequency %'].values[0]],
- index = ['Dominant J-GENE','Frequency'], columns = [spl[0]])
- summdf['%'] = ''
-
- return (rep, summdf)
-
-
-if __name__ == '__main__':
-
- start=time.time()
-
- # Parse input arguments
- inp_name = sys.argv[1]
- outrep = sys.argv[2]
- summ_rep2 = sys.argv[3]
- fname = sys.argv[4]
-
- # Execute basic function
- rep, summdf = repertoireJgComputation(inp_name, fname)
-
- # Save output to CSV files
- if not rep.empty:
- rep.to_csv(outrep, sep = '\t')
- if not summdf.empty:
- summdf.to_csv(summ_rep2, sep = '\t')
-
-
- # Print execution time
- stop=time.time()
- print('Runtime:' + str(stop-start))
diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/ext_repertoire_J.xml
--- a/cmpb2016/ext_repertoire_J.xml Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-
- Compute repertoire of J-genes
- ext_repertoire_J.py $input $clonos $summ ${input.name}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool computes the repertoire of J-genes (i.e. , the number of clonotypes using each V-gene over the total number of clonotypes).
-
-
-