# HG changeset patch # User chmaramis # Date 1521371208 14400 # Node ID 2669fa191052c95af9862b12afcc797ea462bd12 # Parent b5bb2e8e829c1e97177dcc0b4894d8b98478234e Deleted selected files diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/compare_repertoire_V.py --- a/cmpb2016/compare_repertoire_V.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Sep 16 12:50:43 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - -sw_reads = lambda x: x.startswith('Reads') -sw_freq = lambda x: x.startswith('Freq') -sw_gene = lambda x: x.startswith('V') - -def freqtoall(inputs): - - mer=DataFrame() - - for x in range(0,len(inputs),2): - - ini = read_csv(inputs[x] , sep = '\t' , index_col = 0) - - ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True) - - x1 = inputs[x+1].split('_') - ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True) - - if mer.empty: - mer = DataFrame(ini) - else: - mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer') - - mer=mer.fillna(0) - mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1) - fr = 'mean' - - mer=mer.sort_values(by = fr,ascending=False) - mer[fr] = mer[fr].map('{:.4f}'.format) - mer.index = range(1,len(mer)+1) - - return mer - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inputs = sys.argv[2:] - output = sys.argv[1] - - # Execute basic function - mer = freqtoall(inputs) - - # Save output to CSV files - if not mer.empty: - mer.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start)) diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/compare_repertoire_V.xml --- a/cmpb2016/compare_repertoire_V.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ - -Compare V-gene repertoires - -compare_repertoire_V.py "${output1}" -#for x in $rep_files - "$x.rpfile" - "$x.rpfile.name" -#end for - - - - - - - - - - -This tool produces a union of all patients' V-gene repertoires and computes the mean frequency of each V-gene. - - diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_CDR3.py --- a/cmpb2016/exclus_clono_CDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 11:12:09 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - - -def exclusiveCDR3Func(inputs,thres): - - cdr3=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cdr3 = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - cdr3 = cdr3.merge(cl[['AA JUNCTION','ReadsB']], how='left', on='AA JUNCTION') - - cdr3['ReadsB'].fillna(0, inplace=True) - - cdr3 = cdr3[cdr3['ReadsB'] == 0] - del cdr3['ReadsB'] - - cdr3.index = range(1,len(cdr3)+1) - - return cdr3 - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveCDR3Func(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start)) diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_CDR3.xml --- a/cmpb2016/exclus_clono_CDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - -Compute Exclusive CDR3 Clonotypes - -exclus_clono_CDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool computes the exclisive CDR3 clonotypes of patient or group A that are absent from patient or group B. - - diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_JCDR3.py --- a/cmpb2016/exclus_clono_JCDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 17:06:09 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - -def exclusiveJclonoFunc(inputs,thres): - - jClono=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - jClono = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - jClono = jClono.merge(cl[['J-GENE','AA JUNCTION','ReadsB']], how='left', on=['J-GENE','AA JUNCTION']) - - jClono['ReadsB'].fillna(0, inplace=True) - - jClono = jClono[jClono['ReadsB'] == 0] - del jClono['ReadsB'] - - jClono.index = range(1,len(jClono)+1) - - return jClono - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveJclonoFunc(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start)) diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_JCDR3.xml --- a/cmpb2016/exclus_clono_JCDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - -Compute Exclusive J+CDR3 Clonotypes - -exclus_clono_JCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool computes the exclisive (J-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B. - - diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_VCDR3.py --- a/cmpb2016/exclus_clono_VCDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 16:57:12 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - -def exclusiveVclonoFunc(inputs,thres): - - vClono=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - vClono = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - vClono = vClono.merge(cl[['V-GENE','AA JUNCTION','ReadsB']], how='left', on=['V-GENE','AA JUNCTION']) - - vClono['ReadsB'].fillna(0, inplace=True) - - vClono = vClono[vClono['ReadsB'] == 0] - del vClono['ReadsB'] - - vClono.index = range(1,len(vClono)+1) - - return vClono - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveVclonoFunc(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start)) diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/exclus_clono_VCDR3.xml --- a/cmpb2016/exclus_clono_VCDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - -Compute Exclusive V+CDR3 Clonotypes - -exclus_clono_VCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" - - - - - - - - - - - - - - - - - - - - - - - - - - -This tool computes the exclisive (V-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B. - - diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/ext_repertoire_J.py --- a/cmpb2016/ext_repertoire_J.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jun 20 14:58:08 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - -def repertoireJgComputation(inp_name, fname): - - df = DataFrame() - df = read_csv(inp_name, sep='\t', index_col=0 ) - #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - #df = concat([chunk for chunk in tp]) - - vgroup = df.groupby(['J-GENE']) - vdi = vgroup.size() - rep = DataFrame(list(vdi.index), columns=['J-GENE']) - rep['Reads'] = vdi.values - #rep['Reads/Total'] = ['{r}/{l}'.format(r=p , l = len(df)) for p in vdi.values] - rep['Reads/Total'] = rep['Reads'].map(ft.partial(frm, y=len(df))) - rep['Frequency %'] = (100*rep['Reads']/len(df)).map('{:.4f}'.format) - - rep = rep.sort_values(by = ['Reads'] , ascending = False) - - rep.index = range(1,len(rep)+1) - - su = rep[['J-GENE','Frequency %']].head(10) - spl = fname.split('_') - summdf = DataFrame([su['J-GENE'].values[0],su['Frequency %'].values[0]], - index = ['Dominant J-GENE','Frequency'], columns = [spl[0]]) - summdf['%'] = '' - - return (rep, summdf) - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - outrep = sys.argv[2] - summ_rep2 = sys.argv[3] - fname = sys.argv[4] - - # Execute basic function - rep, summdf = repertoireJgComputation(inp_name, fname) - - # Save output to CSV files - if not rep.empty: - rep.to_csv(outrep, sep = '\t') - if not summdf.empty: - summdf.to_csv(summ_rep2, sep = '\t') - - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start)) diff -r b5bb2e8e829c -r 2669fa191052 cmpb2016/ext_repertoire_J.xml --- a/cmpb2016/ext_repertoire_J.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ - - Compute repertoire of J-genes - ext_repertoire_J.py $input $clonos $summ ${input.name} - - - - - - - - - - - - - - - - - -This tool computes the repertoire of J-genes (i.e. , the number of clonotypes using each V-gene over the total number of clonotypes). - - -