Mercurial > repos > chmaramis > testirprofiler
changeset 7:b5bb2e8e829c draft
Deleted selected files
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 07:06:34 -0400 |
| parents | 6a8ecfdb9462 |
| children | 2669fa191052 |
| files | cmpb2016/comp_clono_VCDR3.py cmpb2016/comp_clono_VCDR3.xml cmpb2016/comp_clono_VDJCDR3.py cmpb2016/comp_clono_VDJCDR3.xml cmpb2016/comp_clono_VJCDR3.py cmpb2016/comp_clono_VJCDR3.xml cmpb2016/compare_repertoire_J.py cmpb2016/compare_repertoire_J.xml |
| diffstat | 8 files changed, 0 insertions(+), 395 deletions(-) [+] |
line wrap: on
line diff
--- a/cmpb2016/comp_clono_VCDR3.py Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Jun 19 17:33:34 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - -def clonotypeComputation(inp_name, out1, t10n, fname): - - frame = DataFrame() - tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - frame = concat([chunk for chunk in tp]) - - - grouped = frame.groupby(['V-GENE','AA JUNCTION']) - x=grouped.size() - x1=DataFrame(list(x.index), columns=['V-GENE','AA JUNCTION']) - x1['Reads']=x.values - total = sum(x1['Reads']) - #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']] - x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total)) - x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format) - - final = x1.sort_values(by = ['Reads'] , ascending = False) - - final.index=range(1,len(final)+1) - final.to_csv(out1 , sep = '\t') - - numofclono = len(final) - clust = len(final[final['Reads'] > 1]) - sing = len (final[final['Reads'] == 1]) - top10 = final[['V-GENE','AA JUNCTION','Frequency %']].head(10) - top10.to_csv(t10n , sep = '\t') - - summary = [[str(top10['V-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]] - summary.append([top10['Frequency %'].values[0]]) - summary.append([numofclono]) - summary.append([clust,'{:.4f}'.format(100*clust/numofclono)]) - summary.append([sing,'{:.4f}'.format(100*sing/numofclono)]) - - ind = ['Dominant Clonotype (V+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons'] - spl = fname.split('_') - col = [spl[0],'%'] - - frsum = DataFrame(summary,index = ind, columns = col) - - return frsum - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - out1 = sys.argv[2] - t10n = sys.argv[3] - sname = sys.argv[4] - fname = sys.argv[5] - - # Execute basic function - frsum = clonotypeComputation(inp_name,out1,t10n,fname) - - # Save output to CSV files - if not frsum.empty: - frsum.to_csv(sname, sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VCDR3.xml Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<tool id="compClonoVCDR3" name="V+CDR3 Clonotypes Computation" version="0.9"> - <description>Compute V+CDR3 clonotypes</description> - <command interpreter="python">comp_clono_VCDR3.py $input $clonos $topcl $summ2 ${input.name}</command> - <inputs> - <param format="tabular" name="input" type="data" label="Filtered-in File"/> - - - </inputs> - -<outputs> - <data name="clonos" format="tabular" label="${input.name}_clonotypes"/> - <data name="topcl" format="tabular" label="${input.name}_top10clonos"/> - <data name="summ2" format="tabular" label="${input.name}_Summary2"/> - - - - </outputs> - - - <help> -This tool computes the (V-gene, CDR3) clonotypes and their frequencies. - </help> - -</tool>
--- a/cmpb2016/comp_clono_VDJCDR3.py Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Dec 3 14:54:00 2015 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - -def clonotypeComputationVDJ(inp_name,out1,t10n,fname): - - frame = DataFrame() - tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - frame = concat([chunk for chunk in tp]) - - grouped = frame.groupby(['V-GENE','D-GENE','J-GENE','AA JUNCTION']) - x=grouped.size() - x1=DataFrame(list(x.index), columns=['V-GENE','D-GENE','J-GENE','AA JUNCTION']) - x1['Reads']=x.values - total = sum(x1['Reads']) - #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']] - x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total)) - x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format) - - final = x1.sort_values(by = ['Reads'] , ascending = False) - #final = x1.sort_values(by = ['Reads'] , ascending = False) - - final.index=range(1,len(final)+1) - final.to_csv(out1 , sep = '\t') - - numofclono = len(final) - clust = len(final[final['Reads'] > 1]) - sing = len (final[final['Reads'] == 1]) - top10 = final[['V-GENE','D-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10) - top10.to_csv(t10n , sep = '\t') - - summary = [[str(top10['V-GENE'].values[0]+','+top10['D-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]] - summary.append([top10['Frequency %'].values[0]]) - summary.append([numofclono]) - summary.append([clust,'{:.4f}'.format(100*clust/numofclono)]) - summary.append([sing,'{:.4f}'.format(100*sing/numofclono)]) - - - ind = ['Dominant Clonotype (V+D+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons'] - spl = fname.split('_') - col = [spl[0],'%'] - - frsum = DataFrame(summary,index = ind, columns = col) - - return frsum - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - out1 = sys.argv[2] - t10n = sys.argv[3] - sname = sys.argv[4] - fname = sys.argv[5] - - # Execute basic function - frsum = clonotypeComputationVDJ(inp_name,out1,t10n,fname) - - # Save output to CSV files - if not frsum.empty: - frsum.to_csv(sname, sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VDJCDR3.xml Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<tool id="compClonoVDJCDR3" name="V+D+J+CDR3 Clonotypes Computation" version="0.9"> - <description>Compute V+D+J+CDR3 clonotypes</description> - <command interpreter="python">comp_clono_VDJCDR3.py $input $clonos $topcl $summ2 ${input.name}</command> - <inputs> - <param format="tabular" name="input" type="data" label="Filtered-in File"/> - - - </inputs> - -<outputs> - <data name="clonos" format="tabular" label="${input.name}_clonotypesVDJCDR3"/> - <data name="topcl" format="tabular" label="${input.name}_top10clonosVDJCDR3"/> - <data name="summ2" format="tabular" label="${input.name}_SummaryVDJCDR3"/> - - - - </outputs> - - - <help> -This tool computes the (V-gene, D-gene, J-gene, CDR3) clonotypes and their frequencies. - </help> - -</tool>
--- a/cmpb2016/comp_clono_VJCDR3.py Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Oct 23 17:33:34 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - -def clonotypeComputationVJ(inp_name,out1,t10n,fname): - - frame = DataFrame() - tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - frame = concat([chunk for chunk in tp]) - - grouped = frame.groupby(['V-GENE','J-GENE','AA JUNCTION']) - x=grouped.size() - x1=DataFrame(list(x.index), columns=['V-GENE','J-GENE','AA JUNCTION']) - x1['Reads']=x.values - total = sum(x1['Reads']) - #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']] - x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total)) - x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format) - - final = x1.sort_values(by = ['Reads'] , ascending = False) - #final = x1.sort_values(by = ['Reads'] , ascending = False) - - final.index= range(1,len(final)+1) - final.to_csv(out1 , sep = '\t') - - numofclono = len(final) - clust = len(final[final['Reads'] > 1]) - sing = len (final[final['Reads'] == 1]) - top10 = final[['V-GENE','J-GENE','AA JUNCTION','Frequency %']].head(10) - top10.to_csv(t10n , sep = '\t') - - summary = [[str(top10['V-GENE'].values[0]+','+top10['J-GENE'].values[0]+','+top10['AA JUNCTION'].values[0])]] - summary.append([top10['Frequency %'].values[0]]) - summary.append([numofclono]) - summary.append([clust,'{:.4f}'.format(100*clust/numofclono)]) - summary.append([sing,'{:.4f}'.format(100*sing/numofclono)]) - - - ind = ['Dominant Clonotype (V+J+CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes', 'Singletons'] - spl = fname.split('_') - col = [spl[0],'%'] - - frsum = DataFrame(summary,index = ind, columns = col) - - return frsum - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - out1 = sys.argv[2] - t10n = sys.argv[3] - sname = sys.argv[4] - fname = sys.argv[5] - - # Execute basic function - frsum = clonotypeComputationVJ(inp_name,out1,t10n,fname) - - # Save output to CSV files - if not frsum.empty: - frsum.to_csv(sname, sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/comp_clono_VJCDR3.xml Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<tool id="compClonoVJCDR3" name="V+J+CDR3 Clonotypes Computation" version="0.9"> - <description>Compute V+J+CDR3 clonotypes</description> - <command interpreter="python">comp_clono_VJCDR3.py $input $clonos $topcl $summ2 ${input.name}</command> - <inputs> - <param format="tabular" name="input" type="data" label="Filtered-in File"/> - - - </inputs> - -<outputs> - <data name="clonos" format="tabular" label="${input.name}_clonotypesVJCDR3"/> - <data name="topcl" format="tabular" label="${input.name}_top10clonosVJCDR3"/> - <data name="summ2" format="tabular" label="${input.name}_SummaryVJCDR3"/> - - - - </outputs> - - - <help> -This tool computes the (V-gene, J-gene, CDR3) clonotypes and their frequencies. - </help> - -</tool>
--- a/cmpb2016/compare_repertoire_J.py Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 10:18:39 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - -sw_reads = lambda x: x.startswith('Reads') -sw_freq = lambda x: x.startswith('Freq') -sw_gene = lambda x: x.startswith('J') - -def freqtoall(inputs): - - mer=DataFrame() - - for x in range(0,len(inputs),2): - - ini = read_csv(inputs[x] , sep = '\t' , index_col = 0) - - ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True) - - x1 = inputs[x+1].split('_') - ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True) - - if mer.empty: - mer = DataFrame(ini) - else: - mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer') - - mer=mer.fillna(0) - mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1) - fr = 'mean' - - mer=mer.sort_values(by = fr,ascending=False) - mer[fr] = mer[fr].map('{:.4f}'.format) - mer.index = range(1,len(mer)+1) - - return mer - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inputs = sys.argv[2:] - output = sys.argv[1] - - # Execute basic function - mer = freqtoall(inputs) - - # Save output to CSV files - if not mer.empty: - mer.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/compare_repertoire_J.xml Sun Mar 18 07:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -<tool id="compRepJ" name="J-Gene Repertoire Comparison" version="0.9"> -<description>Compare J-gene repertoires</description> -<command interpreter="python"> -compare_repertoire_J.py "${output1}" -#for x in $rep_files - "$x.rpfile" - "$x.rpfile.name" -#end for -</command> -<inputs> -<repeat name="rep_files" title="Patient" min="2"> -<param name="rpfile" type="data" label="File of J-gene repertoire" format="tabular"/> -</repeat> -</inputs> -<outputs> -<data format="tabular" name="output1" label="File_Comparing_repertoire"/> -</outputs> -<help> -This tool produces a union of all patients' J-gene repertoires and computes the mean frequency of each J-gene. -</help> -</tool>
