Mercurial > repos > chmaramis > testirprofiler
changeset 8:2669fa191052 draft
Deleted selected files
| author | chmaramis |
|---|---|
| date | Sun, 18 Mar 2018 07:06:48 -0400 |
| parents | b5bb2e8e829c |
| children | f4edb9437b95 |
| files | cmpb2016/compare_repertoire_V.py cmpb2016/compare_repertoire_V.xml cmpb2016/exclus_clono_CDR3.py cmpb2016/exclus_clono_CDR3.xml cmpb2016/exclus_clono_JCDR3.py cmpb2016/exclus_clono_JCDR3.xml cmpb2016/exclus_clono_VCDR3.py cmpb2016/exclus_clono_VCDR3.xml cmpb2016/ext_repertoire_J.py cmpb2016/ext_repertoire_J.xml |
| diffstat | 10 files changed, 0 insertions(+), 466 deletions(-) [+] |
line wrap: on
line diff
--- a/cmpb2016/compare_repertoire_V.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Sep 16 12:50:43 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - -sw_reads = lambda x: x.startswith('Reads') -sw_freq = lambda x: x.startswith('Freq') -sw_gene = lambda x: x.startswith('V') - -def freqtoall(inputs): - - mer=DataFrame() - - for x in range(0,len(inputs),2): - - ini = read_csv(inputs[x] , sep = '\t' , index_col = 0) - - ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True) - - x1 = inputs[x+1].split('_') - ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True) - - if mer.empty: - mer = DataFrame(ini) - else: - mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer') - - mer=mer.fillna(0) - mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1) - fr = 'mean' - - mer=mer.sort_values(by = fr,ascending=False) - mer[fr] = mer[fr].map('{:.4f}'.format) - mer.index = range(1,len(mer)+1) - - return mer - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inputs = sys.argv[2:] - output = sys.argv[1] - - # Execute basic function - mer = freqtoall(inputs) - - # Save output to CSV files - if not mer.empty: - mer.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/compare_repertoire_V.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -<tool id="compRepV" name="V-Gene Repertoire Comparison" version="0.9"> -<description>Compare V-gene repertoires</description> -<command interpreter="python"> -compare_repertoire_V.py "${output1}" -#for x in $rep_files - "$x.rpfile" - "$x.rpfile.name" -#end for -</command> -<inputs> -<repeat name="rep_files" title="Patient" min="2"> -<param name="rpfile" type="data" label="File of V-gene repertoire" format="tabular"/> -</repeat> -</inputs> -<outputs> -<data format="tabular" name="output1" label="File_Comparing_repertoire"/> -</outputs> -<help> -This tool produces a union of all patients' V-gene repertoires and computes the mean frequency of each V-gene. -</help> -</tool>
--- a/cmpb2016/exclus_clono_CDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 11:12:09 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - - -def exclusiveCDR3Func(inputs,thres): - - cdr3=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cdr3 = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - cdr3 = cdr3.merge(cl[['AA JUNCTION','ReadsB']], how='left', on='AA JUNCTION') - - cdr3['ReadsB'].fillna(0, inplace=True) - - cdr3 = cdr3[cdr3['ReadsB'] == 0] - del cdr3['ReadsB'] - - cdr3.index = range(1,len(cdr3)+1) - - return cdr3 - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveCDR3Func(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_CDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<tool id="exclClonoCDR3" name="Exclusive CDR3 Clonotypes Computation" version="0.9"> -<description>Compute Exclusive CDR3 Clonotypes</description> -<command interpreter="python"> -exclus_clono_CDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" -</command> -<inputs> - <conditional name="Th"> - - <param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?"> - <option value="y">Yes</option> - <option value="n" selected="true">No</option> - </param> - - <when value="y"> - <param name="thres" type="integer" size="4" value="1" min="1" label="Keep CDR3 with Number of Reads more than"/> - </when> - - <when value="n"> - <param name="thres" type="hidden" value="null" /> - </when> - - </conditional> - <param format="txt" name="inputA" type="data" label="First File of CDR3 Clonotypes (A)"/> - <param format="txt" name="inputB" type="data" label="Second File of CDR3 Clonotypes (B)"/> -</inputs> - -<outputs> -<data format="tabular" name="output1" label="Exclusive_CDR3"/> -</outputs> -<help> -This tool computes the exclusive CDR3 clonotypes of patient or group A that are absent from patient or group B. -</help> -</tool>
--- a/cmpb2016/exclus_clono_JCDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 17:06:09 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - -def exclusiveJclonoFunc(inputs,thres): - - jClono=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - jClono = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - jClono = jClono.merge(cl[['J-GENE','AA JUNCTION','ReadsB']], how='left', on=['J-GENE','AA JUNCTION']) - - jClono['ReadsB'].fillna(0, inplace=True) - - jClono = jClono[jClono['ReadsB'] == 0] - del jClono['ReadsB'] - - jClono.index = range(1,len(jClono)+1) - - return jClono - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveJclonoFunc(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_JCDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<tool id="exclClonoJCDR3" name="Exclusive J+CDR3 Clonotypes Computation" version="0.9"> -<description>Compute Exclusive J+CDR3 Clonotypes</description> -<command interpreter="python"> -exclus_clono_JCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" -</command> -<inputs> - <conditional name="Th"> - - <param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?"> - <option value="y">Yes</option> - <option value="n" selected="true">No</option> - </param> - - <when value="y"> - <param name="thres" type="integer" size="4" value="1" min="1" label="Keep CDR3 with Number of Reads more than"/> - </when> - - <when value="n"> - <param name="thres" type="hidden" value="null" /> - </when> - - </conditional> - <param format="txt" name="inputA" type="data" label="First File of J-CDR3 Clonotypes (A)"/> - <param format="txt" name="inputB" type="data" label="Second File of J-CDR3 Clonotypes (B)"/> -</inputs> - -<outputs> -<data format="tabular" name="output1" label="Exclusive_CDR3"/> -</outputs> -<help> -This tool computes the exclusive (J-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B. -</help> -</tool>
--- a/cmpb2016/exclus_clono_VCDR3.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Mon Feb 29 16:57:12 2016 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -from numpy import nan as NA -import sys -import time - - -def exclusiveVclonoFunc(inputs,thres): - - vClono=DataFrame() - - # File A - cl = DataFrame() - cl = read_csv(inputs[0] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - vClono = cl - - # File B - cl = DataFrame() - cl = read_csv(inputs[2] , sep = '\t' , index_col = 0) - if (thres != 'null'): - cl = cl[cl['Reads'] > int(thres)] - cl.rename(columns={'Reads':'ReadsB'}, inplace=True) - vClono = vClono.merge(cl[['V-GENE','AA JUNCTION','ReadsB']], how='left', on=['V-GENE','AA JUNCTION']) - - vClono['ReadsB'].fillna(0, inplace=True) - - vClono = vClono[vClono['ReadsB'] == 0] - del vClono['ReadsB'] - - vClono.index = range(1,len(vClono)+1) - - return vClono - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - threshold = sys.argv[2] - arg = sys.argv[3:] - output = sys.argv[1] - - # Execute basic function - excl = exclusiveVclonoFunc(arg,threshold) - - # Save output to CSV files - if not excl.empty: - excl.to_csv(output , sep = '\t') - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_VCDR3.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -<tool id="exclClonoVCDR3" name="Exclusive V+CDR3 Clonotypes Computation" version="0.9"> -<description>Compute Exclusive V+CDR3 Clonotypes</description> -<command interpreter="python"> -exclus_clono_VCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name" -</command> -<inputs> - <conditional name="Th"> - - <param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?"> - <option value="y">Yes</option> - <option value="n" selected="true">No</option> - </param> - - <when value="y"> - <param name="thres" type="integer" size="4" value="1" min="1" label="Keep CDR3 with Number of Reads more than"/> - </when> - - <when value="n"> - <param name="thres" type="hidden" value="null" /> - </when> - - </conditional> - <param format="txt" name="inputA" type="data" label="First File of V-CDR3 Clonotypes (A)"/> - <param format="txt" name="inputB" type="data" label="Second File of V-CDR3 Clonotypes (B)"/> -</inputs> - -<outputs> -<data format="tabular" name="output1" label="Exclusive_CDR3"/> -</outputs> -<help> -This tool computes the exclusive (V-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B. -</help> -</tool>
--- a/cmpb2016/ext_repertoire_J.py Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,67 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Fri Jun 20 14:58:08 2014 - -@author: chmaramis -""" - -from __future__ import division -import numpy as np -from pandas import * -import functools as ft -import sys -import time - -frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) - -def repertoireJgComputation(inp_name, fname): - - df = DataFrame() - df = read_csv(inp_name, sep='\t', index_col=0 ) - #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 ) - #df = concat([chunk for chunk in tp]) - - vgroup = df.groupby(['J-GENE']) - vdi = vgroup.size() - rep = DataFrame(list(vdi.index), columns=['J-GENE']) - rep['Reads'] = vdi.values - #rep['Reads/Total'] = ['{r}/{l}'.format(r=p , l = len(df)) for p in vdi.values] - rep['Reads/Total'] = rep['Reads'].map(ft.partial(frm, y=len(df))) - rep['Frequency %'] = (100*rep['Reads']/len(df)).map('{:.4f}'.format) - - rep = rep.sort_values(by = ['Reads'] , ascending = False) - - rep.index = range(1,len(rep)+1) - - su = rep[['J-GENE','Frequency %']].head(10) - spl = fname.split('_') - summdf = DataFrame([su['J-GENE'].values[0],su['Frequency %'].values[0]], - index = ['Dominant J-GENE','Frequency'], columns = [spl[0]]) - summdf['%'] = '' - - return (rep, summdf) - - -if __name__ == '__main__': - - start=time.time() - - # Parse input arguments - inp_name = sys.argv[1] - outrep = sys.argv[2] - summ_rep2 = sys.argv[3] - fname = sys.argv[4] - - # Execute basic function - rep, summdf = repertoireJgComputation(inp_name, fname) - - # Save output to CSV files - if not rep.empty: - rep.to_csv(outrep, sep = '\t') - if not summdf.empty: - summdf.to_csv(summ_rep2, sep = '\t') - - - # Print execution time - stop=time.time() - print('Runtime:' + str(stop-start))
--- a/cmpb2016/ext_repertoire_J.xml Sun Mar 18 07:06:34 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ -<tool id="extRepJ" name="J-Gene Repertoire Extraction" version="0.9"> - <description>Compute repertoire of J-genes</description> - <command interpreter="python">ext_repertoire_J.py $input $clonos $summ ${input.name}</command> - <inputs> - <param format="tabular" name="input" type="data" label="File of clonotypes"/> - - - </inputs> - -<outputs> - <data name="clonos" format="tabular" label="${input.name}_repertoireJ"/> - - <data name="summ" format="tabular" label="${input.name}_Summary4J"/> - - - - </outputs> - - - <help> -This tool computes the repertoire of J-genes (i.e., the number of clonotypes using each J-gene over the total number of clonotypes). - </help> - -</tool>
