Mercurial > repos > chmaramis > testirprofiler

--- a/cmpb2016/compare_repertoire_V.py	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Sep 16 12:50:43 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-sw_reads = lambda x: x.startswith('Reads')
-sw_freq = lambda x: x.startswith('Freq')
-sw_gene = lambda x: x.startswith('V')
-
-def freqtoall(inputs):
-
-    mer=DataFrame()
-
-    for x in range(0,len(inputs),2):
-
-            ini = read_csv(inputs[x] , sep = '\t' , index_col = 0)
-
-            ini.drop(ini.columns[np.where(ini.columns.map(sw_reads))[0]], axis=1, inplace=True)
-
-            x1 = inputs[x+1].split('_')
-            ini.rename(columns={ini.columns[np.where(ini.columns.map(sw_freq))[0][0]]: x1[0]}, inplace=True)
-
-            if mer.empty:
-                mer = DataFrame(ini)
-            else:
-                mer = merge(mer,ini, on=ini.columns[np.where(ini.columns.map(sw_gene))[0][0]] , how='outer')
-
-    mer=mer.fillna(0)
-    mer['mean'] = mer.sum(axis=1)/(len(mer.columns)-1)
-    fr = 'mean'
-
-    mer=mer.sort_values(by = fr,ascending=False)
-    mer[fr] = mer[fr].map('{:.4f}'.format)
-    mer.index = range(1,len(mer)+1)
-
-    return mer
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inputs = sys.argv[2:]
-    output = sys.argv[1]
-
-    # Execute basic function
-    mer = freqtoall(inputs)
-
-    # Save output to CSV files
-    if not mer.empty:
-        mer.to_csv(output , sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/compare_repertoire_V.xml	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-<tool id="compRepV" name="V-Gene Repertoire Comparison" version="0.9">
-<description>Compare V-gene repertoires</description>
-<command interpreter="python">
-compare_repertoire_V.py  "${output1}"
-#for x in $rep_files
- "$x.rpfile"
- "$x.rpfile.name"
-#end for
-</command>
-<inputs>
-<repeat name="rep_files" title="Patient" min="2">
-<param name="rpfile" type="data" label="File of V-gene repertoire" format="tabular"/>
-</repeat>
-</inputs>
-<outputs>
-<data format="tabular" name="output1" label="File_Comparing_repertoire"/>
-</outputs>
-<help>
-This tool produces a union of all patients' V-gene repertoires and computes the mean frequency of each V-gene.
-</help>
-</tool>
--- a/cmpb2016/exclus_clono_CDR3.py	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 11:12:09 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-
-def exclusiveCDR3Func(inputs,thres):
-
-    cdr3=DataFrame()
-
-    # File A
-    cl = DataFrame()
-    cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    cdr3 = cl
-
-    # File B
-    cl = DataFrame()
-    cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
-    cdr3 = cdr3.merge(cl[['AA JUNCTION','ReadsB']], how='left', on='AA JUNCTION')
-
-    cdr3['ReadsB'].fillna(0, inplace=True)
-
-    cdr3 = cdr3[cdr3['ReadsB'] == 0]
-    del cdr3['ReadsB']
-
-    cdr3.index = range(1,len(cdr3)+1)
-
-    return cdr3
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    threshold = sys.argv[2]
-    arg = sys.argv[3:]
-    output = sys.argv[1]
-
-    # Execute basic function
-    excl = exclusiveCDR3Func(arg,threshold)
-
-    # Save output to CSV files
-    if not excl.empty:
-        excl.to_csv(output , sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_CDR3.xml	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="exclClonoCDR3" name="Exclusive CDR3 Clonotypes Computation" version="0.9">
-<description>Compute Exclusive CDR3 Clonotypes</description>
-<command interpreter="python">
-exclus_clono_CDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-</command>
-<inputs>
-	<conditional name="Th">
-
-		<param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?">
-			<option value="y">Yes</option>
-			<option value="n" selected="true">No</option>
-		</param>
-
-		<when value="y">
-			<param name="thres" type="integer" size="4" value="1" min="1"  label="Keep CDR3 with Number of Reads more than"/>
-		</when>
-
-		<when value="n">
-			<param name="thres" type="hidden" value="null" />
-		</when>
-
-	</conditional>
-	<param format="txt" name="inputA" type="data" label="First File of CDR3 Clonotypes (A)"/>
-	<param format="txt" name="inputB" type="data" label="Second File of CDR3 Clonotypes (B)"/>
-</inputs>
-
-<outputs>
-<data format="tabular" name="output1" label="Exclusive_CDR3"/>
-</outputs>
-<help>
-This tool computes the exclusive CDR3 clonotypes of patient or group A that are absent from patient or group B.
-</help>
-</tool>
--- a/cmpb2016/exclus_clono_JCDR3.py	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 17:06:09 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-def exclusiveJclonoFunc(inputs,thres):
-
-    jClono=DataFrame()
-
-    # File A
-    cl = DataFrame()
-    cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    jClono = cl
-
-    # File B
-    cl = DataFrame()
-    cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
-    jClono = jClono.merge(cl[['J-GENE','AA JUNCTION','ReadsB']], how='left', on=['J-GENE','AA JUNCTION'])
-
-    jClono['ReadsB'].fillna(0, inplace=True)
-
-    jClono = jClono[jClono['ReadsB'] == 0]
-    del jClono['ReadsB']
-
-    jClono.index = range(1,len(jClono)+1)
-
-    return jClono
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    threshold = sys.argv[2]
-    arg = sys.argv[3:]
-    output = sys.argv[1]
-
-    # Execute basic function
-    excl = exclusiveJclonoFunc(arg,threshold)
-
-    # Save output to CSV files
-    if not excl.empty:
-        excl.to_csv(output , sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_JCDR3.xml	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="exclClonoJCDR3" name="Exclusive J+CDR3 Clonotypes Computation" version="0.9">
-<description>Compute Exclusive J+CDR3 Clonotypes</description>
-<command interpreter="python">
-exclus_clono_JCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-</command>
-<inputs>
-	<conditional name="Th">
-
-		<param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?">
-			<option value="y">Yes</option>
-			<option value="n" selected="true">No</option>
-		</param>
-
-		<when value="y">
-			<param name="thres" type="integer" size="4" value="1" min="1"  label="Keep CDR3 with Number of Reads more than"/>
-		</when>
-
-		<when value="n">
-			<param name="thres" type="hidden" value="null" />
-		</when>
-
-	</conditional>
-	<param format="txt" name="inputA" type="data" label="First File of J-CDR3 Clonotypes (A)"/>
-	<param format="txt" name="inputB" type="data" label="Second File of J-CDR3 Clonotypes (B)"/>
-</inputs>
-
-<outputs>
-<data format="tabular" name="output1" label="Exclusive_CDR3"/>
-</outputs>
-<help>
-This tool computes the exclusive (J-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B.
-</help>
-</tool>
--- a/cmpb2016/exclus_clono_VCDR3.py	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Feb 29 16:57:12 2016
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-def exclusiveVclonoFunc(inputs,thres):
-
-    vClono=DataFrame()
-
-    # File A
-    cl = DataFrame()
-    cl = read_csv(inputs[0] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    vClono = cl
-
-    # File B
-    cl = DataFrame()
-    cl = read_csv(inputs[2] , sep = '\t' , index_col = 0)
-    if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-    cl.rename(columns={'Reads':'ReadsB'}, inplace=True)
-    vClono = vClono.merge(cl[['V-GENE','AA JUNCTION','ReadsB']], how='left', on=['V-GENE','AA JUNCTION'])
-
-    vClono['ReadsB'].fillna(0, inplace=True)
-
-    vClono = vClono[vClono['ReadsB'] == 0]
-    del vClono['ReadsB']
-
-    vClono.index = range(1,len(vClono)+1)
-
-    return vClono
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    threshold = sys.argv[2]
-    arg = sys.argv[3:]
-    output = sys.argv[1]
-
-    # Execute basic function
-    excl = exclusiveVclonoFunc(arg,threshold)
-
-    # Save output to CSV files
-    if not excl.empty:
-        excl.to_csv(output , sep = '\t')
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/exclus_clono_VCDR3.xml	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<tool id="exclClonoVCDR3" name="Exclusive V+CDR3 Clonotypes Computation" version="0.9">
-<description>Compute Exclusive V+CDR3 Clonotypes</description>
-<command interpreter="python">
-exclus_clono_VCDR3.py "$output1" "$Th.thres" "$inputA" "$inputA.name" "$inputB" "$inputB.name"
-</command>
-<inputs>
-	<conditional name="Th">
-
-		<param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?">
-			<option value="y">Yes</option>
-			<option value="n" selected="true">No</option>
-		</param>
-
-		<when value="y">
-			<param name="thres" type="integer" size="4" value="1" min="1"  label="Keep CDR3 with Number of Reads more than"/>
-		</when>
-
-		<when value="n">
-			<param name="thres" type="hidden" value="null" />
-		</when>
-
-	</conditional>
-	<param format="txt" name="inputA" type="data" label="First File of V-CDR3 Clonotypes (A)"/>
-	<param format="txt" name="inputB" type="data" label="Second File of V-CDR3 Clonotypes (B)"/>
-</inputs>
-
-<outputs>
-<data format="tabular" name="output1" label="Exclusive_CDR3"/>
-</outputs>
-<help>
-This tool computes the exclusive (V-gene, CDR3) clonotypes of patient or group A that are absent from patient or group B.
-</help>
-</tool>
--- a/cmpb2016/ext_repertoire_J.py	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 20 14:58:08 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
-
-def repertoireJgComputation(inp_name, fname):
-
-    df = DataFrame()
-    df = read_csv(inp_name, sep='\t', index_col=0 )
-    #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    #df = concat([chunk for chunk in tp])
-
-    vgroup = df.groupby(['J-GENE'])
-    vdi = vgroup.size()
-    rep = DataFrame(list(vdi.index), columns=['J-GENE'])
-    rep['Reads'] = vdi.values
-    #rep['Reads/Total'] = ['{r}/{l}'.format(r=p , l = len(df)) for p in vdi.values]
-    rep['Reads/Total'] = rep['Reads'].map(ft.partial(frm, y=len(df)))
-    rep['Frequency %'] = (100*rep['Reads']/len(df)).map('{:.4f}'.format)
-
-    rep = rep.sort_values(by = ['Reads'] , ascending = False)
-
-    rep.index = range(1,len(rep)+1)
-
-    su = rep[['J-GENE','Frequency %']].head(10)
-    spl = fname.split('_')
-    summdf = DataFrame([su['J-GENE'].values[0],su['Frequency %'].values[0]],
-                       index = ['Dominant J-GENE','Frequency'], columns = [spl[0]])
-    summdf['%'] = ''
-
-    return (rep, summdf)
-
-
-if __name__ == '__main__':
-
-    start=time.time()
-
-    # Parse input arguments
-    inp_name = sys.argv[1]
-    outrep = sys.argv[2]
-    summ_rep2 = sys.argv[3]
-    fname = sys.argv[4]
-
-    # Execute basic function
-    rep, summdf = repertoireJgComputation(inp_name, fname)
-
-    # Save output to CSV files
-    if not rep.empty:
-        rep.to_csv(outrep, sep = '\t')
-    if not summdf.empty:
-        summdf.to_csv(summ_rep2, sep = '\t')
-
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/ext_repertoire_J.xml	Sun Mar 18 07:06:34 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="extRepJ" name="J-Gene Repertoire Extraction" version="0.9">
-  <description>Compute repertoire of J-genes</description>
-  <command interpreter="python">ext_repertoire_J.py $input  $clonos  $summ ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="File of clonotypes"/>
-
-
-  </inputs>
-
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_repertoireJ"/>
-
-	<data name="summ" format="tabular" 	label="${input.name}_Summary4J"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the repertoire of J-genes (i.e., the number of clonotypes using each J-gene over the total number of clonotypes).
-  </help>
-
-</tool>