changeset 9:f4edb9437b95 draft

Deleted selected files
author chmaramis
date Sun, 18 Mar 2018 07:06:56 -0400
parents 2669fa191052
children 5339fb9d8aee
files cmpb2016/ext_repertoire_V.py cmpb2016/ext_repertoire_V.xml cmpb2016/ngs_filtering.py cmpb2016/ngs_filtering.xml cmpb2016/pub_clono_CDR3.py cmpb2016/pub_clono_CDR3.xml
diffstat 6 files changed, 0 insertions(+), 798 deletions(-) [+]
line wrap: on
line diff
--- a/cmpb2016/ext_repertoire_V.py	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 20 14:58:08 2014
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-import functools as ft
-import sys
-import time
-
-frm = lambda x,y: '{r}/{l}'.format(r=x,l=y) 
-
-def repertoireComputation(inp_name, fname):
-        
-    df = DataFrame()
-    df = read_csv(inp_name, sep='\t', index_col=0 )
-    #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-    #df = concat([chunk for chunk in tp])     
-    
-    
-    vgroup = df.groupby(['V-GENE'])
-    vdi = vgroup.size()
-    rep = DataFrame(list(vdi.index), columns=['V-GENE'])
-    rep['Reads'] = vdi.values 
-    #rep['Reads/Total'] = ['{r}/{l}'.format(r=p , l = len(df)) for p in vdi.values]
-    rep['Reads/Total'] = rep['Reads'].map(ft.partial(frm, y=len(df)))
-    rep['Frequency %'] = (100*rep['Reads']/len(df)).map('{:.4f}'.format)
-    
-    rep = rep.sort_values(by = ['Reads'] , ascending = False)
-    rep.index = range(1,len(rep)+1)
-
-    su = rep[['V-GENE','Frequency %']].head(10)
-    spl = fname.split('_')
-    summdf = DataFrame([su['V-GENE'].values[0],su['Frequency %'].values[0]],
-                       index = ['Dominant V-GENE','Frequency'], columns = [spl[0]])
-    summdf['%'] = ''
-
-    return (rep, su, summdf)
-
-
-if __name__ == '__main__':   
-
-    start=time.time()
-
-    # Parse input arguments    
-    inp_name = sys.argv[1]
-    outrep = sys.argv[2]
-    summ_rep = sys.argv[3]
-    summ_rep2 = sys.argv[4]
-    fname = sys.argv[5]
-            
-    # Execute basic function
-    rep, su, summdf = repertoireComputation(inp_name, fname)
-    
-    # Save output to CSV files
-    if not rep.empty: 
-        rep.to_csv(outrep, sep = '\t')
-    if not su.empty:
-        su.to_csv(summ_rep, sep = '\t')
-    if not summdf.empty:
-        summdf.to_csv(summ_rep2, sep = '\t')
-        
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/ext_repertoire_V.xml	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,24 +0,0 @@
-<tool id="extRepV" name="V-Gene Repertoire Extraction" version="0.9">
-  <description>Compute repertoire of V-genes</description>
-  <command interpreter="python">ext_repertoire_V.py $input  $clonos $topcl $summ ${input.name}</command>
-  <inputs>
-    <param format="tabular" name="input" type="data" label="File of clonotypes"/>
-
-	
-  </inputs>
-  
-<outputs>
-	<data name="clonos" format="tabular" 	label="${input.name}_repertoire"/>
-	<data name="topcl" format="tabular" 	label="${input.name}_top10rep"/>
-	<data name="summ" format="tabular" 	label="${input.name}_Summary4"/>
-
-
-
-  </outputs>
-
-
-  <help>
-This tool computes the repertoire of V-genes (i.e. , the number of clonotypes using each V-gene over the total number of clonotypes).
-  </help>
-
-</tool>
--- a/cmpb2016/ngs_filtering.py	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,452 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Sep  4 18:41:42 2013
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import string as strpy
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import time
-import sys
-
-
-def filter_condition_AAjunction(x):
-    x= x.strip()
-    if ' ' in x:
-        return x.split(' ')[0]
-    else:
-        return x
-
-#-----------frame creation---------------------
-def filtering(inp,cells,psorf,con,prod,CF,Vper,Vgene,laa1,laa2,conaa,Jgene,Dgene,fname):
-    
-    try:
-        path=inp
-        frame = DataFrame()
-        seqlen = []
-        head = []
-        tp = read_csv(path, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-        frame = concat([chunk for chunk in tp])
-        
-        frcol = list(frame.columns)
-        #print frcol[-1]
-        if 'Unnamed' in frcol[-1]:
-            del frcol[-1]
-        frame=frame[frcol]
-        
-        frame.index = range(1,len(frame)+1)
-        
-        head.append('Total reads of raw data')
-        seqlen.append(len(frame))
-  
-        #------------drop nulls--------------------        
-        filtered = DataFrame()
-        filtall = DataFrame()
-        summ_df = DataFrame()
-        filtered = frame[isnull(frame['AA JUNCTION']) | isnull(frame['V-GENE and allele'])]
-        
-        filtall = filtall.append(filtered)
-        if len(filtall) > 0:
-            filtall.loc[filtered.index,'Reason'] = "NoResults"
-        frame = frame[frame['AA JUNCTION'].notnull()]
-        frame = frame[frame['V-GENE and allele'].notnull()]
-        
-        head.append('Not Null CDR3/V')
-        head.append('filter out')
-        seqlen.append(len(frame))
-        seqlen.append(len(filtered))
-        filtered = DataFrame()
-     
-        if psorf.startswith('y') or psorf.startswith('Y'):
-            
-            cc0=np.array(frame['V-GENE and allele'].unique())
-    
-        
-            for x in cc0:
-                x1=x.split('*')
-                try:
-                    if (x1[1].find('P')>-1) or (x1[1].find('ORF')>-1):
-                        filtered = filtered.append(frame[frame['V-GENE and allele'] == x])
-                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,NA)
-                    elif x.find('or')>-1:
-                        posa=x.count('or')        
-                        x2=x.split('or')
-                        x4=''
-                        genelist=[]        
-                        for cnt in range(0, posa+1):
-                            x3=x2[cnt].split('*')
-                            x3[0]=x3[0].strip()#kobei ta space
-                            k=x3[0].split(' ')# holds only TRBV
-                            if cnt==0:
-                                genelist.append(k[1])
-                                x4+=k[1]
-                            elif  ((str(k[1]) in genelist) == False) & (x3[1].find('P')==-1):# check for P in x3
-                                genelist.append(k[1])
-                                x4+=' or ' 
-                                x4+=k[1]
-                                x3=None
-                                k1=None
-                        genelist=None 
-                         
-                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,x4)
-                                
-                    else:
-                        s=x1[0].split(' ')
-                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,s[1])
-                except IndexError as e:
-                    print('V-gene is already been formed')
-                    continue
-            
-            x=None
-            x1=None
-            s=None
-            
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'P or ORF'
-            frame = frame[frame['V-GENE and allele'].notnull()]
-            
-            head.append('Functional TRBV')
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-            filtered = DataFrame()
-        
-        
-        
-        #------------FILTERING for data quality--------------------
-        if con.startswith('y') or con.startswith('Y'):
-            filtered = frame [frame['AA JUNCTION'].str.contains('X') |
-                            frame['AA JUNCTION'].str.contains('#') |
-                            frame['AA JUNCTION'].str.contains('[*]')]
-        
-        
-        
-            frame = frame [~frame['AA JUNCTION'].str.contains('X') &
-                            ~frame['AA JUNCTION'].str.contains('#') &
-                            ~frame['AA JUNCTION'].str.contains('[*]') ]
-        
-    
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'X,#,*'
-            head.append('Not Containing X,#,*')
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-            filtered = DataFrame()
-            
-        # Set label of functionality column, taking into account current & past IMGT Summary column label
-        functionality_label = 'Functionality'
-        if 'V-DOMAIN Functionality' in frame.columns:
-            functionality_label = 'V-DOMAIN Functionality' 
-            
-        if prod.startswith('y') or prod.startswith('Y'): 
-            filtered = frame[~frame[functionality_label].str.startswith('productive')]
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'not productive'
-            
-            
-            frame=frame[frame[functionality_label].str.startswith('productive')]
-            
-            head.append('Productive')
-            head.append('filter out')
-            seqlen.append(len(frame))
-            
-            seqlen.append(len(filtered))
-           
-        
-        frame['AA JUNCTION'] = frame['AA JUNCTION'].map(filter_condition_AAjunction)
-                
-        if CF.startswith('y') or CF.startswith('Y'):
-            if cells == 'TCR':
-                filtered = DataFrame()
-                filtered = frame[~frame['AA JUNCTION'].str.startswith('C')  |
-                        ~frame['AA JUNCTION'].str.endswith('F')]
-                        
-                filtall = filtall.append(filtered)
-                if len(filtall) > 0:
-                    filtall.loc[filtered.index,'Reason'] = 'Not C..F'
-            
-                frame = frame[frame['AA JUNCTION'].str.startswith('C') & 
-                            frame['AA JUNCTION'].str.endswith('F')]
-                
-                head.append('CDR3 landmarks C-F')
-                head.append('filter out')        
-                seqlen.append(len(frame))
-                seqlen.append(len(filtered))
-                filtered = DataFrame()
-            elif cells == 'BCR':
-                filtered = DataFrame()
-                filtered = frame[~frame['AA JUNCTION'].str.startswith('C')  |
-                        ~frame['AA JUNCTION'].str.endswith('W')]
-                        
-                filtall = filtall.append(filtered)
-                if len(filtall) > 0:
-                    filtall.loc[filtered.index,'Reason'] = 'Not C..W'
-            
-                frame = frame[frame['AA JUNCTION'].str.startswith('C') & 
-                            frame['AA JUNCTION'].str.endswith('W')]
-                
-                head.append('CDR3 landmarks C-W')
-                head.append('filter out')        
-                seqlen.append(len(frame))
-                seqlen.append(len(filtered))
-                filtered = DataFrame()
-            else:
-                print('TCR or BCR type')
-    
-    
-        filtered = DataFrame()
-        
-        filtered = frame[frame['V-REGION identity %'] < Vper]
-        
-       
-        filtall = filtall.append(filtered)
-        if len(filtall) > 0:
-            filtall.loc[filtered.index,'Reason'] = 'identity < {iden}%'.format(iden = Vper)
-        
-        frame=frame[frame['V-REGION identity %']>= Vper]
-        head.append('Identity >= {iden}%'.format(iden = Vper))
-        head.append('filter out')
-        seqlen.append(len(frame))
-        seqlen.append(len(filtered))
-    
-        head.append('Total filter out A')
-        head.append('Total filter in A')
-        seqlen.append(len(filtall))
-        seqlen.append(len(frame))
-        
-        ###############################
-        if Vgene != 'null':
-            
-            filtered = DataFrame()
-        
-            filtered = frame[frame['V-GENE and allele'] != Vgene]
-        
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'V-GENE != {} '.format(Vgene)
-        
-    
-            frame = frame[frame['V-GENE and allele'] == Vgene]
-            
-            
-            
-            head.append('V-GENE = {} '.format(Vgene))
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-        
-            
-        
-        ###############################
-        if (laa1 != 'null') or (laa2 != 'null'):
-            if int(laa2) == 0:
-                low = int(laa1)
-                high = 100
-            elif int(laa1) > int(laa2):
-                low = int(laa2)
-                high = int(laa1)
-            else:
-                low = int(laa1)
-                high = int(laa2)
-            
-            filtered = DataFrame()
-            criteria = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) < low)
-            criteria2 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) > high)
-            filtered = frame[criteria | criteria2]
-
-            filtall = filtall.append(filtered)
-            if int(laa2)==0:
-                if len(filtall) > 0:
-                    filtall.loc[filtered.index,'Reason'] = 'CDR3 length not bigger than {}'.format(low)
-            else:
-                if len(filtall) > 0:
-                    filtall.loc[filtered.index,'Reason'] = 'CDR3 length not from {} to {}'.format(low,high)
-            
-            criteria3 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) >= low)
-            criteria4 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) <= high)
-            frame = frame[criteria3 & criteria4] 
-            
-            if int(laa2)==0:            
-                head.append('CDR3 length bigger than {}'.format(low))
-            else:
-                head.append('CDR3 length from {} to {} '.format(low,high))
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-    
-        ###############################
-        if conaa != 'null':
-            if conaa.islower():
-                conaa = conaa.upper()
-            filtered = DataFrame()
-        
-            filtered = frame[~frame['AA JUNCTION'].str.contains(conaa)]
-        
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'CDR3 not containing {}'.format(conaa)
-        
-            frame = frame[frame['AA JUNCTION'].str.contains(conaa) ]    
-        
-            head.append('CDR3 containing {}'.format(conaa))
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-        
-        
-        
-       
-        #####------------keep the small J gene name--------------------
-        #frame['J-GENE and allele'] = frame['J-GENE and allele'].map(filter_condition_Jgene)
-        cc2=np.array(frame['J-GENE and allele'].unique())
-        
-        for x in cc2:
-            try:
-                if notnull(x):
-                    x1=x.split('*')
-            #        print(x)
-            #        print (x1[0]) 
-                    trbj=x1[0].split(' ')
-                    frame['J-GENE and allele']=frame['J-GENE and allele'].replace(x,trbj[1])
-            except IndexError as e:
-                print('J-Gene has been formed')
-            
-            
-        
-        x=None
-        x1=None
-        
-        
-        #------------keep the small D gene name--------------------
-        cc1=np.array(frame['D-GENE and allele'].unique())
-        for x in cc1:
-            try:
-                if notnull(x):    
-                    x1=x.split('*')
-                    trbd=x1[0].split(' ')
-                    frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,trbd[1])
-                else:
-                    frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,'none')
-            except IndexError as e:
-                print('D-gene has been formed')
-            
-        
-        x=None
-        x1=None    
-        
-        
-        if Jgene != 'null':
-            
-            filtered = DataFrame()
-        
-            filtered = frame[frame['J-GENE and allele'] != Jgene]
-        
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'J-GENE not {} '.format(Jgene)
-        
-    
-            frame = frame[frame['J-GENE and allele'] == Jgene]
-            
-            
-            
-            head.append('J-GENE = {} '.format(Jgene))
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-            
-            
-
-        if Dgene != 'null':
-            
-            filtered = DataFrame()
-        
-            filtered = frame[frame['D-GENE and allele'] != Dgene]
-        
-            filtall = filtall.append(filtered)
-            if len(filtall) > 0:
-                filtall.loc[filtered.index,'Reason'] = 'D-GENE not {} '.format(Dgene)
-        
-    
-            frame = frame[frame['D-GENE and allele'] == Dgene]
-            
-            
-            
-            head.append('D-GENE = {} '.format(Dgene))
-            head.append('filter out')
-            seqlen.append(len(frame))
-            seqlen.append(len(filtered))
-        
-        
-        head.append('Total filter out')
-        head.append('Total filter in')
-        seqlen.append(len(filtall))
-        seqlen.append(len(frame))
-        summ_df = DataFrame(index = head)
-        col = fname
-       
-        summ_df[col] = seqlen
-        frame=frame.rename(columns = {'V-GENE and allele':'V-GENE',
-        'J-GENE and allele':'J-GENE','D-GENE and allele':'D-GENE'})
-        
-        
-        frcol.append('Reason')
-        
-        filtall = filtall[frcol]
-    
-        #--------------out CSV---------------------------      
-        frame.index = range(1,len(frame)+1)
-        if not summ_df.empty:
-            summ_df['%'] = (100*summ_df[summ_df.columns[0]]/summ_df[summ_df.columns[0]][summ_df.index[0]]).map(('{:.4f}'.format))
-        return(frame,filtall,summ_df)
-    except KeyError as e:
-        print('This file has no ' + str(e) + ' column')
-        return(frame,filtall,summ_df)
-
-
-if __name__ == '__main__':   
-
-    start=time.time()    
-    
-    # Parse input arguments
-    inp = sys.argv[1]
-    cells = sys.argv[2]
-    psorf = sys.argv[3]
-    con = sys.argv[4]
-    prod = sys.argv[5]
-    CF = sys.argv[6]
-    Vper = float(sys.argv[7])
-    Vgene = sys.argv[8]
-    laa1 = sys.argv[9]
-    conaa = sys.argv[10]
-    filterin = sys.argv[11]
-    filterout = sys.argv[12]
-    Sum_table = sys.argv[13]
-    Jgene = sys.argv[14]
-    Dgene = sys.argv[15]
-    laa2 = sys.argv[16]
-    fname = sys.argv[17]
-        
-    # Execute basic function
-    fin,fout,summ = filtering(inp,cells,psorf,con,prod,CF,Vper,Vgene,laa1,laa2,conaa,Jgene,Dgene,fname)
-    
-    # Save output to CSV files
-    if not summ.empty:
-        summ.to_csv(Sum_table, sep = '\t')
-    if not fin.empty:
-        fin.to_csv(filterin , sep = '\t')
-    if not fout.empty:        
-        fout.to_csv(filterout, sep= '\t')
-        
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
-
--- a/cmpb2016/ngs_filtering.xml	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-<tool id="ngsFilt" name="IMGT Report Filtering" version="0.9" profile="15.10">
-  <description>Filter IMGT Summary Data</description>
-  <command interpreter="python">ngs_filtering.py $input $TCR_or_BCR $Vfun $spChar $prod $delCF $threshold $Vg.Vgid $clen.cdr3len1 $cdp.cdr3part $filtin $filtout $summ $Jg.Jgid $Dg.Dgid $clen.cdr3len2 $process_id
-	
-  </command>
-  <inputs>
-    <param format="txt" name="input" type="data" label="IMGT Summary Output"/>
-	<param format="txt" name="process_id" type="text" label="Process ID"/>
-	<param name="TCR_or_BCR" type="select" label="T-cell or B-cell option">
-    <option value="TCR">T-cell</option>
-    <option value="BCR">B-cell</option>
-    </param>
-	<param name="Vfun" type="select" label="Only Take Into Account Fuctional V-GENE? ">
-	<option value="y">yes</option>
-      <option value="n">no</option>
-      </param>
-
-	<param name="spChar" type="select" label="Only Take Into Account CDR3 with no Special Characters (X,*,#)? ">
-     <option value="y">yes</option> 
-	<option value="n">no</option>
-      </param>
-
-  <param name="prod" type="select" label="Only Take Into Account Productive Sequences? ">
-     <option value="y">yes</option> 
-	<option value="n">no</option>
-      </param>
-
-  <param name="delCF" type="select" label="Only Take Into Account CDR3 with valid start/end landmarks? ">
-	<option value="y">yes</option>
-      <option value="n">no</option>
-      </param>
-
-    <param name="threshold" type="float" size="3" value="0" min="0" max="100" label="V-REGION identity %" />
-  
-  <conditional name="Vg">
-	
-	<param name="Vg_select" type="select" label="Select Specific V gene?">
-        <option value="y">Yes</option>
-		<option value="n" selected="true">No</option>
-    </param>
-	
-	<when value="y">
-          <param format="txt" name="Vgid" type="text" label="Type V gene"/>
-     </when>
-	
-	<when value="n">
-		<param name="Vgid" type="hidden" value="null" />
-     </when>
-
-	 </conditional>
-	 
-	  <conditional name="Jg">
-	
-	<param name="Jg_select" type="select" label="Select Specific J gene?">
-        <option value="y">Yes</option>
-		<option value="n" selected="true">No</option>
-    </param>
-	
-	<when value="y">
-          <param format="txt" name="Jgid" type="text" label="Type J gene"/>
-     </when>
-	
-	<when value="n">
-		<param name="Jgid" type="hidden" value="null" />
-     </when>
-
-	 </conditional>
-	 
-	 <conditional name="Dg">
-	
-	<param name="Dg_select" type="select" label="Select Specific D gene?">
-        <option value="y">Yes</option>
-		<option value="n" selected="true">No</option>
-    </param>
-	
-	<when value="y">
-          <param format="txt" name="Dgid" type="text" label="Type D gene"/>
-     </when>
-	
-	<when value="n">
-		<param name="Dgid" type="hidden" value="null" />
-     </when>
-
-	 </conditional>
-	 
-	 
-	
-	<conditional name="clen">
-
-	<param name="clen_select" type="select" label="Select CDR3 length range?">
-        <option value="y">Yes</option>
-		<option value="n" selected="true">No</option>
-    </param>
-
-	<when value="y">
-		<param name="cdr3len1" type="integer" size="3" value="0" min="0" max="100" label="CDR3 Length Lower Threshold" />
-		<param name="cdr3len2" type="integer" size="3" value="0" min="0" max="100" label="CDR3 Length Upper Threshold" />
-	</when>
-
-	<when value="n">
-		<param name="cdr3len1" type="hidden" value="null" />
-		<param name="cdr3len2" type="hidden" value="null" />
-	</when>
-
-	</conditional>
-	
-	<conditional name="cdp">
-	
-	<param name="cdp_select" type="select" label="Only select CDR3 containing specific amino-acid sequence?">
-        <option value="y">Yes</option>
-		<option value="n" selected="true">No</option>
-    </param>
-	
-	<when value="y">
-    <param format="txt" name="cdr3part" type="text" label="Type specific amino-acid sequence"/>
-	</when>
-	
-	<when value="n">
-		<param name="cdr3part" type="hidden" value="null" />
-	</when>
-	</conditional>
-  
-  </inputs>
-  
-  
-  <outputs>
-	<data name="filtin" format="tabular" 	label="${process_id}_filterin"/>
-	<data name="filtout" format="tabular" 	label="${process_id}_filterout"/>
-	<data name="summ" format="tabular" 	label="${process_id}_Summary"/>
-
-
-  </outputs>
-
-
-  <help>
-This tool filters IMGT Summary Data based on a combination of criteria.
-  </help>
-
-</tool>
--- a/cmpb2016/pub_clono_CDR3.py	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Dec 21 18:26:01 2015
-
-@author: chmaramis
-"""
-
-from __future__ import division
-import numpy as np
-from pandas import *
-from numpy import nan as NA
-import sys
-import time
-
-
-
-def publicCDR3Func(inputs,thres):
-    
-    cdr3=DataFrame()
-
-    for x in range(0,len(inputs),2):
-            cl = DataFrame()
-            cl = read_csv(inputs[x] , sep = '\t' , index_col = 0)
-            #tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
-            #cl = concat([chunk for chunk in tp]) 
-            
-            if (thres != 'null'):
-                cl = cl[cl['Reads'] > int(thres)]
-            
-            x1 = inputs[x+1].split('_')
-            
-            del cl['Reads']
-            cl.columns = [cl.columns[0], x1[0]+' '+cl.columns[1], x1[0]+' Relative '+cl.columns[2]]
-            
-            if cdr3.empty:
-                cdr3 = cl
-            else:
-                cdr3 = cdr3.merge(cl, how='outer', on='AA JUNCTION')
-    
-    
-    col = cdr3.columns
-    freqs = col.map(lambda x: 'Frequency' in x)
-    reads = col.map(lambda x: 'Reads/Total' in x)
-    
-    cdr3[col[freqs]] = cdr3[col[freqs]].fillna(0)
-    cdr3[col[reads]] = cdr3[col[reads]].fillna('0/*')
-    
-    cdr3['Num of Patients']= cdr3[col[freqs]].apply(lambda x: np.sum(x != 0), axis=1)
-
-    cdr3 = cdr3[cdr3['Num of Patients'] > 1]
-
-    cdr3.index = range(1,len(cdr3)+1)
-    
-    return cdr3    
-
-
-if __name__ == '__main__':   
-
-    start=time.time()    
-    
-    # Parse input arguments
-    threshold = sys.argv[2]
-    arg = sys.argv[3:]
-    output = sys.argv[1]
-        
-    # Execute basic function
-    mer = publicCDR3Func(arg,threshold)
-    
-    # Save output to CSV files
-    if not mer.empty:
-        mer.to_csv(output , sep = '\t') 
-
-    # Print execution time
-    stop=time.time()
-    print('Runtime:' + str(stop-start))
--- a/cmpb2016/pub_clono_CDR3.xml	Sun Mar 18 07:06:48 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-<tool id="pubClonoCDR3" name="Public CDR3 Clonotypes Computation" version="0.9">
-<description>Compute Public CDR3 Clonotypes</description>
-<command interpreter="python">
-pub_clono_CDR3.py "$output1" "$Th.thres"
-#for x in $clono_files
-  "$x.clfile"
-  "$x.clfile.name"
-#end for
-</command>
-<inputs>
-	<conditional name="Th">
-	
-		<param name="thres_select" type="select" label="Remove CDR3 With Reads Fewer Than Threshold?">
-			<option value="y">Yes</option>
-			<option value="n" selected="true">No</option>
-		</param>
-	
-		<when value="y">
-			<param name="thres" type="integer" size="4" value="1" min="1"  label="Keep CDR3 with Number of Reads more than"/>
-		</when>
-	
-		<when value="n">
-			<param name="thres" type="hidden" value="null" />
-		</when>
-
-	</conditional>
-	<repeat name="clono_files" title="Files to be append" min="2">
-		<param name="clfile" type="data" label="File of CDR3 Frequencies" format="tabular"/>
-	</repeat>
-
-</inputs>
-
-<outputs>
-<data format="tabular" name="output1" label="Public_CDR3"/>
-</outputs>
-<help>
-This tool computes the public CDR3 clonotypes among a number of patients along with their frequencies for each patient.
-</help>
-</tool>