Mercurial > repos > chmaramis > testirprofiler

--- a/cmpb2016/top10_CDR3_inexact_pairing.py	Sun Mar 18 07:07:39 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Apr 18 11:37:40 2016
-
-@author: chmaramis
-"""
-
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Apr 18 09:48:00 2016
-
-@author: chmaramis
-"""
-
-import pandas as pd
-import numpy as np
-import sys
-import functools as ft
-
-def maxHam1(s1, s2):
-    if  len(s1) != len(s2):
-        return False
-    else:
-        return sum(c1 != c2 for c1, c2 in zip(s1, s2)) <= 1
-
-
-if __name__ == "__main__":
-
-    clonosFN = sys.argv[1]
-    outFN = sys.argv[2]
-
-    Cl = pd.read_csv(clonosFN,sep='\t',index_col=0)
-    T10 = Cl[:10].copy()
-
-    aa_junction = np.array(T10['AA JUNCTION'])
-    geneCol = [x for x in T10.columns if x.upper().endswith('GENE')][0]
-
-    F = np.zeros((2,20))
-
-    for i in range(0,10):
-        taa = T10['AA JUNCTION'][i+1]
-        gene = T10[geneCol][i+1]
-        S1 = Cl['AA JUNCTION'].apply(ft.partial(maxHam1, s2=taa))
-        S2 = Cl[geneCol] == gene
-        S1[i+1] = False
-        F[0,2*i] = (S1 & S2).sum()
-        F[0,2*i+1] = Cl['Frequency %'][S1 & S2].sum()
-        F[1,2*i] = (S1 & ~S2).sum()
-        F[1,2*i+1] = Cl['Frequency %'][S1 & ~S2].sum()
-
-
-    K = list(aa_junction+' Nr. Clonos')
-    L = list(aa_junction+' Freq. %')
-    columns = [val for pair in zip(K,L) for val in pair]
-
-    D = pd.DataFrame(F,columns=columns, index=['same gene', 'different gene'])
-    D.to_csv(outFN,sep='\t')
-
--- a/cmpb2016/top10_CDR3_inexact_pairing.xml	Sun Mar 18 07:07:39 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-<tool id="top10CDR3inexPair" name="Top10 CDR3 Inexact Pairing" version="0.9">
-<description>Pair CDR3 of top10 clonotypes with similar CDR3</description>
-<command interpreter="python">
-top10_CDR3_inexact_pairing.py "$clonos" "$out"
-</command>
-<inputs>
-<param format="txt" name="clonos" type="data" label="Full List of Clonotypes"/>
-</inputs>
-
-<outputs>
-<data format="tabular" name="out" label="${clonos.name}_top10CDR3_inexactPairing"/>
-</outputs>
-<help>
-Provides the number of clonotypes with AA Junction (CDR3) similar to that of the top-10 clonotypes (maximum 1 AA difference). Distinguishes between clonotypes using the same gene and different gene. Works for both V-Gene and J-Gene clonotypes.
-</help>
-</tool>
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/comp_clono_CDR3.py	Sun Mar 18 07:11:06 2018 -0400
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jun 19 17:33:34 2014
+
+@author: chmaramis
+"""
+
+from __future__ import division
+import numpy as np
+from pandas import *
+import functools as ft
+import sys
+import time
+
+frm = lambda x,y: '{r}/{l}'.format(r=x,l=y)
+
+
+def cdr3Computation(inp_name, out1, t10n, fname):
+
+    frame = DataFrame()
+    tp = read_csv(inp_name, iterator=True, chunksize=5000,sep='\t', index_col=0 )
+    frame = concat([chunk for chunk in tp])
+
+    grouped = frame.groupby(['AA JUNCTION'])
+    x=grouped.size()
+    x1=DataFrame(x.index, columns=['AA JUNCTION'])
+    x1['Reads']=x.values
+    total = sum(x1['Reads'])
+    #x1['Reads/Total'] = ['{r}/{l}'.format(r=pr , l = total) for pr in x1['Reads']]
+    x1['Reads/Total'] = x1['Reads'].map(ft.partial(frm, y=total))
+    x1['Frequency %'] = (100*x1['Reads']/total).map('{:.4f}'.format)
+
+    final = x1.sort_values(by = ['Reads'] , ascending = False)
+
+    final.index=range(1,len(final)+1)
+    final.to_csv(out1 , sep = '\t')
+
+    numofclono = len(final)
+    clust = len(final[final['Reads'] > 1])
+    sing = len (final[final['Reads'] == 1])
+    top10 = final[['AA JUNCTION','Frequency %']].head(10)
+    top10.to_csv(t10n , sep = '\t')
+
+    summary = [[str(top10['AA JUNCTION'].values[0])]]
+    summary.append([top10['Frequency %'].values[0]])
+    summary.append([numofclono])
+    summary.append([clust,'{:.4f}'.format(100*clust/numofclono)])
+    summary.append([sing,'{:.4f}'.format(100*sing/numofclono)])
+
+    ind = ['Dominant Clonotype (CDR3)', 'Frequency', 'Number of Clonotypes' , 'Expanding Clonotypes','Singletons']
+    spl = fname.split('_')
+    col = [spl[0],'%']
+
+    frsum = DataFrame(summary,index = ind, columns = col)
+
+    return frsum
+
+if __name__ == '__main__':
+
+    start=time.time()
+
+    # Parse input arguments
+    inp_name = sys.argv[1]
+    out1 = sys.argv[2]
+    t10n = sys.argv[3]
+    sname = sys.argv[4]
+    fname = sys.argv[5]
+
+    # Execute basic function
+    frsum = cdr3Computation(inp_name,out1,t10n,fname)
+
+    # Save output to CSV files
+    if not frsum.empty:
+        frsum.to_csv(sname, sep = '\t')
+
+    # Print execution time
+    stop=time.time()
+    print('Runtime:' + str(stop-start))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/comp_clono_CDR3.xml	Sun Mar 18 07:11:06 2018 -0400
@@ -0,0 +1,19 @@
+<tool id="compClonoCDR3" name="CDR3 Clonotypes Computation" version="0.9">
+  <description>Compute CDR3 clonotypes</description>
+  <command interpreter="python">comp_clono_CDR3.py $input  $clonos $topcl $summ ${input.name}</command>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Filtered-in File"/>
+  </inputs>
+
+<outputs>
+	<data name="clonos" format="tabular" 	label="${input.name}_CDR3"/>
+	<data name="topcl" format="tabular" 	label="${input.name}_top10CDR3"/>
+	<data name="summ" format="tabular" 	label="${input.name}_Summary3"/>
+</outputs>
+
+
+  <help>
+This tool computes the CDR3 clonotypes.
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngs_filtering.py	Sun Mar 18 07:11:06 2018 -0400
@@ -0,0 +1,452 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Sep  4 18:41:42 2013
+
+@author: chmaramis
+"""
+
+from __future__ import division
+import string as strpy
+import numpy as np
+from pandas import *
+from numpy import nan as NA
+import time
+import sys
+
+
+def filter_condition_AAjunction(x):
+    x= x.strip()
+    if ' ' in x:
+        return x.split(' ')[0]
+    else:
+        return x
+
+#-----------frame creation---------------------
+def filtering(inp,cells,psorf,con,prod,CF,Vper,Vgene,laa1,laa2,conaa,Jgene,Dgene,fname):
+
+    try:
+        path=inp
+        frame = DataFrame()
+        seqlen = []
+        head = []
+        tp = read_csv(path, iterator=True, chunksize=5000,sep='\t', index_col=0 )
+        frame = concat([chunk for chunk in tp])
+
+        frcol = list(frame.columns)
+        #print frcol[-1]
+        if 'Unnamed' in frcol[-1]:
+            del frcol[-1]
+        frame=frame[frcol]
+
+        frame.index = range(1,len(frame)+1)
+
+        head.append('Total reads of raw data')
+        seqlen.append(len(frame))
+
+        #------------drop nulls--------------------
+        filtered = DataFrame()
+        filtall = DataFrame()
+        summ_df = DataFrame()
+        filtered = frame[isnull(frame['AA JUNCTION']) | isnull(frame['V-GENE and allele'])]
+
+        filtall = filtall.append(filtered)
+        if len(filtall) > 0:
+            filtall.loc[filtered.index,'Reason'] = "NoResults"
+        frame = frame[frame['AA JUNCTION'].notnull()]
+        frame = frame[frame['V-GENE and allele'].notnull()]
+
+        head.append('Not Null CDR3/V')
+        head.append('filter out')
+        seqlen.append(len(frame))
+        seqlen.append(len(filtered))
+        filtered = DataFrame()
+
+        if psorf.startswith('y') or psorf.startswith('Y'):
+
+            cc0=np.array(frame['V-GENE and allele'].unique())
+
+
+            for x in cc0:
+                x1=x.split('*')
+                try:
+                    if (x1[1].find('P')>-1) or (x1[1].find('ORF')>-1):
+                        filtered = filtered.append(frame[frame['V-GENE and allele'] == x])
+                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,NA)
+                    elif x.find('or')>-1:
+                        posa=x.count('or')
+                        x2=x.split('or')
+                        x4=''
+                        genelist=[]
+                        for cnt in range(0, posa+1):
+                            x3=x2[cnt].split('*')
+                            x3[0]=x3[0].strip()#kobei ta space
+                            k=x3[0].split(' ')# holds only TRBV
+                            if cnt==0:
+                                genelist.append(k[1])
+                                x4+=k[1]
+                            elif  ((str(k[1]) in genelist) == False) & (x3[1].find('P')==-1):# check for P in x3
+                                genelist.append(k[1])
+                                x4+=' or '
+                                x4+=k[1]
+                                x3=None
+                                k1=None
+                        genelist=None
+
+                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,x4)
+
+                    else:
+                        s=x1[0].split(' ')
+                        frame['V-GENE and allele']=frame['V-GENE and allele'].replace(x,s[1])
+                except IndexError as e:
+                    print('V-gene is already been formed')
+                    continue
+
+            x=None
+            x1=None
+            s=None
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'P or ORF'
+            frame = frame[frame['V-GENE and allele'].notnull()]
+
+            head.append('Functional TRBV')
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+            filtered = DataFrame()
+
+
+
+        #------------FILTERING for data quality--------------------
+        if con.startswith('y') or con.startswith('Y'):
+            filtered = frame [frame['AA JUNCTION'].str.contains('X') |
+                            frame['AA JUNCTION'].str.contains('#') |
+                            frame['AA JUNCTION'].str.contains('[*]')]
+
+
+
+            frame = frame [~frame['AA JUNCTION'].str.contains('X') &
+                            ~frame['AA JUNCTION'].str.contains('#') &
+                            ~frame['AA JUNCTION'].str.contains('[*]') ]
+
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'X,#,*'
+            head.append('Not Containing X,#,*')
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+            filtered = DataFrame()
+
+        # Set label of functionality column, taking into account current & past IMGT Summary column label
+        functionality_label = 'Functionality'
+        if 'V-DOMAIN Functionality' in frame.columns:
+            functionality_label = 'V-DOMAIN Functionality'
+
+        if prod.startswith('y') or prod.startswith('Y'):
+            filtered = frame[~frame[functionality_label].str.startswith('productive')]
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'not productive'
+
+
+            frame=frame[frame[functionality_label].str.startswith('productive')]
+
+            head.append('Productive')
+            head.append('filter out')
+            seqlen.append(len(frame))
+
+            seqlen.append(len(filtered))
+
+
+        frame['AA JUNCTION'] = frame['AA JUNCTION'].map(filter_condition_AAjunction)
+
+        if CF.startswith('y') or CF.startswith('Y'):
+            if cells == 'TCR':
+                filtered = DataFrame()
+                filtered = frame[~frame['AA JUNCTION'].str.startswith('C')  |
+                        ~frame['AA JUNCTION'].str.endswith('F')]
+
+                filtall = filtall.append(filtered)
+                if len(filtall) > 0:
+                    filtall.loc[filtered.index,'Reason'] = 'Not C..F'
+
+                frame = frame[frame['AA JUNCTION'].str.startswith('C') &
+                            frame['AA JUNCTION'].str.endswith('F')]
+
+                head.append('CDR3 landmarks C-F')
+                head.append('filter out')
+                seqlen.append(len(frame))
+                seqlen.append(len(filtered))
+                filtered = DataFrame()
+            elif cells == 'BCR':
+                filtered = DataFrame()
+                filtered = frame[~frame['AA JUNCTION'].str.startswith('C')  |
+                        ~frame['AA JUNCTION'].str.endswith('W')]
+
+                filtall = filtall.append(filtered)
+                if len(filtall) > 0:
+                    filtall.loc[filtered.index,'Reason'] = 'Not C..W'
+
+                frame = frame[frame['AA JUNCTION'].str.startswith('C') &
+                            frame['AA JUNCTION'].str.endswith('W')]
+
+                head.append('CDR3 landmarks C-W')
+                head.append('filter out')
+                seqlen.append(len(frame))
+                seqlen.append(len(filtered))
+                filtered = DataFrame()
+            else:
+                print('TCR or BCR type')
+
+
+        filtered = DataFrame()
+
+        filtered = frame[frame['V-REGION identity %'] < Vper]
+
+
+        filtall = filtall.append(filtered)
+        if len(filtall) > 0:
+            filtall.loc[filtered.index,'Reason'] = 'identity < {iden}%'.format(iden = Vper)
+
+        frame=frame[frame['V-REGION identity %']>= Vper]
+        head.append('Identity >= {iden}%'.format(iden = Vper))
+        head.append('filter out')
+        seqlen.append(len(frame))
+        seqlen.append(len(filtered))
+
+        head.append('Total filter out A')
+        head.append('Total filter in A')
+        seqlen.append(len(filtall))
+        seqlen.append(len(frame))
+
+        ###############################
+        if Vgene != 'null':
+
+            filtered = DataFrame()
+
+            filtered = frame[frame['V-GENE and allele'] != Vgene]
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'V-GENE != {} '.format(Vgene)
+
+
+            frame = frame[frame['V-GENE and allele'] == Vgene]
+
+
+
+            head.append('V-GENE = {} '.format(Vgene))
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+
+
+
+        ###############################
+        if (laa1 != 'null') or (laa2 != 'null'):
+            if int(laa2) == 0:
+                low = int(laa1)
+                high = 100
+            elif int(laa1) > int(laa2):
+                low = int(laa2)
+                high = int(laa1)
+            else:
+                low = int(laa1)
+                high = int(laa2)
+
+            filtered = DataFrame()
+            criteria = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) < low)
+            criteria2 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) > high)
+            filtered = frame[criteria | criteria2]
+
+            filtall = filtall.append(filtered)
+            if int(laa2)==0:
+                if len(filtall) > 0:
+                    filtall.loc[filtered.index,'Reason'] = 'CDR3 length not bigger than {}'.format(low)
+            else:
+                if len(filtall) > 0:
+                    filtall.loc[filtered.index,'Reason'] = 'CDR3 length not from {} to {}'.format(low,high)
+
+            criteria3 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) >= low)
+            criteria4 = frame['AA JUNCTION'].apply(lambda row: (len(row)-2) <= high)
+            frame = frame[criteria3 & criteria4]
+
+            if int(laa2)==0:
+                head.append('CDR3 length bigger than {}'.format(low))
+            else:
+                head.append('CDR3 length from {} to {} '.format(low,high))
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+
+        ###############################
+        if conaa != 'null':
+            if conaa.islower():
+                conaa = conaa.upper()
+            filtered = DataFrame()
+
+            filtered = frame[~frame['AA JUNCTION'].str.contains(conaa)]
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'CDR3 not containing {}'.format(conaa)
+
+            frame = frame[frame['AA JUNCTION'].str.contains(conaa) ]
+
+            head.append('CDR3 containing {}'.format(conaa))
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+
+
+
+
+        #####------------keep the small J gene name--------------------
+        #frame['J-GENE and allele'] = frame['J-GENE and allele'].map(filter_condition_Jgene)
+        cc2=np.array(frame['J-GENE and allele'].unique())
+
+        for x in cc2:
+            try:
+                if notnull(x):
+                    x1=x.split('*')
+            #        print(x)
+            #        print (x1[0])
+                    trbj=x1[0].split(' ')
+                    frame['J-GENE and allele']=frame['J-GENE and allele'].replace(x,trbj[1])
+            except IndexError as e:
+                print('J-Gene has been formed')
+
+
+
+        x=None
+        x1=None
+
+
+        #------------keep the small D gene name--------------------
+        cc1=np.array(frame['D-GENE and allele'].unique())
+        for x in cc1:
+            try:
+                if notnull(x):
+                    x1=x.split('*')
+                    trbd=x1[0].split(' ')
+                    frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,trbd[1])
+                else:
+                    frame['D-GENE and allele']=frame['D-GENE and allele'].replace(x,'none')
+            except IndexError as e:
+                print('D-gene has been formed')
+
+
+        x=None
+        x1=None
+
+
+        if Jgene != 'null':
+
+            filtered = DataFrame()
+
+            filtered = frame[frame['J-GENE and allele'] != Jgene]
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'J-GENE not {} '.format(Jgene)
+
+
+            frame = frame[frame['J-GENE and allele'] == Jgene]
+
+
+
+            head.append('J-GENE = {} '.format(Jgene))
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+
+
+
+        if Dgene != 'null':
+
+            filtered = DataFrame()
+
+            filtered = frame[frame['D-GENE and allele'] != Dgene]
+
+            filtall = filtall.append(filtered)
+            if len(filtall) > 0:
+                filtall.loc[filtered.index,'Reason'] = 'D-GENE not {} '.format(Dgene)
+
+
+            frame = frame[frame['D-GENE and allele'] == Dgene]
+
+
+
+            head.append('D-GENE = {} '.format(Dgene))
+            head.append('filter out')
+            seqlen.append(len(frame))
+            seqlen.append(len(filtered))
+
+
+        head.append('Total filter out')
+        head.append('Total filter in')
+        seqlen.append(len(filtall))
+        seqlen.append(len(frame))
+        summ_df = DataFrame(index = head)
+        col = fname
+
+        summ_df[col] = seqlen
+        frame=frame.rename(columns = {'V-GENE and allele':'V-GENE',
+        'J-GENE and allele':'J-GENE','D-GENE and allele':'D-GENE'})
+
+
+        frcol.append('Reason')
+
+        filtall = filtall[frcol]
+
+        #--------------out CSV---------------------------
+        frame.index = range(1,len(frame)+1)
+        if not summ_df.empty:
+            summ_df['%'] = (100*summ_df[summ_df.columns[0]]/summ_df[summ_df.columns[0]][summ_df.index[0]]).map(('{:.4f}'.format))
+        return(frame,filtall,summ_df)
+    except KeyError as e:
+        print('This file has no ' + str(e) + ' column')
+        return(frame,filtall,summ_df)
+
+
+if __name__ == '__main__':
+
+    start=time.time()
+
+    # Parse input arguments
+    inp = sys.argv[1]
+    cells = sys.argv[2]
+    psorf = sys.argv[3]
+    con = sys.argv[4]
+    prod = sys.argv[5]
+    CF = sys.argv[6]
+    Vper = float(sys.argv[7])
+    Vgene = sys.argv[8]
+    laa1 = sys.argv[9]
+    conaa = sys.argv[10]
+    filterin = sys.argv[11]
+    filterout = sys.argv[12]
+    Sum_table = sys.argv[13]
+    Jgene = sys.argv[14]
+    Dgene = sys.argv[15]
+    laa2 = sys.argv[16]
+    fname = sys.argv[17]
+
+    # Execute basic function
+    fin,fout,summ = filtering(inp,cells,psorf,con,prod,CF,Vper,Vgene,laa1,laa2,conaa,Jgene,Dgene,fname)
+
+    # Save output to CSV files
+    if not summ.empty:
+        summ.to_csv(Sum_table, sep = '\t')
+    if not fin.empty:
+        fin.to_csv(filterin , sep = '\t')
+    if not fout.empty:
+        fout.to_csv(filterout, sep= '\t')
+
+    # Print execution time
+    stop=time.time()
+    print('Runtime:' + str(stop-start))
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngs_filtering.xml	Sun Mar 18 07:11:06 2018 -0400
@@ -0,0 +1,139 @@
+<tool id="ngsFilt" name="IMGT Report Filtering" version="0.9" profile="15.10">
+  <description>Filter IMGT Summary Data</description>
+  <command interpreter="python">ngs_filtering.py $input $TCR_or_BCR $Vfun $spChar $prod $delCF $threshold $Vg.Vgid $clen.cdr3len1 $cdp.cdr3part $filtin $filtout $summ $Jg.Jgid $Dg.Dgid $clen.cdr3len2 $process_id
+
+  </command>
+  <inputs>
+    <param format="txt" name="input" type="data" label="IMGT Summary Output"/>
+	<param format="txt" name="process_id" type="text" label="Process ID"/>
+	<param name="TCR_or_BCR" type="select" label="T-cell or B-cell option">
+    <option value="TCR">T-cell</option>
+    <option value="BCR">B-cell</option>
+    </param>
+	<param name="Vfun" type="select" label="Only Take Into Account Fuctional V-GENE? ">
+	<option value="y">yes</option>
+      <option value="n">no</option>
+      </param>
+
+	<param name="spChar" type="select" label="Only Take Into Account CDR3 with no Special Characters (X,*,#)? ">
+     <option value="y">yes</option>
+	<option value="n">no</option>
+      </param>
+
+  <param name="prod" type="select" label="Only Take Into Account Productive Sequences? ">
+     <option value="y">yes</option>
+	<option value="n">no</option>
+      </param>
+
+  <param name="delCF" type="select" label="Only Take Into Account CDR3 with valid start/end landmarks? ">
+	<option value="y">yes</option>
+      <option value="n">no</option>
+      </param>
+
+    <param name="threshold" type="float" size="3" value="0" min="0" max="100" label="V-REGION identity %" />
+
+  <conditional name="Vg">
+
+	<param name="Vg_select" type="select" label="Select Specific V gene?">
+        <option value="y">Yes</option>
+		<option value="n" selected="true">No</option>
+    </param>
+
+	<when value="y">
+          <param format="txt" name="Vgid" type="text" label="Type V gene"/>
+     </when>
+
+	<when value="n">
+		<param name="Vgid" type="hidden" value="null" />
+     </when>
+
+	 </conditional>
+
+	  <conditional name="Jg">
+
+	<param name="Jg_select" type="select" label="Select Specific J gene?">
+        <option value="y">Yes</option>
+		<option value="n" selected="true">No</option>
+    </param>
+
+	<when value="y">
+          <param format="txt" name="Jgid" type="text" label="Type J gene"/>
+     </when>
+
+	<when value="n">
+		<param name="Jgid" type="hidden" value="null" />
+     </when>
+
+	 </conditional>
+
+	 <conditional name="Dg">
+
+	<param name="Dg_select" type="select" label="Select Specific D gene?">
+        <option value="y">Yes</option>
+		<option value="n" selected="true">No</option>
+    </param>
+
+	<when value="y">
+          <param format="txt" name="Dgid" type="text" label="Type D gene"/>
+     </when>
+
+	<when value="n">
+		<param name="Dgid" type="hidden" value="null" />
+     </when>
+
+	 </conditional>
+
+
+
+	<conditional name="clen">
+
+	<param name="clen_select" type="select" label="Select CDR3 length range?">
+        <option value="y">Yes</option>
+		<option value="n" selected="true">No</option>
+    </param>
+
+	<when value="y">
+		<param name="cdr3len1" type="integer" size="3" value="0" min="0" max="100" label="CDR3 Length Lower Threshold" />
+		<param name="cdr3len2" type="integer" size="3" value="0" min="0" max="100" label="CDR3 Length Upper Threshold" />
+	</when>
+
+	<when value="n">
+		<param name="cdr3len1" type="hidden" value="null" />
+		<param name="cdr3len2" type="hidden" value="null" />
+	</when>
+
+	</conditional>
+
+	<conditional name="cdp">
+
+	<param name="cdp_select" type="select" label="Only select CDR3 containing specific amino-acid sequence?">
+        <option value="y">Yes</option>
+		<option value="n" selected="true">No</option>
+    </param>
+
+	<when value="y">
+    <param format="txt" name="cdr3part" type="text" label="Type specific amino-acid sequence"/>
+	</when>
+
+	<when value="n">
+		<param name="cdr3part" type="hidden" value="null" />
+	</when>
+	</conditional>
+
+  </inputs>
+
+
+  <outputs>
+	<data name="filtin" format="tabular" 	label="${process_id}_filterin"/>
+	<data name="filtout" format="tabular" 	label="${process_id}_filterout"/>
+	<data name="summ" format="tabular" 	label="${process_id}_Summary"/>
+
+
+  </outputs>
+
+
+  <help>
+This tool filters IMGT Summary Data based on a combination of criteria.
+  </help>
+
+</tool>