annotate shm_csr.xml @ 72:c787e79e8b90 draft

Uploaded
author davidvanzessen
date Tue, 05 Feb 2019 03:26:41 -0500
parents 2649a821162d
children 825c07055d73
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
1 <tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
2 <description></description>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
3 <requirements>
69
fefaaa19f30b Uploaded
davidvanzessen
parents: 67
diff changeset
4 <requirement type="package" version="2.7">python</requirement>
67
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
5 <requirement type="package" version="1.16.0">numpy</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
6 <requirement type="package" version="1.2.0">xlrd</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
7 <requirement type="package" version="3.0.0">r-ggplot2</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
8 <requirement type="package" version="1.4.3">r-reshape2</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
9 <requirement type="package" version="0.5.0">r-scales</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
10 <requirement type="package" version="3.4_5">r-seqinr</requirement>
ba33b94637ca Uploaded
davidvanzessen
parents: 60
diff changeset
11 <requirement type="package" version="1.11.4">r-data.table</requirement>
72
c787e79e8b90 Uploaded
davidvanzessen
parents: 71
diff changeset
12 <requirement type="package" version="0.4.5">changeo</requirement>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
13 </requirements>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
14 <command interpreter="bash">
49
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
15 #if str ( $filter_unique.filter_unique_select ) == "remove":
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
16 wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
17 #else:
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
18 wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
19 #end if
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
20 </command>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
21 <inputs>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
22 <param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
23 <param name="empty_region_filter" type="select" label="Sequence starts at" help="" >
1
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
24 <option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
25 <option value="FR1">FR1: include CDR1,FR2,CDR2,FR3 in filters</option> <!-- not pre-selected: a single-value select may have only one default, and the "leader" option already carries selected="true" -->
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
26 <option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
27 <option value="FR2">FR2: include CDR2,FR3 in filters</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
28 </param>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
29 <param name="functionality" type="select" label="Functionality filter" help="" >
1
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
30 <option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
31 <option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option>
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
32 <option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
33 </param>
51
8fa8836bd605 Uploaded
davidvanzessen
parents: 50
diff changeset
34 <conditional name="filter_unique">
49
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
35 <param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example.">
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
36 <option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option>
60
3b5fe323f563 Uploaded
davidvanzessen
parents: 59
diff changeset
37 <option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option>
49
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
38 <option value="keep">Keep uniques (Based on nucleotide sequence + C)</option>
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
39 <option value="no">No</option>
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
40 </param>
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
41 <when value="remove">
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
42 <param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/>
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
43 </when>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
44 <when value="remove_vjaa"></when><when value="keep"></when> <!-- every option of filter_unique_select needs a matching (possibly empty) when-branch; remove_vjaa takes no extra parameters -->
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
45 <when value="no"></when>
49
f5fe63533c58 Uploaded
davidvanzessen
parents: 47
diff changeset
46 </conditional>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
47 <param name="unique" type="select" label="Remove duplicates based on" help="" >
14
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
48 <option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
49 <option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
50 <option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
51 <option value="CDR3.IMGT.AA">CDR3 (AA)</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
52
29
80c4eebf7bc9 Uploaded
davidvanzessen
parents: 28
diff changeset
53 <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
54 <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
13
933fb21568ce Uploaded
davidvanzessen
parents: 11
diff changeset
55 <option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
56 <option value="CDR3.IMGT.seq">CDR3 (nt)</option>
10
4b695ca65213 Uploaded
davidvanzessen
parents: 6
diff changeset
57 <option value="Sequence.ID" selected="true">Don't remove duplicates</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
58 </param>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
59 <conditional name="class_filter_cond">
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
60 <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
61 <option value="70_70" selected="true">>70% class and >70% subclass</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
62 <option value="60_55">>60% class and >55% subclass</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
63 <option value="70_0">>70% class</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
64 <option value="60_0">>60% class</option>
53
3be28ac82909 Uploaded
davidvanzessen
parents: 51
diff changeset
65 <option value="19_0">>19% class</option>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
66 <option value="101_101">Do not assign (sub)class</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
67 </param>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
68 <when value="70_70"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
69 <when value="60_55"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
70 <when value="70_0"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
71 <when value="60_0"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
72 <when value="19_0"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
73 <when value="101_101"></when>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
74 </conditional>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
75 <conditional name="naive_output_cond">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
76 <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
77 <option value="yes">Yes</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
78 <option value="no" selected="true">No</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
79 </param>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
80 <when value="yes"></when>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
81 <when value="no"></when>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
82 </conditional>
5
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
83 <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
84 <option value="yes">Yes</option>
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
85 <option value="no" selected="true">No</option>
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
86 </param>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
87 </inputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
88 <outputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
89 <data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
31
fe44a905aee9 Uploaded
davidvanzessen
parents: 30
diff changeset
90 <data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" >
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
91 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
92 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
93 </data>
31
fe44a905aee9 Uploaded
davidvanzessen
parents: 30
diff changeset
94 <data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" >
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
95 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
96 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
97 </data>
31
fe44a905aee9 Uploaded
davidvanzessen
parents: 30
diff changeset
98 <data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" >
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
99 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
100 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
101 </data>
31
fe44a905aee9 Uploaded
davidvanzessen
parents: 30
diff changeset
102 <data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" >
6
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
103 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
104 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
105 </data>
31
fe44a905aee9 Uploaded
davidvanzessen
parents: 30
diff changeset
106 <data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" >
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
107 <filter>naive_output_cond['naive_output'] == "yes"</filter>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
108 <filter>class_filter_cond['class_filter'] == "101_101"</filter>
6
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
109 </data>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
110 </outputs>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
111 <tests>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
112 <test>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
113 <param name="fast" value="yes"/>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
114 <output name="out_file" file="test1.html"/>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
115 </test>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
116 </tests>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
117 <help>
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
118 <![CDATA[
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
119 **References**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
120
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
121 Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_]
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
122
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
123 .. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
124
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
125 Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_]
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
126
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
127 .. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
128
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
129 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
130
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
131 **Input files**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
132
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
133 IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
134
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
135 .. class:: infomark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
136
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
137 Note: Files can be uploaded by using “get data” and “upload file” and selecting “IMGT archive” as a file type. Special characters should be avoided in the file names of the uploaded samples as these can cause errors when running the immune repertoire pipeline. Underscores are allowed in the file names.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
138
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
139 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
140
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
141 **Sequence starts at**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
142
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
143 Identifies the region which will be included in the analysis (analysed region)
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
144
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
145 - Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded.
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
146 - Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded.
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
147 - All other filtering/analysis is based on the analysed region.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
148
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
149 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
150
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
151 **Functionality filter**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
152
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
153 Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
154
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
155 **Filter unique sequences**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
156
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
157 *Remove unique:*
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
158
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
159
30
33a7c49d48a7 Uploaded
davidvanzessen
parents: 29
diff changeset
160 This filter consists of two different steps.
33a7c49d48a7 Uploaded
davidvanzessen
parents: 29
diff changeset
161
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
162 Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
163
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
164 Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class).
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
165
30
33a7c49d48a7 Uploaded
davidvanzessen
parents: 29
diff changeset
166 .. class:: infomark
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
167
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
168 This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
169
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
170 *Keep unique:*
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
171
30
33a7c49d48a7 Uploaded
davidvanzessen
parents: 29
diff changeset
172 Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
173
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
174 Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
175
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
176 +--------------------------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
177 | unique filter |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
178 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
179 | values | remove | keep |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
180 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
181 | A | A | A |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
182 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
183 | A | B | B |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
184 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
185 | B | D | C |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
186 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
187 | B | | D |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
188 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
189 | C | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
190 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
191 | D | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
192 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
193 | D | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
194 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
195
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
196 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
197
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
198 **Remove duplicates based on**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
199
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
200 Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
201
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
202 .. class:: infomark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
203
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
204 Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first sequence is unmatched (no subclass assigned) the first matched sequence will be included instead. This means that altering the data order (by for instance sorting) can change the sequence which is included in the analysis and therefore slightly influences the results.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
205
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
206 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
207
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
208 **Human Class/Subclass filter**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
209
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
210 .. class:: warningmark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
211
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
212 Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
213
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
214 The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below).
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
215
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
216 The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) in 8 nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunk hit percentage and the nt hit percentage.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
217
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
218 *Chunk hit percentage*: The percentage of the chunks that is aligned
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
219
47
64711f461c8e Uploaded
davidvanzessen
parents: 39
diff changeset
220 *Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass.
64711f461c8e Uploaded
davidvanzessen
parents: 39
diff changeset
221 The option “>19% class” can be chosen when you are only interested in the class (Cα/Cγ/Cµ/Cɛ) of your sequences and the length of your sequence is not long enough to assign the subclasses.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
222
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
223 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
224
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
225 **Output new IMGT archives per class into your history?**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
226
39
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
227 If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline.
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
228
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
229 -----
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
230
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
231 **Execute**
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
232
a24f8c93583a Uploaded
davidvanzessen
parents: 34
diff changeset
233 Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey; after the analysis has started, its colour in the history will change to yellow. When the analysis is finished it will turn green in the history. Now the analysis can be opened by clicking on the eye icon on the analysis of interest. When an analysis turns red an error has occurred when running the analysis. If you click on the analysis title additional information can be found on the analysis. In addition a bug icon appears. Here more information on the error can be found.
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
234
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
235 ]]>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
236 </help>
56
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
237 <citations>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
238 <citation type="doi">10.1093/nar/gks457</citation>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
239 <citation type="doi">10.1093/bioinformatics/btv359</citation>
ee807645b224 Uploaded
davidvanzessen
parents: 53
diff changeset
240 </citations>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
241 </tool>