annotate shm_csr.xml @ 18:949a30f04d9b draft

Uploaded
author davidvanzessen
date Mon, 28 Nov 2016 04:41:22 -0500
parents 61d0a6318711
children fff3c83ec9b8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
1 <tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
2 <description></description>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
3 <command interpreter="bash">
6
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
4 wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $filter_uniques $class_filter_cond.class_filter $empty_region_filter $fast
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
5 </command>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
6 <inputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
7 <param name="in_file" type="data" label="IMGT zip file to be analysed" />
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
8 <param name="empty_region_filter" type="select" label="Sequence starts at" help="" >
1
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
9 <option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
10 <option value="FR1" selected="true">FR1: include CDR1,FR2,CDR2,FR3 in filters</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
11 <option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
12 <option value="FR2">FR2: include CDR2,FR3 in filters</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
13 </param>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
14 <param name="functionality" type="select" label="Functionality filter" help="" >
1
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
15 <option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
16 <option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option>
faae21ba5c63 Uploaded
davidvanzessen
parents: 0
diff changeset
17 <option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
18 </param>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
19 <param name="filter_uniques" type="select" label="Filter unique sequences" help="See below for an example.">
10
4b695ca65213 Uploaded
davidvanzessen
parents: 6
diff changeset
20 <option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
21 <option value="keep">Keep uniques (Based on nucleotide sequence + C)</option>
10
4b695ca65213 Uploaded
davidvanzessen
parents: 6
diff changeset
22 <option value="no">No</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
23 </param>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
24 <param name="unique" type="select" label="Remove duplicates based on" help="" >
14
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
25 <option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
26 <option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
27 <option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
59765d2c8890 Uploaded
davidvanzessen
parents: 13
diff changeset
28 <option value="CDR3.IMGT.AA">CDR3 (AA)</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
29
13
933fb21568ce Uploaded
davidvanzessen
parents: 11
diff changeset
30 <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3.nt.Seq, C region</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
31 <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
13
933fb21568ce Uploaded
davidvanzessen
parents: 11
diff changeset
32 <option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
33 <option value="CDR3.IMGT.seq">CDR3 (nt)</option>
10
4b695ca65213 Uploaded
davidvanzessen
parents: 6
diff changeset
34 <option value="Sequence.ID" selected="true">Don't remove duplicates</option>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
35 </param>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
36 <conditional name="class_filter_cond">
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
37 <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
38 <option value="70_70" selected="true">>70% class and >70% subclass</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
39 <option value="60_55">>60% class and >55% subclass</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
40 <option value="70_0">>70% class</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
41 <option value="60_0">>60% class</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
42 <option value="101_101">Do not assign (sub)class</option>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
43 </param>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
44 </conditional>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
45 <conditional name="naive_output_cond">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
46 <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
47 <option value="yes">Yes</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
48 <option value="no" selected="true">No</option>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
49 </param>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
50 </conditional>
5
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
51
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
52
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
53 <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
54 <option value="yes">Yes</option>
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
55 <option value="no" selected="true">No</option>
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
56 </param>
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
57
012a738edf5a Uploaded
davidvanzessen
parents: 1
diff changeset
58
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
59 </inputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
60 <outputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
61 <data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
62 <data format="imgt_archive" name="naive_output_ca" label = "Naive CA input data from ${in_file.name}" >
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
63 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
64 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
65 </data>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
66 <data format="imgt_archive" name="naive_output_cg" label = "Naive CG input data from ${in_file.name}" >
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
67 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
68 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
69 </data>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
70 <data format="imgt_archive" name="naive_output_cm" label = "Naive CM input data from ${in_file.name}" >
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
71 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
72 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
73 </data>
6
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
74 <data format="imgt_archive" name="naive_output_ce" label = "Naive CE input data from ${in_file.name}" >
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
75 <filter>naive_output_cond['naive_output'] == "yes"</filter>
18
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
76 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
77 </data>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
78 <data format="imgt_archive" name="naive_output_ca" label = "Naive input data from ${in_file.name}" >
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
79 <filter>naive_output_cond['naive_output'] == "yes"</filter>
949a30f04d9b Uploaded
davidvanzessen
parents: 15
diff changeset
80 <filter>class_filter_cond['class_filter'] == "101_101"</filter>
6
2ddb9a21f635 Uploaded
davidvanzessen
parents: 5
diff changeset
81 </data>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
82 </outputs>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
83 <citations>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
84 <citation type="doi">10.1093/nar/gks457</citation>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
85 <citation type="doi">10.1093/bioinformatics/btv359</citation>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
86 </citations>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
87 <help>
15
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
88 <![CDATA[
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
89 **References**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
90
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
91 Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_]
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
92
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
93 .. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
94
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
95 Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_]
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
96
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
97 .. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
98
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
99 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
100
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
101 **Input files**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
102
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
103 IMGT/HighV-QUEST .zip and .txz are accepted as input files.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
104
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
105 .. class:: infomark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
106
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
107 Note: Files can be uploaded by using “get data” and “upload file” and selecting “IMGT archive” as a file type.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
108
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
109 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
110
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
111 **Sequence starts at**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
112
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
113 Identifies the region which will be included in the analysis (analysed region)
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
114
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
115 - Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
116 - Sequences containing an ambiguous base in the analysed region are excluded
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
117 - All other filtering/analysis is based on the analysed region
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
118
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
119 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
120
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
121 **Functionality filter**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
122
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
123 Allows filtering on productive rearrangements, unproductive rearrangements or both, based on the assignment provided by IMGT.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
124
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
125 **Filter unique sequences**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
126
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
127 *Remove unique:*
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
128
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
129 This filter consists of two different steps.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
130
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
131 Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
132
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
133 Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
134
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
135 .. class:: infomark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
136
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
137 Note: This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
138
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
139 *Keep unique:*
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
140
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
141 Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
142
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
143 Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
144
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
145 +--------------------------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
146 | unique filter |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
147 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
148 | values | remove | keep |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
149 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
150 | A | A | A |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
151 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
152 | A | B | B |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
153 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
154 | B | D | C |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
155 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
156 | B | | D |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
157 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
158 | C | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
159 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
160 | D | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
161 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
162 | D | | |
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
163 +--------+--------+--------+
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
164
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
165 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
166
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
167 **Remove duplicates based on**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
168
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
169 Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
170
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
171 .. class:: infomark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
172
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
173 Note: The first sequence (in the data set) of each clone is always included in the analysis. When the first sequence is unmatched (no subclass assigned), the first matched sequence will be included instead. This means that altering the data order (for instance by sorting) can change the sequence which is included in the analysis and therefore slightly influence results.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
174
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
175 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
176
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
177 **Human Class/Subclass filter**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
178
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
179 .. class:: warningmark
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
180
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
181 Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the "do not assign (sub)class" option to prevent errors when running the pipeline.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
182
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
183 The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below).
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
184
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
185 The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) into 8-nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunk hit percentage and the nt hit percentage.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
186
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
187 *Chunk hit percentage*: the percentage of the chunks that is aligned
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
188
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
189 *Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotides that match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
190
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
191 -----
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
192
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
193 **Output new IMGT archives per class into your history?**
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
194
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
195 If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as IGGalaxy.
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
196
61d0a6318711 Uploaded
davidvanzessen
parents: 14
diff changeset
197 ]]>
0
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
198 </help>
c33d93683a09 Uploaded
davidvanzessen
parents:
diff changeset
199 </tool>