Mercurial > repos > iuc > hisat2
comparison hisat2.xml @ 0:5bafe7d7a55e draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/hisat2 commit 1e8d3feeb391aabcfff2338b57913138deea51ec-dirty
author | iuc |
---|---|
date | Sat, 10 Oct 2015 14:54:39 -0400 |
parents | |
children | 6d4d39720545 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5bafe7d7a55e |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="hisat2" name="HISAT2" version="1.0.0"> | |
3 <description>A fast and sensitive alignment program</description> | |
4 <macros> | |
5 <import>hisat2_macros.xml</import> | |
6 </macros> | |
7 <requirements> | |
8 <requirement type="package" version="2.0">hisat</requirement> | |
9 <requirement type="package" version="1.2">samtools</requirement> | |
10 </requirements> | |
11 <stdio> <!-- flags the job on the hisat2-align exit message (either stream) or on exit codes in range 1: --> | |
12 <regex level="fatal" source="both" match="hisat2-align exited with value 1"></regex> | |
13 <exit_code range="1:"></exit_code> | |
14 </stdio> | |
15 <version_command>hisat2 --version</version_command> | |
16 <command> | |
17 <![CDATA[ ## Builds the hisat2 call: optional splice-site extraction and index build, then alignment piped through samtools view and sort | |
18 #if str($spliced_options.spliced_options_selector) == "advanced" and str($spliced_options.known_splice_gtf) != 'None': | |
19 ln -s "${spliced_options.known_splice_gtf}" splice_sites.gtf && | |
20 extract_splice_sites.py splice_sites.gtf > splice_sites.txt && | |
21 #end if | |
22 #if $reference_genome.reference_genome_source == "history": | |
23 ln -s "$reference_genome.history_item" genome.fa && | |
24 hisat2-build genome.fa genome && | |
25 #set index_path = 'genome' | |
26 #else: | |
27 #set index_path = $reference_genome.index.fields.path | |
28 #end if | |
29 hisat2 -p \${GALAXY_SLOTS:-1} -x "${index_path}" | |
30 #if str($input_format.paired.paired_selector) == 'paired': | |
31 -1 "${input_format.paired.reads_f}" -2 "${input_format.paired.reads_r}" | |
32 @paired_end_options@ | |
33 #else if str($input_format.paired.paired_selector) == 'paired_collection': | |
34 -1 "${input_format.paired.reads.forward}" -2 "${input_format.paired.reads.reverse}" | |
35 @paired_end_options@ | |
36 #else: | |
37 -U "${input_format.paired.reads}" | |
38 #end if | |
39 #if $input_format.input_format_selector == 'fasta': | |
40 -f | |
41 #end if | |
42 #if $max_primary: | |
43 -k ${max_primary} | |
44 #end if | |
45 #if str($input_options.input_options_selector) == "advanced": | |
46 #if int( $input_options.skip ) > 0: | |
47 -s ${input_options.skip} | |
48 #end if | |
49 #if int( $input_options.stop_after ) > 0: | |
50 -u ${input_options.stop_after} | |
51 #end if | |
52 -5 ${input_options.trim_five} -3 ${input_options.trim_three} | |
53 #end if | |
54 #if str($scoring_options.scoring_options_selector) == "advanced": | |
55 --ma ${scoring_options.match_bonus} --np ${scoring_options.ambiguous_penalty} | |
56 --mp ${scoring_options.max_mismatch},${scoring_options.min_mismatch} | |
57 --rdg ${scoring_options.read_open_penalty},${scoring_options.read_extend_penalty} | |
58 --rfg ${scoring_options.ref_open_penalty},${scoring_options.ref_extend_penalty} | |
59 --sp ${scoring_options.soft_clip_penalty_max},${scoring_options.soft_clip_penalty_min} | |
60 --score-min ${scoring_options.function_type},${scoring_options.constant_term},${scoring_options.coefficient} | |
61 #end if | |
62 #if str($alignment_options.alignment_options_selector) == "advanced": | |
63 --n-ceil ${alignment_options.function_type},${alignment_options.constant_term},${alignment_options.coefficient} | |
64 ${alignment_options.ignore_quals} ${alignment_options.skip_forward} ${alignment_options.skip_reverse} | |
65 #end if | |
66 #if str($spliced_options.spliced_options_selector) == "advanced": | |
67 --pen-cansplice ${spliced_options.canonical_penalty} --pen-noncansplice ${spliced_options.noncanonical_penalty} | |
68 --pen-canintronlen ${spliced_options.function_type},${spliced_options.constant_term},${spliced_options.coefficient} | |
69 --pen-noncanintronlen ${spliced_options.nc_function_type},${spliced_options.nc_constant_term},${spliced_options.nc_coefficient} | |
70 #if str($spliced_options.known_splice_gtf) != 'None': | |
71 --known-splicesite-infile splice_sites.txt | |
72 #end if | |
73 ${spliced_options.no_spliced_alignment} | |
74 --min-intronlen ${spliced_options.min_intron} | |
75 --max-intronlen ${spliced_options.max_intron} | |
76 ${spliced_options.tma} | |
77 @strandedness_parameters@ | |
78 #end if | |
79 #if str($paired_options.paired_options_selector) == "advanced": | |
80 --minins ${paired_options.minins} --maxins ${paired_options.maxins} ${paired_options.no_mixed} ${paired_options.no_discordant} | |
81 ${paired_options.dovetail} ${paired_options.contain} ${paired_options.overlap} | |
82 #end if | |
83 | samtools view -bS - | samtools sort - -o hsbam > "${output_alignments}" | |
84 ]]> | |
85 </command> | |
86 <inputs> | |
87 <conditional name="input_format"> <!-- read file format; each branch expands the paired_input_conditional macro with the matching ftype --> | |
88 <param name="input_format_selector" type="select" label="Input data format"> | |
89 <option value="fastq" selected="selected">FASTQ</option> | |
90 <option value="fasta">FASTA</option> | |
91 </param> | |
92 <when value="fastq"> | |
93 <expand macro="paired_input_conditional" ftype="fastq" /> | |
94 </when> | |
95 <when value="fasta"> | |
96 <expand macro="paired_input_conditional" ftype="fasta" /> | |
97 </when> | |
98 </conditional> | |
99 <conditional name="reference_genome"> <!-- reference comes either from the hisat2_indexes data table or from a FASTA dataset in the history --> | |
100 <param name="reference_genome_source" type="select" label="Source for the reference genome to align against" help="Built-in references were created using default options"> | |
101 <option value="indexed" selected="True">Use a built-in genome</option> | |
102 <option value="history">Use a genome from history</option> | |
103 </param> | |
104 <when value="indexed"> | |
105 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> | |
106 <options from_data_table="hisat2_indexes"> | |
107 <filter type="sort_by" column="2" /> | |
108 <validator type="no_options" message="No genomes are available for the selected input dataset" /> | |
109 </options> | |
110 </param> | |
111 </when> | |
112 <when value="history"> | |
113 <param name="history_item" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" /> | |
114 </when> | |
115 </conditional> | |
116 <param name="max_primary" argument="-k" type="integer" value="5" default="5" optional="True" label="Primary alignments" help="Search for at most K distinct, primary alignments for each read. Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. The search terminates when it can't find more distinct valid alignments, or when it finds K, whichever happens first." /> <!-- passed to hisat2 as -k only when a value is set --> | |
117 <conditional name="alignment_options"> <!-- optional alignment switches: ambiguous-character ceiling function, quality handling, strand restrictions --> | |
118 <param label="Alignment options" name="alignment_options_selector" type="select"> | |
119 <option value="defaults">Use default values</option> | |
120 <option value="advanced">Specify alignment parameters</option> | |
121 </param> | |
122 <when value="defaults" /> | |
123 <when value="advanced"> | |
124 <expand macro="function" helptext="Sets a function governing the maximum number of ambiguous characters" /> | |
125 <param argument="--ignore-quals" falsevalue="" help="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value. I.e. input is treated as though all quality values are high. This is also the default behavior when the input doesn't specify quality values." label="Ignore quality values" name="ignore_quals" truevalue="--ignore-quals" type="boolean" /> | |
126 <param argument="--nofw" falsevalue="" help="If --nofw is specified, hisat will not attempt to align unpaired reads to the forward (Watson) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand." label="Skip forward strand of reference" name="skip_forward" truevalue="--nofw" type="boolean" /> | |
127 <param argument="--norc" falsevalue="" help="If --norc is specified, hisat will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand." label="Skip reverse strand of reference" name="skip_reverse" truevalue="--norc" type="boolean" /> | |
128 </when> | |
129 </conditional> | |
130 <conditional name="input_options"> <!-- optional read skipping, early stop and end trimming --> | |
131 <param name="input_options_selector" type="select" label="Input options"> | |
132 <option value="defaults">Use default values</option> | |
133 <option value="advanced">Specify input parameters</option> | |
134 </param> | |
135 <when value="defaults"></when> | |
136 <when value="advanced"> | |
137 <param name="skip" argument="-s" type="integer" value="0" default="0" min="0" label="Skip the first N reads or pairs in the input" /> | |
138 <param name="stop_after" argument="-u" type="integer" value="0" default="0" min="0" label="Stop after aligning N reads" help="Align the first N reads or read pairs from the input (after the first N reads or pairs have been skipped), then stop." /> | |
139 <param name="trim_five" argument="-5" type="integer" value="0" default="0" min="0" label="Trim 5' end" help="Trim N bases from 5' (left) end of each read before alignment" /> | |
140 <param name="trim_three" argument="-3" type="integer" value="0" default="0" min="0" label="Trim 3' end" help="Trim N bases from 3' (right) end of each read before alignment" /> | |
141 </when> | |
142 </conditional> | |
143 <conditional name="scoring_options"> <!-- optional scoring overrides: minimum-score function, match bonus, mismatch, ambiguity, soft-clip and gap penalties --> | |
144 <param label="Scoring options" name="scoring_options_selector" type="select"> | |
145 <option value="defaults">Use default values</option> | |
146 <option value="advanced">Specify scoring parameters</option> | |
147 </param> | |
148 <when value="defaults" /> | |
149 <when value="advanced"> | |
150 <expand macro="function" helptext="Sets a function governing the minimum alignment score needed for an alignment to be considered &quot;valid&quot; (i.e. good enough to report)." /> | |
151 <param argument="--ma" default="2" help="In local mode N is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in end-to-end mode" label="Set match bonus" name="match_bonus" type="integer" value="2" /> | |
152 <param argument="--mp" default="6" help="Sets the maximum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value." label="Maximum mismatch penalty" name="max_mismatch" type="integer" value="6" /> | |
153 <param argument="--mp" default="2" help="Sets the minimum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value." label="Minimum mismatch penalty" name="min_mismatch" type="integer" value="2" /> | |
154 <param argument="--np" default="1" help="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N" label="Ambiguous read penalty" name="ambiguous_penalty" type="integer" value="1" /> | |
155 <param argument="--sp" default="2" help="Sets the maximum (MX) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value." label="Maximum soft-clipping penalty" name="soft_clip_penalty_max" type="integer" value="2" /> | |
156 <param argument="--sp" default="1" help="Sets the minimum (MN) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value." label="Minimum soft-clipping penalty" name="soft_clip_penalty_min" type="integer" value="1" /> | |
157 <param argument="--rdg" default="5" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" label="Read gap open penalty." name="read_open_penalty" type="integer" value="5" /> | |
158 <param argument="--rdg" default="3" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" label="Read gap extend penalty." name="read_extend_penalty" type="integer" value="3" /> | |
159 <param argument="--rfg" default="5" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" label="Reference gap open penalty." name="ref_open_penalty" type="integer" value="5" /> | |
160 <param argument="--rfg" default="3" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" label="Reference gap extend penalty." name="ref_extend_penalty" type="integer" value="3" /> | |
161 </when> | |
162 </conditional> | |
163 <conditional name="spliced_options"> <!-- optional spliced-alignment tuning: splice-site and intron-length penalties, intron length bounds, strandedness, known splice sites, reporting mode --> | |
164 <param label="Spliced alignment parameters" name="spliced_options_selector" type="select"> | |
165 <option value="defaults">Use default values</option> | |
166 <option value="advanced">Specify spliced alignment parameters</option> | |
167 </param> | |
168 <when value="defaults" /> | |
169 <when value="advanced"> | |
170 <param label="Penalty for canonical splice sites" name="canonical_penalty" type="integer" value="0" /> | |
171 <param label="Penalty for non-canonical splice sites" name="noncanonical_penalty" type="integer" value="3" /> | |
172 <param display="radio" label="Penalty for long introns with canonical splice sites" name="function_type" type="select"> | |
173 <option value="C">Constant</option> | |
174 <option value="L">Linear [f(x) = y + z * x]</option> | |
175 <option value="S">Square root [f(x) = y + z * sqrt(x)]</option> | |
176 <option value="G">Natural logarithm [f(x) = y + z * log(x)]</option> | |
177 </param> | |
178 <param help="Constant term for long canonical introns" label="Constant term (y)" name="constant_term" type="integer" value="0" /> | |
179 <param help="Coefficient for long canonical introns" label="Coefficient (z)" name="coefficient" type="integer" value="0" /> | |
180 <param display="radio" label="Penalty for long introns with noncanonical splice sites." name="nc_function_type" type="select"> | |
181 <option value="C">Constant</option> | |
182 <option value="L">Linear [f(x) = y + z * x]</option> | |
183 <option value="S">Square root [f(x) = y + z * sqrt(x)]</option> | |
184 <option selected="selected" value="G">Natural logarithm [f(x) = y + z * log(x)]</option> | |
185 </param> | |
186 <param help="Constant term for long non-canonical introns" label="Constant term (y)" name="nc_constant_term" type="integer" value="-8" /> | |
187 <param help="Coefficient for long non-canonical introns" label="Coefficient (z)" name="nc_coefficient" type="integer" value="1" /> | |
188 <param label="Minimum intron length" name="min_intron" type="integer" value="20" /> | |
189 <param label="Maximum intron length" name="max_intron" type="integer" value="500000" /> | |
190 <param argument="--rna-strandness" name="rna_strandness" type="select" label="Specify strand-specific information" | |
191 help="'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript."> | |
192 <option value="">FR Unstranded</option> | |
193 <option value="R">First Strand (R/RF)</option> | |
194 <option value="F">Second Strand (F/FR)</option> | |
195 </param> | |
196 <param argument="--no-spliced-alignment" name="no_spliced_alignment" type="boolean" truevalue="--no-spliced-alignment" falsevalue="" label="Disable spliced alignment" /> <!-- name matches the spliced_options.no_spliced_alignment reference in the command template --> | |
197 <param format="gtf" label="GTF file with known splice sites" name="known_splice_gtf" optional="True" type="data" /> | |
198 <param display="radio" label="Transcriptome assembly reporting" name="tma" type="select"> | |
199 <option value="">Use default reporting.</option> | |
200 <option value="--tmo">Report only those alignments within known transcripts.</option> | |
201 <option value="--dta">Report alignments tailored for transcript assemblers including StringTie.</option> | |
202 <option value="--dta-cufflinks">Report alignments tailored specifically for Cufflinks.</option> | |
203 </param> | |
204 </when> | |
205 </conditional> | |
206 <conditional name="paired_options"> <!-- optional paired-end constraints: fragment length bounds and which mate configurations count as concordant --> | |
207 <param label="Paired alignment parameters" name="paired_options_selector" type="select"> | |
208 <option value="defaults">Use default values</option> | |
209 <option value="advanced">Specify paired alignment parameters</option> | |
210 </param> | |
211 <when value="defaults" /> | |
212 <when value="advanced"> | |
213 <param argument="--minins" help="The minimum fragment length for valid paired-end alignments. 0 sets no minimum." label="Minimum fragment length" name="minins" type="integer" value="0" /> | |
214 <param argument="--maxins" help="The maximum fragment length for valid paired-end alignments" label="Maximum fragment length" name="maxins" type="integer" value="500" /> | |
215 <param argument="--no-mixed" falsevalue="" help="By default, when hisat2 cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates" label="Disable finding alignments for individual mates." name="no_mixed" truevalue="--no-mixed" type="boolean" /> | |
216 <param argument="--no-discordant" falsevalue="" help="By default, hisat2 looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints" label="Disable looking for discordant alignments." name="no_discordant" truevalue="--no-discordant" type="boolean" /> | |
217 <param argument="--dovetail" falsevalue="" help="If the mates &quot;dovetail&quot;, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant." label="Allow mates to dovetail" name="dovetail" truevalue="--dovetail" type="boolean" /> | |
218 <param argument="--no-contain" falsevalue="" help="If one mate alignment contains the other, consider that to be non-concordant." label="Mates cannot contain others" name="contain" truevalue="--no-contain" type="boolean" /> | |
219 <param argument="--no-overlap" falsevalue="" help="If one mate alignment overlaps the other at all, consider that to be non-concordant." label="Mates cannot overlap" name="overlap" truevalue="--no-overlap" type="boolean" /> | |
220 </when> | |
221 </conditional> | |
222 </inputs> | |
223 <outputs> <!-- single BAM dataset; the command redirects the samtools sort output into it --> | |
224 <data name="output_alignments" format="bam"></data> | |
225 </outputs> | |
226 <tests> | |
227 <test> <!-- paired FASTQ reads against a phiX FASTA from history, all option groups at defaults --> | |
228 <param name="input_format_selector" value="fastq" /> | |
229 <param name="paired_selector" value="paired" /> | |
230 <param name="reference_genome_source" value="history" /> | |
231 <param name="history_item" value="phiX.fa" ftype="fasta" /> | |
232 <param name="reads_f" value="hisat_input_1_forward.fastq" ftype="fastqsanger" /> | |
233 <param name="reads_r" value="hisat_input_1_reverse.fastq" ftype="fastqsanger" /> | |
234 <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" /> | |
235 </test> | |
236 <test> <!-- same setup with advanced input options: 15 bases trimmed from each read end --> | |
237 <param name="input_format_selector" value="fastq" /> | |
238 <param name="paired_selector" value="paired" /> | |
239 <param name="reference_genome_source" value="history" /> | |
240 <param name="history_item" value="phiX.fa" ftype="fasta" /> | |
241 <param name="input_options_selector" value="advanced" /> | |
242 <param name="trim_three" value="15" /> | |
243 <param name="trim_five" value="15" /> | |
244 <param name="reads_f" value="hisat_input_2_forward.fastq" ftype="fastqsanger" /> | |
245 <param name="reads_r" value="hisat_input_2_reverse.fastq" ftype="fastqsanger" /> | |
246 <output name="output_alignments" file="hisat_output_2.bam" ftype="bam" /> | |
247 </test> | |
248 <test> <!-- trimming as in the previous test, plus no_mixed and no_discordant switched on --> | |
249 <param name="input_format_selector" value="fastq" /> | |
250 <param name="paired_selector" value="paired" /> | |
251 <param name="reference_genome_source" value="history" /> | |
252 <param name="history_item" value="phiX.fa" ftype="fasta" /> | |
253 <param name="input_options_selector" value="advanced" /> | |
254 <param name="trim_three" value="15" /> | |
255 <param name="trim_five" value="15" /> | |
256 <param name="reads_f" value="hisat_input_2_forward.fastq" ftype="fastqsanger" /> | |
257 <param name="reads_r" value="hisat_input_2_reverse.fastq" ftype="fastqsanger" /> | |
258 <param name="paired_end_options_selector" value="advanced" /> | |
259 <param name="no_mixed" value="True" /> | |
260 <param name="no_discordant" value="True" /> | |
261 <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" /> | |
262 </test> | |
263 </tests> | |
264 <help> | |
265 <![CDATA[ | |
266 Introduction | |
267 ============ | |
268 | |
269 What is HISAT? | |
270 -------------- | |
271 | |
272 `HISAT <http://ccb.jhu.edu/software/hisat>`__ is a fast and sensitive | |
273 spliced alignment program. As part of HISAT, we have developed a new | |
274 indexing scheme based on the Burrows-Wheeler transform | |
275 (`BWT <http://en.wikipedia.org/wiki/Burrows-Wheeler_transform>`__) and | |
276 the `FM index <http://en.wikipedia.org/wiki/FM-index>`__, called | |
277 hierarchical indexing, that employs two types of indexes: (1) one global | |
278 FM index representing the whole genome, and (2) many separate local FM | |
279 indexes for small regions collectively covering the genome. Our | |
280 hierarchical index for the human genome (about 3 billion bp) includes | |
281 ~48,000 local FM indexes, each representing a genomic region of | |
282 ~64,000bp. As the basis for non-gapped alignment, the FM index is | |
283 extremely fast with a low memory footprint, as demonstrated by | |
284 `Bowtie <http://bowtie-bio.sf.net>`__. In addition, HISAT provides | |
285 several alignment strategies specifically designed for mapping different | |
286 types of RNA-seq reads. Taken together, these features enable extremely fast | |
287 and sensitive alignment of reads, in particular those spanning two exons | |
288 or more. As a result, HISAT is more than 50 times faster than | |
289 `TopHat2 <http://ccb.jhu.edu/software/tophat>`__ with better alignment | |
290 quality. Although it uses a large number of indexes, the memory | |
291 requirement of HISAT is still modest, approximately 4.3 GB for human. | |
292 HISAT uses the `Bowtie2 <http://bowtie-bio.sf.net/bowtie2>`__ | |
293 implementation to handle most of the operations on the FM index. In | |
294 addition to spliced alignment, HISAT handles reads involving indels and | |
295 supports a paired-end alignment mode. Multiple processors can be used | |
296 simultaneously to achieve greater alignment speed. HISAT outputs | |
297 alignments in `SAM <http://samtools.sourceforge.net/SAM1.pdf>`__ format, | |
298 enabling interoperation with a large number of other tools (e.g. | |
299 `SAMtools <http://samtools.sourceforge.net>`__, | |
300 `GATK <http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit>`__) | |
301 that use SAM. HISAT is distributed under the `GPLv3 | |
302 license <http://www.gnu.org/licenses/gpl-3.0.html>`__, and it runs on | |
303 the command line under Linux, Mac OS X and Windows. | |
304 | |
305 Running HISAT | |
306 ============= | |
307 | |
308 Reporting | |
309 --------- | |
310 | |
311 The reporting mode governs how many alignments HISAT looks for, and how | |
312 to report them. | |
313 | |
314 In general, when we say that a read has an alignment, we mean that it | |
315 has a `valid | |
316 alignment <#valid-alignments-meet-or-exceed-the-minimum-score-threshold>`__. | |
317 When we say that a read has multiple alignments, we mean that it has | |
318 multiple alignments that are valid and distinct from one another. | |
319 | |
320 Distinct alignments map a read to different places | |
321 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
322 | |
323 Two alignments for the same individual read are "distinct" if they map | |
324 the same read to different places. Specifically, we say that two | |
325 alignments are distinct if there are no alignment positions where a | |
326 particular read offset is aligned opposite a particular reference offset | |
327 in both alignments with the same orientation. E.g. if the first | |
328 alignment is in the forward orientation and aligns the read character at | |
329 read offset 10 to the reference character at chromosome 3, offset | |
330 3,445,245, and the second alignment is also in the forward orientation | |
331 and also aligns the read character at read offset 10 to the reference | |
332 character at chromosome 3, offset 3,445,245, they are not distinct | |
333 alignments. | |
334 | |
335 Two alignments for the same pair are distinct if either the mate 1s in | |
336 the two paired-end alignments are distinct or the mate 2s in the two | |
337 alignments are distinct or both. | |
338 | |
339 Default mode: search for one or more alignments, report each | |
340 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
341 | |
342 HISAT searches for up to N distinct, primary alignments for each read, | |
343 where N equals the integer specified with the ``-k`` parameter. Primary | |
344 alignments mean alignments whose alignment score is equal or higher than | |
345 any other alignments. It is possible that multiple distinct alignments | |
346 have the same score. That is, if ``-k 2`` is specified, HISAT will | |
347 search for at most 2 distinct alignments. The alignment score for a | |
348 paired-end alignment equals the sum of the alignment scores of the | |
349 individual mates. Each reported read or pair alignment beyond the first | |
350 has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. | |
351 See the `SAM specification <http://samtools.sourceforge.net/SAM1.pdf>`__ | |
352 for details. | |
353 | |
354 HISAT does not "find" alignments in any specific order, so for reads | |
355 that have more than N distinct, valid alignments, HISAT does not | |
356 guarantee that the N alignments reported are the best possible in terms | |
357 of alignment score. Still, this mode can be effective and fast in | |
358 situations where the user cares more about whether a read aligns (or | |
359 aligns a certain number of times) than where exactly it originated. | |
360 | |
361 Alignment summary | |
362 ------------------ | |
363 | |
364 When HISAT finishes running, it prints messages summarizing what | |
365 happened. These messages are printed to the "standard error" ("stderr") | |
366 filehandle. For datasets consisting of unpaired reads, the summary might | |
367 look like this: | |
368 | |
369 :: | |
370 | |
371 20000 reads; of these: | |
372 20000 (100.00%) were unpaired; of these: | |
373 1247 (6.24%) aligned 0 times | |
374 18739 (93.69%) aligned exactly 1 time | |
375 14 (0.07%) aligned >1 times | |
376 93.77% overall alignment rate | |
377 | |
378 For datasets consisting of pairs, the summary might look like this: | |
379 | |
380 :: | |
381 | |
382 10000 reads; of these: | |
383 10000 (100.00%) were paired; of these: | |
384 650 (6.50%) aligned concordantly 0 times | |
385 8823 (88.23%) aligned concordantly exactly 1 time | |
386 527 (5.27%) aligned concordantly >1 times | |
387 ---- | |
388 650 pairs aligned concordantly 0 times; of these: | |
389 34 (5.23%) aligned discordantly 1 time | |
390 ---- | |
391 616 pairs aligned 0 times concordantly or discordantly; of these: | |
392 1232 mates make up the pairs; of these: | |
393 660 (53.57%) aligned 0 times | |
394 571 (46.35%) aligned exactly 1 time | |
395 1 (0.08%) aligned >1 times | |
396 96.70% overall alignment rate | |
397 | |
398 The indentation indicates how subtotals relate to totals. | |
399 ]]> | |
400 </help> | |
401 <citations> <!-- DOI of the publication to cite when using this tool --> | |
402 <citation type="doi">10.1038/nmeth.3317</citation> | |
403 </citations> | |
404 </tool> |