Mercurial > repos > iuc > hisat2
comparison hisat2.xml @ 22:a26ed87f444c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hisat2 commit ac79103bf57c195226877a698dc197a965f82aba
author | iuc |
---|---|
date | Tue, 24 Jul 2018 09:29:27 -0400 |
parents | 0b1c04a90182 |
children | 6daca6da3059 |
comparison
equal
deleted
inserted
replaced
21:0b1c04a90182 | 22:a26ed87f444c |
---|---|
1 <tool id="hisat2" name="HISAT2" version="2.1.0+galaxy1" profile="17.01"> | 1 <tool id="hisat2" name="HISAT2" version="2.1.0+galaxy2" profile="17.01"> |
2 <description>A fast and sensitive alignment program</description> | 2 <description>A fast and sensitive alignment program</description> |
3 <macros> | 3 <macros> |
4 <import>hisat2_macros.xml</import> | 4 <import>hisat2_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
7 <requirement type="package" version="2.1.0">hisat2</requirement> | 7 <requirement type="package" version="2.1.0">hisat2</requirement> |
8 <requirement type="package" version="1.8">samtools</requirement> | 8 <requirement type="package" version="1.9">samtools</requirement> |
9 <requirement type="package" version="1.3">seqtk</requirement> | |
9 </requirements> | 10 </requirements> |
10 <stdio> | 11 <stdio> |
11 <regex level="fatal" match="hisat2-align exited with value 1" source="both" /> | 12 <regex level="fatal" match="hisat2-align exited with value 1" source="both" /> |
12 <regex level="fatal" match="hisat2: not found" source="both" /> | 13 <regex level="fatal" match="hisat2: not found" source="both" /> |
13 <exit_code range="1:" /> | 14 <exit_code range="1:" /> |
40 | 41 |
41 #if str($library.type) == 'paired': | 42 #if str($library.type) == 'paired': |
42 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): | 43 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): |
43 #set read1 = "input_f.fastq.gz" | 44 #set read1 = "input_f.fastq.gz" |
44 #set compressed = "GZ" | 45 #set compressed = "GZ" |
45 #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | 46 #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
46 #set read1 = "input_f.fastq.bz2" | 47 #set read1 = "input_f.fastq.bz2" |
47 #set compressed = "BZ2" | 48 #set compressed = "BZ2" |
48 #else if $library.input_1.is_of_type('fasta'): | 49 #elif $library.input_1.is_of_type('fasta'): |
49 #set reads_are_fastq = False | 50 #set reads_are_fastq = False |
50 #set read1 = "input_f.fasta" | 51 #set read1 = "input_f.fasta" |
51 #else: | 52 #else: |
52 #set read1 = "input_f.fastq" | 53 #set read1 = "input_f.fastq" |
53 #end if | 54 #end if |
54 ln -f -s '${library.input_1}' ${read1} && | 55 ln -f -s '${library.input_1}' ${read1} && |
55 | 56 |
56 #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"): | 57 #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"): |
57 #set read2 = "input_r.fastq.gz" | 58 #set read2 = "input_r.fastq.gz" |
58 #set compressed = "GZ" | 59 #set compressed = "GZ" |
59 #else if $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"): | 60 #elif $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
60 #set read2 = "input_r.fastq.bz2" | 61 #set read2 = "input_r.fastq.bz2" |
61 #set compressed = "BZ2" | 62 #set compressed = "BZ2" |
62 #else if $library.input_2.is_of_type('fasta'): | 63 #elif $library.input_2.is_of_type('fasta'): |
63 #set read2 = "input_r.fasta" | 64 #set read2 = "input_r.fasta" |
64 #else: | 65 #else: |
65 #set read2 = "input_r.fastq" | 66 #set read2 = "input_r.fastq" |
66 #end if | 67 #end if |
67 ln -f -s '${library.input_2}' ${read2} && | 68 ln -f -s '${library.input_2}' ${read2} && |
68 | 69 |
69 #else if str($library.type) == 'paired_collection': | 70 #elif str($library.type) == 'paired_collection': |
70 #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): | 71 #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): |
71 #set read1 = "input_f.fastq.gz" | 72 #set read1 = "input_f.fastq.gz" |
72 #set compressed = "GZ" | 73 #set compressed = "GZ" |
73 #else if $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): | 74 #elif $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
74 #set read1 = "input_f.fastq.bz2" | 75 #set read1 = "input_f.fastq.bz2" |
75 #set compressed = "BZ2" | 76 #set compressed = "BZ2" |
76 #else if $library.input_1.forward.is_of_type('fasta'): | 77 #elif $library.input_1.forward.is_of_type('fasta'): |
77 #set reads_are_fastq = False | 78 #set reads_are_fastq = False |
78 #set read1 = "input_f.fasta" | 79 #set read1 = "input_f.fasta" |
79 #else: | 80 #else: |
80 #set read1 = "input_f.fastq" | 81 #set read1 = "input_f.fastq" |
81 #end if | 82 #end if |
82 ln -s '${library.input_1.forward}' ${read1} && | 83 ln -s '${library.input_1.forward}' ${read1} && |
83 | 84 |
84 #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"): | 85 #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"): |
85 #set read2 = "input_r.fastq.gz" | 86 #set read2 = "input_r.fastq.gz" |
86 #set compressed = "GZ" | 87 #set compressed = "GZ" |
87 #else if $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"): | 88 #elif $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
88 #set read2 = "input_r.fastq.bz2" | 89 #set read2 = "input_r.fastq.bz2" |
89 #set compressed = "BZ2" | 90 #set compressed = "BZ2" |
90 #else if $library.input_1.reverse.is_of_type("fasta"): | 91 #elif $library.input_1.reverse.is_of_type("fasta"): |
91 #set read2 = "input_r.fasta" | 92 #set read2 = "input_r.fasta" |
92 #else: | 93 #else: |
93 #set read2 = "input_r.fastq" | 94 #set read2 = "input_r.fastq" |
94 #end if | 95 #end if |
95 ln -s '${library.input_1.reverse}' ${read2} && | 96 ln -s '${library.input_1.reverse}' ${read2} && |
96 | 97 #elif str( $library.type ) == "paired_interleaved": |
98 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): | |
99 #set interleaved_reads = "input_f.fastq.gz" | |
100 #set compressed = "GZ" | |
101 #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
102 #set interleaved_reads = "input_f.fastq.bz2" | |
103 #set compressed = "BZ2" | |
104 #elif $library.input_1.is_of_type('fasta'): | |
105 #set reads_are_fastq = False | |
106 #set interleaved_reads = "input_f.fasta" | |
107 #else: | |
108 #set interleaved_reads = "input_f.fastq" | |
109 #end if | |
110 ln -f -s '${library.input_1}' ${interleaved_reads} && | |
111 #if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | |
112 #set read1 = "<(bzcat input_f.fastq.bz2 | seqtk seq -1 /dev/stdin)" | |
113 #set read2 = "<(bzcat input_f.fastq.bz2 | seqtk seq -2 /dev/stdin)" | |
114 #else: | |
115 #set read1 = "<(seqtk seq -1 %s)" % $interleaved_reads | |
116 #set read2 = "<(seqtk seq -2 %s)" % $interleaved_reads | |
117 #end if | |
97 #else: | 118 #else: |
98 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): | 119 #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): |
99 #set read1 = "input_f.fastq.gz" | 120 #set read1 = "input_f.fastq.gz" |
100 #set compressed = "GZ" | 121 #set compressed = "GZ" |
101 #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): | 122 #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): |
102 #set read1 = "input_f.fastq.bz2" | 123 #set read1 = "input_f.fastq.bz2" |
103 #set compressed = "BZ2" | 124 #set compressed = "BZ2" |
104 #else if $library.input_1.is_of_type('fasta'): | 125 #elif $library.input_1.is_of_type('fasta'): |
105 #set reads_are_fastq = False | 126 #set reads_are_fastq = False |
106 #set read1 = "input_f.fasta" | 127 #set read1 = "input_f.fasta" |
107 #else: | 128 #else: |
108 #set read1 = "input_f.fastq" | 129 #set read1 = "input_f.fastq" |
109 #end if | 130 #end if |
134 | 155 |
135 #if str($adv.output_options.output_options_selector) == "advanced": | 156 #if str($adv.output_options.output_options_selector) == "advanced": |
136 #if str( $adv.output_options.unaligned_file ) == "true": | 157 #if str( $adv.output_options.unaligned_file ) == "true": |
137 #if $compressed == "GZ": | 158 #if $compressed == "GZ": |
138 --un-gz '$output_unaligned_reads_l' | 159 --un-gz '$output_unaligned_reads_l' |
139 #else if $compressed == "BZ2": | 160 #elif $compressed == "BZ2": |
140 --un-bz2 '$output_unaligned_reads_l' | 161 --un-bz2 '$output_unaligned_reads_l' |
141 #else: | 162 #else: |
142 --un '$output_unaligned_reads_l' | 163 --un '$output_unaligned_reads_l' |
143 #end if | 164 #end if |
144 #end if | 165 #end if |
145 | 166 |
146 #if str( $adv.output_options.aligned_file ) == "true": | 167 #if str( $adv.output_options.aligned_file ) == "true": |
147 #if $compressed == "GZ": | 168 #if $compressed == "GZ": |
148 --al-gz '$output_aligned_reads_l' | 169 --al-gz '$output_aligned_reads_l' |
149 #else if $compressed == "BZ2": | 170 #elif $compressed == "BZ2": |
150 --al-bz2 '$output_aligned_reads_l' | 171 --al-bz2 '$output_aligned_reads_l' |
151 #else: | 172 #else: |
152 --al '$output_aligned_reads_l' | 173 --al '$output_aligned_reads_l' |
153 #end if | 174 #end if |
154 #end if | 175 #end if |
155 #end if | 176 #end if |
156 | 177 |
157 #else: | 178 #else: |
158 | 179 ##quotes are embedded in r1 and r2 variables, needed to allow use of <() |
159 -1 '${read1}' | 180 #if str( $library.type ) == "paired_interleaved": |
160 -2 '${read2}' | 181 -1 ${read1} |
161 | 182 -2 ${read2} |
183 #else: | |
184 -1 '${read1}' | |
185 -2 '${read2}' | |
186 #end if | |
162 #if str($adv.output_options.output_options_selector) == "advanced": | 187 #if str($adv.output_options.output_options_selector) == "advanced": |
163 #if str( $adv.output_options.unaligned_file ) == "true": | 188 #if str( $adv.output_options.unaligned_file ) == "true": |
164 #if $compressed == "GZ": | 189 #if $compressed == "GZ": |
165 --un-conc-gz '${output_unaligned_reads_l}' | 190 --un-conc-gz '${output_unaligned_reads_l}' |
166 #else if $compressed == "BZ2": | 191 #elif $compressed == "BZ2": |
167 --un-conc-bz2 '${output_unaligned_reads_l}' | 192 --un-conc-bz2 '${output_unaligned_reads_l}' |
168 #else: | 193 #else: |
169 --un-conc '${output_unaligned_reads_l}' | 194 --un-conc '${output_unaligned_reads_l}' |
170 #end if | 195 #end if |
171 #end if | 196 #end if |
172 | 197 |
173 #if str( $adv.output_options.aligned_file ) == "true": | 198 #if str( $adv.output_options.aligned_file ) == "true": |
174 #if $compressed == "GZ": | 199 #if $compressed == "GZ": |
175 --al-conc-gz '${output_aligned_reads_l}' | 200 --al-conc-gz '${output_aligned_reads_l}' |
176 #else if $compressed == "BZ2": | 201 #elif $compressed == "BZ2": |
177 --al-conc-bz2 '${output_aligned_reads_l}' | 202 --al-conc-bz2 '${output_aligned_reads_l}' |
178 #else: | 203 #else: |
179 --al-conc '${output_aligned_reads_l}' | 204 --al-conc '${output_aligned_reads_l}' |
180 #end if | 205 #end if |
181 #end if | 206 #end if |
290 #end if | 315 #end if |
291 | 316 |
292 ## Convert SAM output to sorted BAM | 317 ## Convert SAM output to sorted BAM |
293 ## using the two pipe stages has the following effect | 318 ## using the two pipe stages has the following effect |
294 ## - hisat2 and sort run in parallel, during this time sort produces | 319 ## - hisat2 and sort run in parallel, during this time sort produces |
295 ## presorted temporary files but does not produce output (hence | 320 ## presorted temporary files but does not produce output (hence |
296 ## view does not run) | 321 ## view does not run) |
297 ## - once hisat is finished sort will start to merge the temporary | 322 ## - once hisat is finished sort will start to merge the temporary |
298 ## files (which should be fast also on a single thread) gives the | 323 ## files (which should be fast also on a single thread) gives the |
299 ## sorted output to view which only compresses the files (now | 324 ## sorted output to view which only compresses the files (now |
300 ## using full parallelism again) | 325 ## using full parallelism again) |
301 | 326 |
302 | samtools sort -l 0 -O bam | samtools view -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}' | 327 | samtools sort -l 0 -O bam | samtools view -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}' |
303 | 328 |
304 ## Rename any output fastq files | 329 ## Rename any output fastq files |
340 </when> | 365 </when> |
341 </conditional> | 366 </conditional> |
342 | 367 |
343 <!-- Reads --> | 368 <!-- Reads --> |
344 <conditional name="library"> | 369 <conditional name="library"> |
345 <param name="type" type="select" label="Single-end or paired-end reads?"> | 370 <param name="type" type="select" label="Is this a single or paired library"> |
346 <option value="single">Single-end</option> | 371 <option value="single">Single-end</option> |
347 <option value="paired">Paired-end</option> | 372 <option value="paired">Paired-end</option> |
348 <option value="paired_collection">Paired-end Collection</option> | 373 <option value="paired_collection">Paired-end Dataset Collection</option> |
374 <option value="paired_interleaved">Paired-end data from single interleaved dataset</option> | |
349 </param> | 375 </param> |
350 | 376 |
351 <when value="single"> | 377 <when value="single"> |
352 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" /> | 378 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" /> |
353 <param name="rna_strandness" argument="--rna-strandness" type="select" label="Specify strand information" | 379 <param name="rna_strandness" argument="--rna-strandness" type="select" label="Specify strand information" |
364 <expand macro="paired_end_options" /> | 390 <expand macro="paired_end_options" /> |
365 </when> | 391 </when> |
366 | 392 |
367 <when value="paired_collection"> | 393 <when value="paired_collection"> |
368 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" /> | 394 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" /> |
395 <expand macro="paired_end_options" /> | |
396 </when> | |
397 <when value="paired_interleaved"> | |
398 <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/> | |
369 <expand macro="paired_end_options" /> | 399 <expand macro="paired_end_options" /> |
370 </when> | 400 </when> |
371 </conditional> | 401 </conditional> |
372 | 402 |
373 <!-- Summary Options --> | 403 <!-- Summary Options --> |
707 <param name="rna_strandness" value="R" /> | 737 <param name="rna_strandness" value="R" /> |
708 <param name="new_summary" value="true" /> | 738 <param name="new_summary" value="true" /> |
709 <param name="summary_file" value="true" /> | 739 <param name="summary_file" value="true" /> |
710 <output name="summary_file" file="hisat_output.summary" ftype="txt" /> | 740 <output name="summary_file" file="hisat_output.summary" ftype="txt" /> |
711 </test> | 741 </test> |
742 <!-- Ensure interleaved input works --> | |
743 <test expect_num_outputs="1" > | |
744 <param name="type" value="paired_interleaved" /> | |
745 <param name="source" value="history" /> | |
746 <param name="history_item" ftype="fasta" value="phiX.fa" /> | |
747 <param name="input_1" ftype="fastqsanger" value="hisat_input_1_interleaved.fastq" /> | |
748 <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> | |
749 </test> | |
750 <!-- Ensure interleaved bz input works --> | |
751 <test expect_num_outputs="1" > | |
752 <param name="type" value="paired_interleaved" /> | |
753 <param name="source" value="history" /> | |
754 <param name="history_item" ftype="fasta" value="phiX.fa" /> | |
755 <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" /> | |
756 <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> | |
757 </test> | |
758 <!-- Ensure interleaved gz input works --> | |
759 <test expect_num_outputs="1" > | |
760 <param name="type" value="paired_interleaved" /> | |
761 <param name="source" value="history" /> | |
762 <param name="history_item" ftype="fasta" value="phiX.fa" /> | |
763 <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_1_interleaved.fastq.gz" /> | |
764 <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> | |
765 </test> | |
766 <!-- Ensure interleaved fasta input works --> | |
767 <test expect_num_outputs="1" > | |
768 <param name="type" value="paired_interleaved" /> | |
769 <param name="source" value="history" /> | |
770 <param name="history_item" ftype="fasta" value="phiX.fa" /> | |
771 <param name="input_1" ftype="fasta" value="hisat_input_1_interleaved.fasta" /> | |
772 <output name="output_alignments" file="hisat_output_1_noqual.bam" ftype="bam" lines_diff="2" /> | |
773 </test> | |
712 </tests> | 774 </tests> |
713 | 775 |
714 <help><![CDATA[ | 776 <help><![CDATA[ |
715 Introduction | 777 Introduction |
716 ============ | 778 ============ |
1075 --seed <int> | 1137 --seed <int> |
1076 Use `<int>` as the seed for pseudo-random number generator. Default: 0. | 1138 Use `<int>` as the seed for pseudo-random number generator. Default: 0. |
1077 | 1139 |
1078 --non-deterministic | 1140 --non-deterministic |
1079 Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When `--non-deterministic` is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads. | 1141 Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When `--non-deterministic` is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads. |
1080 | |
1081 ]]></help> | 1142 ]]></help> |
1082 <citations> | 1143 <citations> |
1083 <citation type="doi">10.1038/nmeth.3317</citation> | 1144 <citation type="doi">10.1038/nmeth.3317</citation> |
1084 </citations> | 1145 </citations> |
1085 </tool> | 1146 </tool> |