changeset 22:a26ed87f444c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hisat2 commit ac79103bf57c195226877a698dc197a965f82aba
author iuc
date Tue, 24 Jul 2018 09:29:27 -0400
parents 0b1c04a90182
children 6daca6da3059
files hisat2.xml test-data/hisat_input_1_interleaved.fasta test-data/hisat_input_1_interleaved.fastq test-data/hisat_input_1_interleaved.fastq.bz2 test-data/hisat_input_1_interleaved.fastq.gz test-data/hisat_output_1_noqual.bam
diffstat 6 files changed, 210 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/hisat2.xml	Sat Jul 14 09:06:59 2018 -0400
+++ b/hisat2.xml	Tue Jul 24 09:29:27 2018 -0400
@@ -1,11 +1,12 @@
-<tool id="hisat2" name="HISAT2" version="2.1.0+galaxy1" profile="17.01">
+<tool id="hisat2" name="HISAT2" version="2.1.0+galaxy2" profile="17.01">
     <description>A fast and sensitive alignment program</description>
     <macros>
         <import>hisat2_macros.xml</import>
     </macros>
     <requirements>
         <requirement type="package" version="2.1.0">hisat2</requirement>
-        <requirement type="package" version="1.8">samtools</requirement>
+        <requirement type="package" version="1.9">samtools</requirement>
+        <requirement type="package" version="1.3">seqtk</requirement>
     </requirements>
     <stdio>
         <regex level="fatal" match="hisat2-align exited with value 1" source="both" />
@@ -42,10 +43,10 @@
     #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
         #set read1 = "input_f.fastq.gz"
         #set compressed = "GZ"
-    #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
         #set read1 = "input_f.fastq.bz2"
         #set compressed = "BZ2"
-    #else if $library.input_1.is_of_type('fasta'):
+    #elif $library.input_1.is_of_type('fasta'):
         #set reads_are_fastq = False
         #set read1 = "input_f.fasta"
     #else:
@@ -56,24 +57,24 @@
     #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"):
         #set read2 = "input_r.fastq.gz"
         #set compressed = "GZ"
-    #else if $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+    #elif $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
         #set read2 = "input_r.fastq.bz2"
         #set compressed = "BZ2"
-    #else if $library.input_2.is_of_type('fasta'):
+    #elif $library.input_2.is_of_type('fasta'):
         #set read2 = "input_r.fasta"
     #else:
         #set read2 = "input_r.fastq"
     #end if
     ln -f -s '${library.input_2}' ${read2} &&
 
-#else if str($library.type) == 'paired_collection':
+#elif str($library.type) == 'paired_collection':
     #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
         #set read1 = "input_f.fastq.gz"
         #set compressed = "GZ"
-    #else if $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+    #elif $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
         #set read1 = "input_f.fastq.bz2"
         #set compressed = "BZ2"
-    #else if $library.input_1.forward.is_of_type('fasta'):
+    #elif $library.input_1.forward.is_of_type('fasta'):
         #set reads_are_fastq = False
         #set read1 = "input_f.fasta"
     #else:
@@ -84,24 +85,44 @@
     #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
         #set read2 = "input_r.fastq.gz"
         #set compressed = "GZ"
-    #else if $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+    #elif $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
         #set read2 = "input_r.fastq.bz2"
         #set compressed = "BZ2"
-    #else if $library.input_1.reverse.is_of_type("fasta"):
+    #elif $library.input_1.reverse.is_of_type("fasta"):
         #set read2 = "input_r.fasta"
     #else:
         #set read2 = "input_r.fastq"
     #end if
     ln -s '${library.input_1.reverse}' ${read2} &&
-
+#elif str( $library.type ) == "paired_interleaved":
+    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
+        #set interleaved_reads = "input_f.fastq.gz"
+        #set compressed = "GZ"
+    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+        #set interleaved_reads = "input_f.fastq.bz2"
+        #set compressed = "BZ2"
+    #elif $library.input_1.is_of_type('fasta'):
+        #set reads_are_fastq = False
+        #set interleaved_reads = "input_f.fasta"
+    #else:
+        #set interleaved_reads = "input_f.fastq"
+    #end if
+    ln -f -s '${library.input_1}' ${interleaved_reads} &&
+    #if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+        #set read1 = "<(bzcat input_f.fastq.bz2 | seqtk seq -1 /dev/stdin)"
+        #set read2 = "<(bzcat input_f.fastq.bz2 | seqtk seq -2 /dev/stdin)"
+    #else:
+        #set read1 = "<(seqtk seq -1 %s)" % $interleaved_reads
+        #set read2 = "<(seqtk seq -2 %s)" % $interleaved_reads
+    #end if
 #else:
     #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
         #set read1 = "input_f.fastq.gz"
         #set compressed = "GZ"
-    #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
+    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
         #set read1 = "input_f.fastq.bz2"
         #set compressed = "BZ2"
-    #else if $library.input_1.is_of_type('fasta'):
+    #elif $library.input_1.is_of_type('fasta'):
         #set reads_are_fastq = False
         #set read1 = "input_f.fasta"
     #else:
@@ -136,7 +157,7 @@
         #if str( $adv.output_options.unaligned_file ) == "true":
             #if $compressed == "GZ":
                 --un-gz '$output_unaligned_reads_l'
-            #else if $compressed == "BZ2":
+            #elif $compressed == "BZ2":
                 --un-bz2 '$output_unaligned_reads_l'
             #else:
                 --un '$output_unaligned_reads_l'
@@ -146,7 +167,7 @@
         #if str( $adv.output_options.aligned_file ) == "true":
             #if $compressed == "GZ":
                 --al-gz '$output_aligned_reads_l'
-            #else if $compressed == "BZ2":
+            #elif $compressed == "BZ2":
                 --al-bz2 '$output_aligned_reads_l'
             #else:
                 --al '$output_aligned_reads_l'
@@ -155,15 +176,19 @@
     #end if
 
 #else:
-
-    -1 '${read1}'
-    -2 '${read2}'
-
+    ##for interleaved input read1 and read2 hold <(...) process substitutions, so they must be emitted unquoted for the shell to expand them
+    #if str( $library.type ) == "paired_interleaved":
+      -1 ${read1}
+      -2 ${read2}
+    #else:
+      -1 '${read1}'
+      -2 '${read2}'
+    #end if
     #if str($adv.output_options.output_options_selector) == "advanced":
         #if str( $adv.output_options.unaligned_file ) == "true":
             #if $compressed == "GZ":
                 --un-conc-gz '${output_unaligned_reads_l}'
-            #else if $compressed == "BZ2":
+            #elif $compressed == "BZ2":
                 --un-conc-bz2 '${output_unaligned_reads_l}'
             #else:
                 --un-conc '${output_unaligned_reads_l}'
@@ -173,7 +198,7 @@
         #if str( $adv.output_options.aligned_file ) == "true":
             #if $compressed == "GZ":
                 --al-conc-gz '${output_aligned_reads_l}'
-            #else if $compressed == "BZ2":
+            #elif $compressed == "BZ2":
                 --al-conc-bz2 '${output_aligned_reads_l}'
             #else:
                 --al-conc '${output_aligned_reads_l}'
@@ -292,11 +317,11 @@
 ## Convert SAM output to sorted BAM
 ## using the two pipe stages has the following effect
 ## - hisat2 and sort run in parallel, during this time sort produces
-##   presorted temporary files but does not produce output (hence 
+##   presorted temporary files but does not produce output (hence
 ##   view does not run)
-## - once hisat is finished sort will start to merge the temporary 
-##   files (which should be fast also on a single thread) gives the 
-##   sorted output to view which only compresses the files (now 
+## - once hisat is finished sort will start to merge the temporary
+##   files (which should be fast also on a single thread) and gives the
+##   sorted output to view which only compresses the files (now
 ##   using full parallelism again)
 
 | samtools sort -l 0 -O bam | samtools view -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}'
@@ -342,10 +367,11 @@
 
         <!-- Reads -->
             <conditional name="library">
-                <param name="type" type="select" label="Single-end or paired-end reads?">
+                <param name="type" type="select" label="Is this a single or paired library">
                     <option value="single">Single-end</option>
                     <option value="paired">Paired-end</option>
-                    <option value="paired_collection">Paired-end Collection</option>
+                    <option value="paired_collection">Paired-end Dataset Collection</option>
+                    <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
                 </param>
 
                 <when value="single">
@@ -368,6 +394,10 @@
                     <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                     <expand macro="paired_end_options" />
                 </when>
+                <when value="paired_interleaved">
+                    <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. --interleaved"/>
+                    <expand macro="paired_end_options" />
+                </when>
             </conditional>
 
         <!-- Summary Options -->
@@ -709,6 +739,38 @@
             <param name="summary_file" value="true" />
             <output name="summary_file" file="hisat_output.summary" ftype="txt" />
         </test>
+        <!-- Ensure interleaved input works -->
+        <test expect_num_outputs="1" >
+            <param name="type" value="paired_interleaved" />
+            <param name="source" value="history" />
+            <param name="history_item" ftype="fasta" value="phiX.fa" />
+            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_interleaved.fastq" />
+            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
+        </test>
+        <!-- Ensure interleaved bz2 input works -->
+        <test expect_num_outputs="1" >
+            <param name="type" value="paired_interleaved" />
+            <param name="source" value="history" />
+            <param name="history_item" ftype="fasta" value="phiX.fa" />
+            <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" />
+            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
+        </test>
+        <!-- Ensure interleaved gz input works -->
+        <test expect_num_outputs="1" >
+            <param name="type" value="paired_interleaved" />
+            <param name="source" value="history" />
+            <param name="history_item" ftype="fasta" value="phiX.fa" />
+            <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_1_interleaved.fastq.gz" />
+            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
+        </test>
+        <!-- Ensure interleaved fasta input works -->
+        <test expect_num_outputs="1" >
+            <param name="type" value="paired_interleaved" />
+            <param name="source" value="history" />
+            <param name="history_item" ftype="fasta" value="phiX.fa" />
+            <param name="input_1" ftype="fasta" value="hisat_input_1_interleaved.fasta" />
+            <output name="output_alignments" file="hisat_output_1_noqual.bam" ftype="bam" lines_diff="2" />
+        </test>
     </tests>
 
     <help><![CDATA[
@@ -1077,7 +1139,6 @@
 
     --non-deterministic
             Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When `--non-deterministic` is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads.
-
     ]]></help>
     <citations>
         <citation type="doi">10.1038/nmeth.3317</citation>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hisat_input_1_interleaved.fasta	Tue Jul 24 09:29:27 2018 -0400
@@ -0,0 +1,40 @@
+>phiX174_1980_2501_0:1:0_3:0:0_0/1
+TTAGGTGTGTGTAAAACAGGTGCCGAAGAAGCTGGATTAACAGAATTGAGAACCAGCTTATCAGAAAAAA
+>phiX174_1980_2501_0:1:0_3:0:0_0/2
+GTGAAATTTCTAGGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCA
+>phiX174_1542_1965_0:0:0_0:0:0_1/1
+CTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA
+>phiX174_1542_1965_0:0:0_0:0:0_1/2
+CCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCT
+>phiX174_2950_3377_0:0:0_2:0:0_2/1
+CTCAAATCCGGCGTCAACCATACCAGCATAGGAAGCATCAGCACCAGCACGCTCCCAAGCATTAATCTCA
+>phiX174_2950_3377_0:0:0_2:0:0_2/2
+GCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTC
+>phiX174_2259_2739_1:0:0_1:0:0_3/1
+CTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAAAT
+>phiX174_2259_2739_1:0:0_1:0:0_3/2
+GCGACCATTCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGACGAAGACTCAAAGCGA
+>phiX174_1141_1609_1:0:0_1:0:0_4/1
+TGGCGCTCTCCGTCTTTCTCCATTTCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT
+>phiX174_1141_1609_1:0:0_1:0:0_4/2
+CAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCTAGAA
+>phiX174_185_708_0:0:0_1:0:0_5/1
+CCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCT
+>phiX174_185_708_0:0:0_1:0:0_5/2
+TGTTTTCCGTAAATTCAGCGCCTTCCATGATGCGACAGGCCGTTTGAATGTTGACGGGATGAACATAATA
+>phiX174_1363_1914_3:0:0_0:0:0_6/1
+GCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGC
+>phiX174_1363_1914_3:0:0_0:0:0_6/2
+TAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTCAAGCGCCGAGGATGCGTGACCGT
+>phiX174_3199_3732_0:0:0_1:0:0_7/1
+CTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAA
+>phiX174_3199_3732_0:0:0_1:0:0_7/2
+TCTGCGTTTGCTGATGAACTAAGTCAACCTCAGCACTAACCTTGCGAGTCATTTCATTGATTTGGTCATT
+>phiX174_36_572_1:0:0_0:0:0_8/1
+ACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTC
+>phiX174_36_572_1:0:0_0:0:0_8/2
+TTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGTAATTACTACTGCTTGTTTACGAAT
+>phiX174_2128_2577_0:0:0_4:0:0_9/1
+TTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAG
+>phiX174_2128_2577_0:0:0_4:0:0_9/2
+CTGAATGGAATTAAGAAAACCACCAATACCAGCATTAACCTTCAAACTATCAAAATATAACGTTGACGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hisat_input_1_interleaved.fastq	Tue Jul 24 09:29:27 2018 -0400
@@ -0,0 +1,80 @@
+@phiX174_1980_2501_0:1:0_3:0:0_0/1
+TTAGGTGTGTGTAAAACAGGTGCCGAAGAAGCTGGATTAACAGAATTGAGAACCAGCTTATCAGAAAAAA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1980_2501_0:1:0_3:0:0_0/2
+GTGAAATTTCTAGGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1542_1965_0:0:0_0:0:0_1/1
+CTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1542_1965_0:0:0_0:0:0_1/2
+CCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2950_3377_0:0:0_2:0:0_2/1
+CTCAAATCCGGCGTCAACCATACCAGCATAGGAAGCATCAGCACCAGCACGCTCCCAAGCATTAATCTCA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2950_3377_0:0:0_2:0:0_2/2
+GCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTC
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2259_2739_1:0:0_1:0:0_3/1
+CTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAAAT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2259_2739_1:0:0_1:0:0_3/2
+GCGACCATTCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGACGAAGACTCAAAGCGA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1141_1609_1:0:0_1:0:0_4/1
+TGGCGCTCTCCGTCTTTCTCCATTTCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1141_1609_1:0:0_1:0:0_4/2
+CAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCTAGAA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_185_708_0:0:0_1:0:0_5/1
+CCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_185_708_0:0:0_1:0:0_5/2
+TGTTTTCCGTAAATTCAGCGCCTTCCATGATGCGACAGGCCGTTTGAATGTTGACGGGATGAACATAATA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1363_1914_3:0:0_0:0:0_6/1
+GCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGC
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_1363_1914_3:0:0_0:0:0_6/2
+TAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTCAAGCGCCGAGGATGCGTGACCGT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_3199_3732_0:0:0_1:0:0_7/1
+CTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAA
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_3199_3732_0:0:0_1:0:0_7/2
+TCTGCGTTTGCTGATGAACTAAGTCAACCTCAGCACTAACCTTGCGAGTCATTTCATTGATTTGGTCATT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_36_572_1:0:0_0:0:0_8/1
+ACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTC
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_36_572_1:0:0_0:0:0_8/2
+TTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGTAATTACTACTGCTTGTTTACGAAT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2128_2577_0:0:0_4:0:0_9/1
+TTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAG
++
+2222222222222222222222222222222222222222222222222222222222222222222222
+@phiX174_2128_2577_0:0:0_4:0:0_9/2
+CTGAATGGAATTAAGAAAACCACCAATACCAGCATTAACCTTCAAACTATCAAAATATAACGTTGACGAT
++
+2222222222222222222222222222222222222222222222222222222222222222222222
Binary file test-data/hisat_input_1_interleaved.fastq.bz2 has changed
Binary file test-data/hisat_input_1_interleaved.fastq.gz has changed
Binary file test-data/hisat_output_1_noqual.bam has changed