Mercurial > repos > iuc > hisat2
changeset 22:a26ed87f444c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hisat2 commit ac79103bf57c195226877a698dc197a965f82aba
author | iuc |
---|---|
date | Tue, 24 Jul 2018 09:29:27 -0400 |
parents | 0b1c04a90182 |
children | 6daca6da3059 |
files | hisat2.xml test-data/hisat_input_1_interleaved.fasta test-data/hisat_input_1_interleaved.fastq test-data/hisat_input_1_interleaved.fastq.bz2 test-data/hisat_input_1_interleaved.fastq.gz test-data/hisat_output_1_noqual.bam |
diffstat | 6 files changed, 210 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/hisat2.xml Sat Jul 14 09:06:59 2018 -0400 +++ b/hisat2.xml Tue Jul 24 09:29:27 2018 -0400 @@ -1,11 +1,12 @@ -<tool id="hisat2" name="HISAT2" version="2.1.0+galaxy1" profile="17.01"> +<tool id="hisat2" name="HISAT2" version="2.1.0+galaxy2" profile="17.01"> <description>A fast and sensitive alignment program</description> <macros> <import>hisat2_macros.xml</import> </macros> <requirements> <requirement type="package" version="2.1.0">hisat2</requirement> - <requirement type="package" version="1.8">samtools</requirement> + <requirement type="package" version="1.9">samtools</requirement> + <requirement type="package" version="1.3">seqtk</requirement> </requirements> <stdio> <regex level="fatal" match="hisat2-align exited with value 1" source="both" /> @@ -42,10 +43,10 @@ #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): #set read1 = "input_f.fastq.gz" #set compressed = "GZ" - #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set read1 = "input_f.fastq.bz2" #set compressed = "BZ2" - #else if $library.input_1.is_of_type('fasta'): + #elif $library.input_1.is_of_type('fasta'): #set reads_are_fastq = False #set read1 = "input_f.fasta" #else: @@ -56,24 +57,24 @@ #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"): #set read2 = "input_r.fastq.gz" #set compressed = "GZ" - #else if $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #elif $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set read2 = "input_r.fastq.bz2" #set compressed = "BZ2" - #else if $library.input_2.is_of_type('fasta'): + #elif $library.input_2.is_of_type('fasta'): #set read2 = "input_r.fasta" #else: #set read2 = "input_r.fastq" #end if ln -f -s '${library.input_2}' ${read2} && -#else if str($library.type) == 'paired_collection': +#elif str($library.type) == 'paired_collection': #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"): #set read1 = "input_f.fastq.gz" #set compressed = "GZ" - #else if $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #elif $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set read1 = "input_f.fastq.bz2" #set compressed = "BZ2" - #else if $library.input_1.forward.is_of_type('fasta'): + #elif $library.input_1.forward.is_of_type('fasta'): #set reads_are_fastq = False #set read1 = "input_f.fasta" #else: @@ -84,24 +85,44 @@ #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"): #set read2 = "input_r.fastq.gz" #set compressed = "GZ" - #else if $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #elif $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set read2 = "input_r.fastq.bz2" #set compressed = "BZ2" - #else if $library.input_1.reverse.is_of_type("fasta"): + #elif $library.input_1.reverse.is_of_type("fasta"): #set read2 = "input_r.fasta" #else: #set read2 = "input_r.fastq" #end if ln -s '${library.input_1.reverse}' ${read2} && - +#elif str( $library.type ) == "paired_interleaved": + #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): + #set interleaved_reads = "input_f.fastq.gz" + #set compressed = "GZ" + #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #set interleaved_reads = "input_f.fastq.bz2" + #set compressed = "BZ2" + #elif $library.input_1.is_of_type('fasta'): + #set reads_are_fastq = False + #set interleaved_reads = "input_f.fasta" + #else: + #set interleaved_reads = "input_f.fastq" + #end if + ln -f -s '${library.input_1}' ${interleaved_reads} && + #if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #set read1 = "<(bzcat input_f.fastq.bz2 | seqtk seq -1 /dev/stdin)" + #set read2 = "<(bzcat input_f.fastq.bz2 | seqtk seq -2 /dev/stdin)" + #else: + #set read1 = "<(seqtk seq -1 %s)" % $interleaved_reads + #set read2 = "<(seqtk seq -2 %s)" % $interleaved_reads + #end if #else: #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"): #set read1 = "input_f.fastq.gz" #set compressed = "GZ" - #else if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): + #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"): #set read1 = "input_f.fastq.bz2" #set compressed = "BZ2" - #else if $library.input_1.is_of_type('fasta'): + #elif $library.input_1.is_of_type('fasta'): #set reads_are_fastq = False #set read1 = "input_f.fasta" #else: @@ -136,7 +157,7 @@ #if str( $adv.output_options.unaligned_file ) == "true": #if $compressed == "GZ": --un-gz '$output_unaligned_reads_l' - #else if $compressed == "BZ2": + #elif $compressed == "BZ2": --un-bz2 '$output_unaligned_reads_l' #else: --un '$output_unaligned_reads_l' @@ -146,7 +167,7 @@ #if str( $adv.output_options.aligned_file ) == "true": #if $compressed == "GZ": --al-gz '$output_aligned_reads_l' - #else if $compressed == "BZ2": + #elif $compressed == "BZ2": --al-bz2 '$output_aligned_reads_l' #else: --al '$output_aligned_reads_l' @@ -155,15 +176,19 @@ #end if #else: - - -1 '${read1}' - -2 '${read2}' - + ##quotes are embedded in r1 and r2 variables, needed to allow use of <() + #if str( $library.type ) == "paired_interleaved": + -1 ${read1} + -2 ${read2} + #else: + -1 '${read1}' + -2 '${read2}' + #end if #if str($adv.output_options.output_options_selector) == "advanced": #if str( $adv.output_options.unaligned_file ) == "true": #if $compressed == "GZ": --un-conc-gz '${output_unaligned_reads_l}' - #else if $compressed == "BZ2": + #elif $compressed == "BZ2": --un-conc-bz2 '${output_unaligned_reads_l}' #else: --un-conc '${output_unaligned_reads_l}' @@ -173,7 +198,7 @@ #if str( $adv.output_options.aligned_file ) == "true": #if $compressed == "GZ": --al-conc-gz '${output_aligned_reads_l}' - #else if $compressed == "BZ2": + #elif $compressed == "BZ2": --al-conc-bz2 '${output_aligned_reads_l}' #else: --al-conc '${output_aligned_reads_l}' @@ -292,11 +317,11 @@ ## Convert SAM output to sorted BAM ## using the two pipe stages has the following effect ## - hisat2 and sort run in parallel, during this time sort produces -## presorted temporary files but does not produce output (hence +## presorted temporary files but does not produce output (hence ## view does not run) -## - once hisat is finished sort will start to merge the temporary -## files (which should be fast also on a single thread) gives the -## sorted output to view which only compresses the files (now +## - once hisat is finished sort will start to merge the temporary +## files (which should be fast also on a single thread) gives the +## sorted output to view which only compresses the files (now ## using full parallelism again) | samtools sort -l 0 -O bam | samtools view -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}' @@ -342,10 +367,11 @@ <!-- Reads --> <conditional name="library"> - <param name="type" type="select" label="Single-end or paired-end reads?"> + <param name="type" type="select" label="Is this a single or paired library"> <option value="single">Single-end</option> <option value="paired">Paired-end</option> - <option value="paired_collection">Paired-end Collection</option> + <option value="paired_collection">Paired-end Dataset Collection</option> + <option value="paired_interleaved">Paired-end data from single interleaved dataset</option> </param> <when value="single"> @@ -368,6 +394,10 @@ <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype "fastqsanger" or "fasta"" /> <expand macro="paired_end_options" /> </when> + <when value="paired_interleaved"> + <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTA/Q file" help="Must be of datatype "fastqsanger" or "fasta". --interleaved"/> + <expand macro="paired_end_options" /> + </when> </conditional> <!-- Summary Options --> @@ -709,6 +739,38 @@ <param name="summary_file" value="true" /> <output name="summary_file" file="hisat_output.summary" ftype="txt" /> </test> + <!-- Ensure interleaved input works --> + <test expect_num_outputs="1" > + <param name="type" value="paired_interleaved" /> + <param name="source" value="history" /> + <param name="history_item" ftype="fasta" value="phiX.fa" /> + <param name="input_1" ftype="fastqsanger" value="hisat_input_1_interleaved.fastq" /> + <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> + </test> + <!-- Ensure interleaved bz input works --> + <test expect_num_outputs="1" > + <param name="type" value="paired_interleaved" /> + <param name="source" value="history" /> + <param name="history_item" ftype="fasta" value="phiX.fa" /> + <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" /> + <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> + </test> + <!-- Ensure interleaved gz input works --> + <test expect_num_outputs="1" > + <param name="type" value="paired_interleaved" /> + <param name="source" value="history" /> + <param name="history_item" ftype="fasta" value="phiX.fa" /> + <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_1_interleaved.fastq.gz" /> + <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" /> + </test> + <!-- Ensure interleaved fasta input works --> + <test expect_num_outputs="1" > + <param name="type" value="paired_interleaved" /> + <param name="source" value="history" /> + <param name="history_item" ftype="fasta" value="phiX.fa" /> + <param name="input_1" ftype="fasta" value="hisat_input_1_interleaved.fasta" /> + <output name="output_alignments" file="hisat_output_1_noqual.bam" ftype="bam" lines_diff="2" /> + </test> </tests> <help><![CDATA[ @@ -1077,7 +1139,6 @@ --non-deterministic Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When `--non-deterministic` is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads. - ]]></help> <citations> <citation type="doi">10.1038/nmeth.3317</citation>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hisat_input_1_interleaved.fasta Tue Jul 24 09:29:27 2018 -0400 @@ -0,0 +1,40 @@ +>phiX174_1980_2501_0:1:0_3:0:0_0/1 +TTAGGTGTGTGTAAAACAGGTGCCGAAGAAGCTGGATTAACAGAATTGAGAACCAGCTTATCAGAAAAAA +>phiX174_1980_2501_0:1:0_3:0:0_0/2 +GTGAAATTTCTAGGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCA +>phiX174_1542_1965_0:0:0_0:0:0_1/1 +CTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA +>phiX174_1542_1965_0:0:0_0:0:0_1/2 +CCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCT +>phiX174_2950_3377_0:0:0_2:0:0_2/1 +CTCAAATCCGGCGTCAACCATACCAGCATAGGAAGCATCAGCACCAGCACGCTCCCAAGCATTAATCTCA +>phiX174_2950_3377_0:0:0_2:0:0_2/2 +GCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTC +>phiX174_2259_2739_1:0:0_1:0:0_3/1 +CTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAAAT +>phiX174_2259_2739_1:0:0_1:0:0_3/2 +GCGACCATTCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGACGAAGACTCAAAGCGA +>phiX174_1141_1609_1:0:0_1:0:0_4/1 +TGGCGCTCTCCGTCTTTCTCCATTTCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT +>phiX174_1141_1609_1:0:0_1:0:0_4/2 +CAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCTAGAA +>phiX174_185_708_0:0:0_1:0:0_5/1 +CCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCT +>phiX174_185_708_0:0:0_1:0:0_5/2 +TGTTTTCCGTAAATTCAGCGCCTTCCATGATGCGACAGGCCGTTTGAATGTTGACGGGATGAACATAATA +>phiX174_1363_1914_3:0:0_0:0:0_6/1 +GCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGC +>phiX174_1363_1914_3:0:0_0:0:0_6/2 +TAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTCAAGCGCCGAGGATGCGTGACCGT +>phiX174_3199_3732_0:0:0_1:0:0_7/1 +CTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAA +>phiX174_3199_3732_0:0:0_1:0:0_7/2 +TCTGCGTTTGCTGATGAACTAAGTCAACCTCAGCACTAACCTTGCGAGTCATTTCATTGATTTGGTCATT +>phiX174_36_572_1:0:0_0:0:0_8/1 +ACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTC +>phiX174_36_572_1:0:0_0:0:0_8/2 +TTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGTAATTACTACTGCTTGTTTACGAAT +>phiX174_2128_2577_0:0:0_4:0:0_9/1 +TTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAG +>phiX174_2128_2577_0:0:0_4:0:0_9/2 +CTGAATGGAATTAAGAAAACCACCAATACCAGCATTAACCTTCAAACTATCAAAATATAACGTTGACGAT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hisat_input_1_interleaved.fastq Tue Jul 24 09:29:27 2018 -0400 @@ -0,0 +1,80 @@ +@phiX174_1980_2501_0:1:0_3:0:0_0/1 +TTAGGTGTGTGTAAAACAGGTGCCGAAGAAGCTGGATTAACAGAATTGAGAACCAGCTTATCAGAAAAAA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1980_2501_0:1:0_3:0:0_0/2 +GTGAAATTTCTAGGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1542_1965_0:0:0_0:0:0_1/1 +CTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1542_1965_0:0:0_0:0:0_1/2 +CCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2950_3377_0:0:0_2:0:0_2/1 +CTCAAATCCGGCGTCAACCATACCAGCATAGGAAGCATCAGCACCAGCACGCTCCCAAGCATTAATCTCA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2950_3377_0:0:0_2:0:0_2/2 +GCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTC ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2259_2739_1:0:0_1:0:0_3/1 +CTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAAAT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2259_2739_1:0:0_1:0:0_3/2 +GCGACCATTCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGACGAAGACTCAAAGCGA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1141_1609_1:0:0_1:0:0_4/1 +TGGCGCTCTCCGTCTTTCTCCATTTCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1141_1609_1:0:0_1:0:0_4/2 +CAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCTAGAA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_185_708_0:0:0_1:0:0_5/1 +CCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_185_708_0:0:0_1:0:0_5/2 +TGTTTTCCGTAAATTCAGCGCCTTCCATGATGCGACAGGCCGTTTGAATGTTGACGGGATGAACATAATA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1363_1914_3:0:0_0:0:0_6/1 +GCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGC ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_1363_1914_3:0:0_0:0:0_6/2 +TAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTCAAGCGCCGAGGATGCGTGACCGT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_3199_3732_0:0:0_1:0:0_7/1 +CTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAA ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_3199_3732_0:0:0_1:0:0_7/2 +TCTGCGTTTGCTGATGAACTAAGTCAACCTCAGCACTAACCTTGCGAGTCATTTCATTGATTTGGTCATT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_36_572_1:0:0_0:0:0_8/1 +ACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTC ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_36_572_1:0:0_0:0:0_8/2 +TTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGTAATTACTACTGCTTGTTTACGAAT ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2128_2577_0:0:0_4:0:0_9/1 +TTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAG ++ +2222222222222222222222222222222222222222222222222222222222222222222222 +@phiX174_2128_2577_0:0:0_4:0:0_9/2 +CTGAATGGAATTAAGAAAACCACCAATACCAGCATTAACCTTCAAACTATCAAAATATAACGTTGACGAT ++ +2222222222222222222222222222222222222222222222222222222222222222222222