<!-- Source: Mercurial > repos > iuc > hisat2 -->

<tool id="hisat2" name="HISAT2" version="2.1.0+galaxy5" profile="17.01">
    <description>A fast and sensitive alignment program</description>
    <macros>
        <import>hisat2_macros.xml</import>
    </macros>
    <requirements>
        <!-- The aligner; presumably also supplies hisat2-build and hisat2_extract_splice_sites.py, which the command calls (confirm against the package) -->
        <requirement type="package" version="2.1.0">hisat2</requirement>
        <!-- Used at the end of the command to sort the alignment stream and write the BAM output -->
        <requirement type="package" version="1.9">samtools</requirement>
        <!-- Used by the command to split an interleaved paired-end file into mate 1 and mate 2 streams -->
        <requirement type="package" version="1.3">seqtk</requirement>
    </requirements>
    <stdio>
        <!-- Mark the job as failed when either message shows up on stdout or stderr. -->
        <!-- NOTE(review): presumably needed because hisat2 is the first stage of a pipe into samtools, so its own exit status may not reach Galaxy; confirm. -->
        <regex level="fatal" match="hisat2-align exited with value 1" source="both" />
        <regex level="fatal" match="hisat2: not found" source="both" />
        <!-- Any exit code of 1 or higher is also flagged (no level is given here, so Galaxy's default level applies) -->
        <exit_code range="1:" />
    </stdio>
    <!-- Command whose output Galaxy stores as the tool version for a job -->
    <version_command>hisat2 --version</version_command>
    <!--
        Command template (Cheetah). Stages, in order:
          1. build an index from a history FASTA, or take the path of a built-in index;
          2. optionally extract known splice sites from a GTF;
          3. symlink the reads under names whose extension reflects their format;
          4. assemble the hisat2 call from the selected options;
          5. pipe the SAM stream through samtools to obtain a sorted BAM;
          6. move paired unaligned/aligned read files onto the Galaxy outputs.
    -->
    <command><![CDATA[
## Prepare HISAT2 index
## A FASTA from the history is indexed inside the job directory; a built-in
## genome only needs the index path stored in the data table.

#if $reference_genome.source == "history":
    ln -s '$reference_genome.history_item' genome.fa &&
    hisat2-build -p \${GALAXY_SLOTS:-1} genome.fa genome &&
    #set index_path = 'genome'
#else:
    #set index_path = $reference_genome.index.fields.path
#end if


## If using known splice sites

#if str($adv.spliced_options.spliced_options_selector) == "advanced" and str($adv.spliced_options.known_splice_gtf) != 'None':
    ln -s '${adv.spliced_options.known_splice_gtf}' splice_sites.gtf &&
    hisat2_extract_splice_sites.py splice_sites.gtf > splice_sites.txt &&
#end if


## Link in the input files, so HISAT2 can tell their type
## compressed   : "False", "GZ" or "BZ2"; selects the matching --un*/--al* flag below
## reads_are_fastq : switched off when the (first) input is FASTA, which adds -f below

#set compressed = "False"
#set reads_are_fastq = True

#if str($library.type) == 'paired':
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&

    #if $library.input_2.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_2.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_2.is_of_type('fasta'):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_2}' ${read2} &&

#elif str($library.type) == 'paired_collection':
    #if $library.input_1.forward.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.forward.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.forward.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ## -f added so this branch links its inputs the same way as the others
    ln -f -s '${library.input_1.forward}' ${read1} &&

    #if $library.input_1.reverse.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read2 = "input_r.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.reverse.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read2 = "input_r.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.reverse.is_of_type("fasta"):
        #set read2 = "input_r.fasta"
    #else:
        #set read2 = "input_r.fastq"
    #end if
    ln -f -s '${library.input_1.reverse}' ${read2} &&
#elif str( $library.type ) == "paired_interleaved":
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set interleaved_reads = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set interleaved_reads = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set interleaved_reads = "input_f.fasta"
    #else:
        #set interleaved_reads = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${interleaved_reads} &&
    ## Mates are separated on the fly with seqtk through process substitution;
    ## bz2 input is decompressed with bzcat first.
    #if $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "<(bzcat input_f.fastq.bz2 | seqtk seq -1 /dev/stdin)"
        #set read2 = "<(bzcat input_f.fastq.bz2 | seqtk seq -2 /dev/stdin)"
    #else:
        #set read1 = "<(seqtk seq -1 %s)" % $interleaved_reads
        #set read2 = "<(seqtk seq -2 %s)" % $interleaved_reads
    #end if
#else:
    #if $library.input_1.is_of_type("fastq.gz", "fastqsanger.gz"):
        #set read1 = "input_f.fastq.gz"
        #set compressed = "GZ"
    #elif $library.input_1.is_of_type("fastq.bz2", "fastqsanger.bz2"):
        #set read1 = "input_f.fastq.bz2"
        #set compressed = "BZ2"
    #elif $library.input_1.is_of_type('fasta'):
        #set reads_are_fastq = False
        #set read1 = "input_f.fasta"
    #else:
        #set read1 = "input_f.fastq"
    #end if
    ln -f -s '${library.input_1}' ${read1} &&
#end if


## Run HISAT2

hisat2

## number threads
-p \${GALAXY_SLOTS:-1}

## ref genome index path
-x '${index_path}'

## input reads are fasta?
#if not $reads_are_fastq:
    -f
#end if

## Input reads

#if str( $library.type ) == "single":

    -U '${read1}'

    #if str($adv.output_options.output_options_selector) == "advanced":
        #if str( $adv.output_options.unaligned_file ) == "true":
            #if $compressed == "GZ":
                --un-gz '$output_unaligned_reads_l'
            #elif $compressed == "BZ2":
                --un-bz2 '$output_unaligned_reads_l'
            #else:
                --un '$output_unaligned_reads_l'
            #end if
        #end if

        #if str( $adv.output_options.aligned_file ) == "true":
            #if $compressed == "GZ":
                --al-gz '$output_aligned_reads_l'
            #elif $compressed == "BZ2":
                --al-bz2 '$output_aligned_reads_l'
            #else:
                --al '$output_aligned_reads_l'
            #end if
        #end if
    #end if

#else:
    ## For interleaved input read1/read2 hold <( ... ) process substitutions,
    ## so they must be emitted unquoted; plain file names are quoted.
    #if str( $library.type ) == "paired_interleaved":
      -1 ${read1}
      -2 ${read2}
    #else:
      -1 '${read1}'
      -2 '${read2}'
    #end if
    #if str($adv.output_options.output_options_selector) == "advanced":
        #if str( $adv.output_options.unaligned_file ) == "true":
            #if $compressed == "GZ":
                --un-conc-gz '${output_unaligned_reads_l}'
            #elif $compressed == "BZ2":
                --un-conc-bz2 '${output_unaligned_reads_l}'
            #else:
                --un-conc '${output_unaligned_reads_l}'
            #end if
        #end if

        #if str( $adv.output_options.aligned_file ) == "true":
            #if $compressed == "GZ":
                --al-conc-gz '${output_aligned_reads_l}'
            #elif $compressed == "BZ2":
                --al-conc-bz2 '${output_aligned_reads_l}'
            #else:
                --al-conc '${output_aligned_reads_l}'
            #end if
        #end if
    #end if

    #if str($library.paired_options.paired_options_selector) == "advanced":
        ${library.paired_options.fr_rf_ff}
        ${library.paired_options.no_mixed}
        ${library.paired_options.no_discordant}
    #end if

#end if


## Specify strandedness of reads

#if str($library.rna_strandness):
    --rna-strandness ${library.rna_strandness}
#end if


## Input options

#if str($adv.input_options.input_options_selector) == "advanced":
    #if int( $adv.input_options.skip ) > 0:
        --skip ${adv.input_options.skip}
    #end if
    #if int( $adv.input_options.qupto ) > 0:
        --qupto ${adv.input_options.qupto}
    #end if
    --trim5 '${adv.input_options.trim5}'
    --trim3 '${adv.input_options.trim3}'
    ${adv.input_options.qv_encoding}
    ${adv.input_options.solexa_quals}
    ${adv.input_options.int_quals}
#end if


## Alignment options

#if str($adv.alignment_options.alignment_options_selector) == "advanced":
    --n-ceil ${adv.alignment_options.function_type},${adv.alignment_options.constant_term},${adv.alignment_options.coefficient}
    ${adv.alignment_options.ignore_quals}
    ${adv.alignment_options.skip_forward}
    ${adv.alignment_options.skip_reverse}
#end if


## Scoring options

#if str($adv.scoring_options.scoring_options_selector) == "advanced":
    --mp ${adv.scoring_options.max_mismatch},${adv.scoring_options.min_mismatch}
    ${adv.scoring_options.no_softclip}
    --np ${adv.scoring_options.ambiguous_penalty}
    --rdg ${adv.scoring_options.read_open_penalty},${adv.scoring_options.read_extend_penalty}
    --rfg ${adv.scoring_options.ref_open_penalty},${adv.scoring_options.ref_extend_penalty}
    --sp ${adv.scoring_options.soft_clip_penalty_max},${adv.scoring_options.soft_clip_penalty_min}
    --score-min ${adv.scoring_options.function_type},${adv.scoring_options.constant_term},${adv.scoring_options.coefficient}
#end if


## Spliced alignment options

#if str($adv.spliced_options.spliced_options_selector) == "advanced":
    --pen-cansplice ${adv.spliced_options.canonical_penalty}
    --pen-noncansplice ${adv.spliced_options.noncanonical_penalty}
    --pen-canintronlen ${adv.spliced_options.function_type},${adv.spliced_options.constant_term},${adv.spliced_options.coefficient}
    --pen-noncanintronlen ${adv.spliced_options.nc_function_type},${adv.spliced_options.nc_constant_term},${adv.spliced_options.nc_coefficient}
    #if str($adv.spliced_options.known_splice_gtf) != 'None':
        --known-splicesite-infile splice_sites.txt
    #end if
    ${adv.spliced_options.no_spliced_alignment_options.no_spliced_alignment}
    ## minins/maxins only exist in the form when spliced alignment is disabled
    #if str($adv.spliced_options.no_spliced_alignment_options.no_spliced_alignment) == '--no-spliced-alignment':
        -I ${adv.spliced_options.no_spliced_alignment_options.minins}
        -X ${adv.spliced_options.no_spliced_alignment_options.maxins}
    #end if
    --min-intronlen ${adv.spliced_options.min_intron}
    --max-intronlen ${adv.spliced_options.max_intron}
    ${adv.spliced_options.tma}

    #if str($adv.spliced_options.novel_splicesite_outfile) == "true":
       --novel-splicesite-outfile '$novel_splicesite_output'
    #end if

    #if str($adv.spliced_options.notmplen):
        ${adv.spliced_options.notmplen}
    #end if
#end if


## Reporting options

#if str($adv.reporting_options.reporting_options_selector) == "advanced":
    #if str($adv.reporting_options.max_primary) != '':
        -k ${adv.reporting_options.max_primary}
    #end if
#end if


## Other options

#if str( $adv.other_options.other_options_selector ) == "advanced":
    ${adv.other_options.non_deterministic}
    --seed '${adv.other_options.seed}'
#end if


## Output Summary

#if str($sum.new_summary) == "true":
    --new-summary
#end if

#if str($sum.summary_file) == "true":
    --summary-file summary.txt
#end if

## Convert SAM output to sorted BAM
## using the two pipe stages has the following effect
## - hisat2 and sort run in parallel, during this time sort produces
##   presorted temporary files but does not produce output (hence
##   view does not run)
## - once hisat is finished sort will start to merge the temporary
##   files (which should be fast also on a single thread) gives the
##   sorted output to view which only compresses the files (now
##   using full parallelism again)

| samtools sort -l 0 -O bam | samtools view -O bam -@ \${GALAXY_SLOTS:-1} -o '${output_alignments}'

## Rename any output fastq files
## In paired mode the --un-conc*/--al-conc* files are expected at
## <root>.1<ext> and <root>.2<ext>; move them onto the left/right outputs.

#from os.path import splitext
#if $output_unaligned_reads_l and $output_unaligned_reads_r:
    #set _unaligned_root, _unaligned_ext = splitext( str( $output_unaligned_reads_l ) )
    && mv '${ _unaligned_root }.1${_unaligned_ext}' '$output_unaligned_reads_l'
    && mv '${ _unaligned_root }.2${_unaligned_ext}' '$output_unaligned_reads_r'
#end if
#if $output_aligned_reads_l and $output_aligned_reads_r:
    #set _aligned_root, _aligned_ext = splitext( str( $output_aligned_reads_l ) )
    && mv '${ _aligned_root }.1${_aligned_ext}' '$output_aligned_reads_l'
    && mv '${ _aligned_root }.2${_aligned_ext}' '$output_aligned_reads_r'
#end if
    ]]></command>

    <!-- Define inputs -->

    <inputs>

        <!-- Reference genome -->
            <conditional name="reference_genome">
                <!-- Chooses between a pre-built index and a FASTA dataset that the command indexes with hisat2-build before aligning -->
                <param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options">
                    <option value="indexed" selected="true">Use a built-in genome</option>
                    <option value="history">Use a genome from history</option>
                </param>
                <when value="indexed">
                    <!-- Entries come from the hisat2_indexes data table, sorted on column 2; the command reads the entry's "path" field -->
                    <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
                        <options from_data_table="hisat2_indexes">
                            <filter type="sort_by" column="2" />
                            <validator type="no_options" message="No genomes are available for the selected input dataset" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <!-- The command symlinks this dataset as genome.fa and builds the index from it -->
                    <param name="history_item" type="data" format="fasta" label="Select the reference genome" />
                </when>
            </conditional>

        <!-- Reads -->
            <conditional name="library">
                <!-- Library layout; the command branches on this value to decide how reads are linked and passed to hisat2 -->
                <param name="type" type="select" label="Is this a single or paired library">
                    <option value="single">Single-end</option>
                    <option value="paired">Paired-end</option>
                    <option value="paired_collection">Paired-end Dataset Collection</option>
                    <option value="paired_interleaved">Paired-end data from single interleaved dataset</option>
                </param>

                <when value="single">
                    <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                    <!-- An empty value (Unstranded) makes the command omit the strandness flag altogether -->
                    <param name="rna_strandness" argument="--rna-strandness" type="select" label="Specify strand information"
                            help="'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript. With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome.">
                        <option value="">Unstranded</option>
                        <option value="F">Forward (F)</option>
                        <option value="R">Reverse (R)</option>
                    </param>
                </when>

                <when value="paired">
                    <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #1" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                    <param name="input_2" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="FASTA/Q file #2" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                    <expand macro="paired_end_options" />
                </when>

                <when value="paired_collection">
                    <!-- The command reads the collection's forward and reverse members as mate 1 and mate 2 -->
                    <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data_collection" collection_type="paired" label="Paired Collection" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;" />
                    <expand macro="paired_end_options" />
                </when>
                <when value="paired_interleaved">
                    <!-- No interleaved flag is passed to hisat2; the command separates the mates with seqtk instead -->
                    <param name="input_1" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" type="data" label="Interleaved FASTA/Q file" help="Must be of datatype &quot;fastqsanger&quot; or &quot;fasta&quot;. Mates are split out of the interleaved file with seqtk before alignment"/>
                    <expand macro="paired_end_options" />
                </when>
            </conditional>

        <!-- Summary Options -->
        <section name="sum" title="Summary Options" expanded="False">
            <!-- Neither boolean sets truevalue/falsevalue; the command compares their string form with "true" -->
            <param name="new_summary" argument="--new-summary" type="boolean" checked="False" label="Output alignment summary in a more machine-friendly style." help="Select this option for compatibility with MultiQC" />
            <!-- When enabled the command asks hisat2 to write the summary to summary.txt in the job directory -->
            <param name="summary_file" argument="--summary-file" type="boolean" checked="False" label="Print alignment summary to a file." help="Output alignment summary to a file in addition to stderr." />
        </section>

        <!-- Advanced Options  -->
        <section name="adv" title="Advanced Options" expanded="False">
            <!-- Read-input options. With "advanced" selected, skip and qupto reach the command line only when greater than 0; trim5, trim3 and the quality-encoding choice are always passed. -->
            <conditional name="input_options">
                <param name="input_options_selector" type="select" label="Input options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify input options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="skip" argument="-s" type="integer" min="0" value="0" label="Skip the first N reads or pairs in the input" help="default: 0" />
                    <param name="qupto" argument="-u" type="integer" min="0" value="0" label="Align the first N reads or read pairs from the input (after the first N reads or pairs have been skipped), then stop" help="default: no limit" />
                    <param name="trim5" argument="-5" type="integer" min="0" value="0" label="Trim 5' end" help="Trim N bases from 5' (left) end of each read before alignment, default: 0" />
                    <param name="trim3" argument="-3" type="integer" min="0" value="0" label="Trim 3' end" help="Trim N bases from 3' (right) end of each read before alignment, default: 0"/>
                    <!-- The option value is the literal flag that the command inserts -->
                    <param name="qv_encoding" type="select" display="radio" label="Select quality score encoding" help="See help below for more details">
                         <option value="--phred33" selected="True">Input qualities are ASCII chars equal to the Phred quality plus 33. This is also called the "Phred+33" encoding, which is used by the very latest Illumina pipelines (--phred33)</option>
                         <option value="--phred64">Input qualities are ASCII chars equal to the Phred quality plus 64. This is also called the "Phred+64" encoding (--phred64)</option>
                    </param>
                    <param name="solexa_quals" argument="--solexa-quals" type="boolean" truevalue="--solexa-quals" falsevalue="" checked="False" label="Convert input qualities from Solexa (which can be negative) to Phred (which can't). This scheme was used in older Illumina GA Pipeline versions (prior to 1.3)" help="--solexa-quals; default: False"/>
                    <param name="int_quals" argument="--int-quals" type="boolean" truevalue="--int-quals" falsevalue="" checked="False" label="Are quality values provided as space separated integers?" help="Quality values are represented in the read input file as space-separated ASCII integers, e.g., 40 40 30 40..., rather than ASCII characters, e.g., II?I.... Integers are treated as being on the Phred quality scale unless --solexa-quals is also specified [default: False]"/>
                </when>
            </conditional>

            <!-- Alignment options. function_type, constant_term and coefficient are joined with commas into the single n-ceil argument; the booleans insert their flag verbatim when checked. -->
            <conditional name="alignment_options">
                <param name="alignment_options_selector" type="select" label="Alignment options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify alignment options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="function_type" argument="--n-ceil" type="select" display="radio" label="Function governing the maximum number of ambiguous characters (usually Ns and/or .s) allowed in a read as a function of read length" help="Reads exceeding this ceiling are filtered out">
                        <option value="C">Constant [f(x) = B]</option>
                        <option value="L" selected="true">Linear [f(x) = B + A * x]</option>
                        <!-- S selects a square-root function, so the label shows sqrt(x) rather than x squared -->
                        <option value="S">Square root [f(x) = B + A * sqrt(x)]</option>
                        <option value="G">Natural logarithm [f(x) = B + A * log(x)]</option>
                    </param>
                    <param name="constant_term" type="float" value="0" label="Constant term (B)" help="Constant term for the above function" />
                    <param name="coefficient" type="float" value="0.15" label="Coefficient (A)" help="Coefficient for the above function" />
                    <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" label="Ignore quality values" help="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value. I.e. input is treated as though all quality values are high. This is also the default behavior when the input doesn't specify quality values" />
                    <param argument="--nofw" name="skip_forward" type="boolean" truevalue="--nofw" falsevalue="" label="Skip forward strand of reference" help="If --nofw is specified, HISAT2 will not attempt to align unpaired reads to the forward (Watson) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes HISAT2 to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand" />
                    <param argument="--norc" name="skip_reverse" type="boolean" truevalue="--norc" falsevalue="" label="Skip reverse strand of reference" help="If --norc is specified, HISAT2 will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --norc causes HISAT2 to explore only those paired-end configurations corresponding to fragments from the forward (Watson) strand" />
                </when>
            </conditional>

            <!-- Scoring options. Pairs of parameters that share an argument (mp, sp, rdg, rfg) are joined with a comma by the command; function_type, constant_term and coefficient form the score-min argument. -->
            <conditional name="scoring_options">
                <param name="scoring_options_selector" type="select" label="Scoring options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify scoring options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param argument="--mp" name="max_mismatch" type="integer" value="6" min="0" label="Maximum mismatch penalty" help="Sets the maximum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                    <param argument="--mp" name="min_mismatch" type="integer" value="2" min="0" label="Minimum mismatch penalty" help="Sets the minimum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                    <param argument="--sp" name="soft_clip_penalty_max" type="integer" value="2" min="0" label="Maximum soft-clipping penalty" help="Sets the maximum (MX) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                    <param argument="--sp" name="soft_clip_penalty_min" type="integer" value="1" min="0" label="Minimum soft-clipping penalty" help="Sets the minimum (MN) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                    <!-- The logic is inverted: checked (now the default, matching plain HISAT2) emits nothing, unchecked emits the flag that disables soft-clipping -->
                    <param argument="--no-softclip" name="no_softclip" type="boolean" truevalue="" falsevalue="--no-softclip" checked="true" label="Allow soft-clipping" help="Uncheck to disallow soft-clipping (--no-softclip)" />
                    <param argument="--np" name="ambiguous_penalty" type="integer" value="1" min="0" label="Ambiguous read penalty" help="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N" />
                    <param argument="--rdg" name="read_open_penalty" type="integer" value="5" min="0" label="Read gap open penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                    <param argument="--rdg" name="read_extend_penalty" type="integer" value="3" min="0" label="Read gap extend penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                    <param argument="--rfg" name="ref_open_penalty" type="integer" value="5" min="0" label="Reference gap open penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                    <param argument="--rfg" name="ref_extend_penalty" type="integer" value="3" min="0" label="Reference gap extend penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                    <param name="function_type" argument="--score-min" type="select" display="radio" label="Function governing the minimum alignment score needed for an alignment to be considered &quot;valid&quot; (i.e. good enough to report)" help="This is a function of read length">
                        <option value="C">Constant [f(x) = B]</option>
                        <option value="L" selected="true">Linear [f(x) = B + A * x]</option>
                        <!-- S selects a square-root function, so the label shows sqrt(x) rather than x squared -->
                        <option value="S">Square root [f(x) = B + A * sqrt(x)]</option>
                        <option value="G">Natural logarithm [f(x) = B + A * log(x)]</option>
                    </param>
                    <!-- Kept directly after the function select so that "the above function" in their help text is accurate -->
                    <param name="constant_term" type="float" value="0" label="Constant term (B)" help="Constant term for the above function" />
                    <param name="coefficient" type="float" value="-0.2" label="Coefficient (A)" help="Coefficient for the above function" />
                </when>
            </conditional>

            <conditional name="spliced_options">
                <param name="spliced_options_selector" type="select" label="Spliced alignment options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify spliced alignment options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="canonical_penalty" argument="--pen-cansplice" type="integer" value="0" min="0" label="Penalty for canonical splice sites" />
                    <param name="noncanonical_penalty" argument="--pen-noncansplice" type="integer" value="12" min="0" label="Penalty for non-canonical splice sites" />
                    <param name="function_type" argument="--pen-canintronlen" type="select" display="radio" label="Penalty function for long introns with canonical splice sites" help="Alignments with shorter introns are preferred to those with longer ones">
                        <option value="C">Constant [f(x) = B]</option>
                        <option value="L">Linear [f(x) = B + A * x]</option>
                        <option value="S">Square root [f(x) = B + A * x&#178;]</option>
                        <option value="G" selected="true">Natural logarithm [f(x) = B + A * log(x)]</option>
                    </param>
                    <param name="constant_term" type="float" value="-8" label="Constant term (B)" help="Constant term for the above function" />
                    <param name="coefficient" type="float" value="1" label="Coefficient (A)" help="Coefficient for the above function" />
                    <param name="nc_function_type" argument="--pen-noncanintronlen" type="select" display="radio" label="Penalty function for long introns with non-canonical splice sites" help="Alignments with shorter introns are preferred to those with longer ones">
                        <option value="C">Constant [f(x) = B]</option>
                        <option value="L">Linear [f(x) = B + A * x]</option>
                        <option value="S">Square root [f(x) = B + A * x&#178;]</option>
                        <option value="G" selected="true">Natural logarithm [f(x) = B + A * log(x)]</option>
                    </param>
                    <param name="nc_constant_term" type="float" value="-8" label="Constant term (B)" help="Constant term for the above function" />
                    <param name="nc_coefficient" type="float" value="1" label="Coefficient (A)" help="Coefficient for the above function" />
                    <param name="min_intron" type="integer" value="20" min="0" label="Minimum intron length" />
                    <param name="max_intron" type="integer" value="500000" min="0" label="Maximum intron length" />
                    <conditional name="no_spliced_alignment_options">
                        <!-- Selector for the no-spliced-alignment flag: its value is either empty (the option
                             labelled False) or the literal flag text (the option labelled True). The paired-end
                             fragment length limits -I and -X are only offered in the branch where the flag is
                             selected; the empty branch defines no further parameters. -->
                        <param name="no_spliced_alignment" argument="--no-spliced-alignment" type="select" label="Disable spliced alignment">
                            <option value="">False</option>
                            <option value="--no-spliced-alignment">True</option>
                        </param>
                        <when value="--no-spliced-alignment">
                            <param name="minins" argument="-I" type="integer" value="0" min="0" label="Minimum fragment length for valid paired-end alignments" help="E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient" />
                            <param name="maxins" argument="-X" type="integer" value="500" min="0" label="Maximum fragment length for valid paired-end alignments" help="E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient" />
                        </when>
                        <when value="" />
                    </conditional>
                    <!-- Optional GTF annotation. When one is supplied under the advanced spliced options, the
                         command section links it as splice_sites.gtf and runs hisat2_extract_splice_sites.py
                         on it to produce splice_sites.txt. -->
                    <param name="known_splice_gtf" type="data" format="gtf" optional="true" label="GTF file with known splice sites" />
                    <!-- Radio selector for transcriptome-assembly reporting. The first option has an empty
                         value; every other option value is spelled as a command-line flag.
                         NOTE(review): presumably the selected value is interpolated verbatim into the hisat2
                         command line; confirm in the command section. -->
                    <param name="tma" type="select" display="radio" label="Transcriptome assembly reporting">
                        <option value="">Use default reporting</option>
                        <option value="--tmo">Report only those alignments within known transcripts</option>
                        <option value="--dta">Report alignments tailored for transcript assemblers including StringTie</option>
                        <option value="--dta-cufflinks">Report alignments tailored specifically for Cufflinks</option>
                    </param>
                    <!-- Checkbox whose value is the literal no-templatelen-adjustment flag when ticked and an
                         empty string when left unticked. -->
                    <param name="notmplen" argument="--no-templatelen-adjustment" type="boolean" truevalue="--no-templatelen-adjustment" falsevalue="" label="Disable automatic template length adjustment for RNA-seq reads" help="Default: false" />
                    <!-- Plain boolean (no truevalue/falsevalue): the novel_splicesite_output filter in the
                         outputs section tests this value with "is True" to decide whether the extra tabular
                         dataset is created. The explicit name is kept; the argument attribute only documents
                         the corresponding HISAT2 flag, as the neighbouring parameters do. -->
                    <param name="novel_splicesite_outfile" argument="--novel-splicesite-outfile" type="boolean" checked="false" label="Report a list of novel splice sites" help="Default: false" />
                </when>
            </conditional>

            <!-- Reporting options: the advanced branch exposes a single optional integer, the -k limit on
                 the number of distinct primary alignments searched for per read. The field carries no
                 preset value; its help text gives the program defaults as 5 (HFM) or 10 (HGFM). -->
            <conditional name="reporting_options">
                <param name="reporting_options_selector" type="select" label="Reporting options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify reporting options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="max_primary" argument="-k" type="integer" min="0" optional="true" label="Primary alignments" help="Search for at most K distinct, primary alignments for each read. Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. The search terminates when it can't find more distinct valid alignments, or when it finds K, whichever happens first. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. For reads that have more than K distinct, valid alignments, HISAT2 does not guarantee that the K alignments reported are the best possible in terms of alignment score. HISAT2 is not designed with large values for -k in mind, so when aligning reads to long repetitive genomes, a large K can be very, very slow. Default: 5 (HFM) or 10 (HGFM)" />
                </when>
            </conditional>

             <!-- Output options: two checkboxes, both unchecked by default, that request extra read files.
                  The output filters in the outputs section test these values with "is True" to decide
                  whether the unaligned and aligned read datasets are created. -->
             <conditional name="output_options">
                <param name="output_options_selector" type="select" label="Output options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify output options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="unaligned_file" argument="--un/--un-conc" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write unaligned reads (in fastq format) to separate file(s)" help="This triggers --un parameter for single reads and --un-conc for paired reads" />
                    <param name="aligned_file" argument="--al/--al-conc" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write aligned reads (in fastq format) to separate file(s)" help="This triggers --al parameter for single reads and --al-conc for paired reads" />
                </when>
            </conditional>

            <!-- Other options: pseudo-random number generator controls. "seed" is a non-negative integer
                 preset to 0; "non_deterministic" evaluates to the literal non-deterministic flag when
                 ticked and to an empty string otherwise. -->
            <conditional name="other_options">
                <param name="other_options_selector" type="select" label="Other options">
                    <option value="defaults">Use default values</option>
                    <option value="advanced">Specify other options</option>
                </param>
                <when value="defaults" />
                <when value="advanced">
                    <param name="seed" argument="--seed" type="integer" value="0" min="0" label="Use this number as the seed for pseudo-random number generator" help="Default=0" />
                    <param name="non_deterministic" argument="--non-deterministic" type="boolean" truevalue="--non-deterministic" falsevalue="" label="Re-initialize the pseudo-random generator for each read using the current time" help="see Help below for explanation of this option; default: False"/>
                </when>
            </conditional>
        </section>

    </inputs>

    <!-- Define outputs -->

    <outputs>

        <!-- BAM: the alignment dataset. It has no filter element, so it is not subject to any of the
             conditions that gate the other outputs. The dbKeyActions macro is not defined in this
             file; presumably it comes from the imported hisat2_macros.xml. -->
        <data name="output_alignments" format="bam" label="${tool.name} on ${on_string}: aligned reads (BAM)">
            <actions>
                <expand macro="dbKeyActions" />
            </actions>
        </data>

        <!-- Unaligned fastq (L): created only when the advanced output options request unaligned reads.
             The declared fastqsanger format is overridden with the datatype of the reads supplied as
             input. Plain dataset inputs (single, paired, paired_interleaved) expose "ext" directly;
             only the paired collection has to be addressed through its forward element. -->
        <data name="output_unaligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (L)">
            <filter>adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['unaligned_file'] is True</filter>
            <actions>
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                    <when value="paired_interleaved">
                        <!-- The interleaved input is one dataset, not a collection, so it has no
                             forward element to dereference. -->
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions" />
            </actions>
        </data>

        <!-- Aligned fastq (L): created only when the advanced output options request aligned reads.
             The declared fastqsanger format is overridden with the datatype of the reads supplied as
             input. Plain dataset inputs (single, paired, paired_interleaved) expose "ext" directly;
             only the paired collection has to be addressed through its forward element. -->
        <data name="output_aligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads (L)">
            <filter>adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['aligned_file'] is True</filter>
            <actions>
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                    <when value="paired_interleaved">
                        <!-- The interleaved input is one dataset, not a collection, so it has no
                             forward element to dereference. -->
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions" />
            </actions>
        </data>

        <!-- Unaligned fastq (R): second-mate counterpart of the unaligned reads output. The filter only
             lets it through for the "paired" and "paired_collection" library types, so the "single" and
             "paired_interleaved" branches below cannot be reached while that filter is in place.
             NOTE(review): the datatype is taken from input_1 / the forward element even though this is
             the reverse-mate file; confirm that is intended. -->
        <data name="output_unaligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (R)">
            <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection') and (adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['unaligned_file'] is True) </filter>
            <actions>
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                    <when value="paired_interleaved">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions" />
            </actions>
        </data>

        <!-- Aligned fastq (R): second-mate counterpart of the aligned reads output. The filter only
             lets it through for the "paired" and "paired_collection" library types, so the "single" and
             "paired_interleaved" branches below cannot be reached while that filter is in place.
             NOTE(review): the datatype is taken from input_1 / the forward element even though this is
             the reverse-mate file; confirm that is intended. -->
        <data name="output_aligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads (R)">
            <filter>(library['type'] == 'paired' or library['type'] == 'paired_collection') and (adv['output_options']['output_options_selector'] == "advanced" and adv['output_options']['aligned_file'] is True) </filter>
            <actions>
                <conditional name="library.type">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                    <when value="paired_interleaved">
                        <action type="format">
                            <option type="from_param" name="library.input_1" param_attribute="forward.ext" />
                        </action>
                    </when>
                </conditional>
                <expand macro="dbKeyActions" />
            </actions>
        </data>

     <!-- Alignment summary file: plain-text dataset collected from summary.txt in the job working
          directory, and only created when sum['summary_file'] evaluates to True. -->
    <data name="summary_file" format="txt" from_work_dir="summary.txt" label="${tool.name} on ${on_string}: Mapping summary">
        <filter>sum['summary_file'] is True</filter>
        <actions>
            <expand macro="dbKeyActions" />
        </actions>
    </data>

    <!-- Novel Splice file: tabular dataset created only when the advanced spliced options are selected
         and novel_splicesite_outfile is ticked. There is no from_work_dir attribute here.
         NOTE(review): presumably the command writes straight to this dataset's path; confirm in the
         command section. -->
    <data name="novel_splicesite_output" format="tabular" label="${tool.name} on ${on_string}: Novel Splice Sites">
        <filter>adv['spliced_options']['spliced_options_selector'] == 'advanced' and adv['spliced_options']['novel_splicesite_outfile'] is True</filter>
        <actions>
            <expand macro="dbKeyActions" />
        </actions>
    </data>


    </outputs>

    <!-- Define tests -->

    <tests>
        <!-- Ensure bam output works: paired fastqsanger reads aligned against a FASTA reference taken
             from the history. The advanced reporting branch is selected without giving -k a value,
             novel_splicesite_outfile is explicitly false, and the BAM must be the only output. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" value="hisat_input_1_reverse.fastq" />
            <param name="adv|reporting_options|reporting_options_selector" value="advanced"/>
            <param name="novel_splicesite_outfile" value="false" />
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure advanced scoring options work: same reads and reference as the basic BAM test, plus
             a coefficient of -0.3 under adv|scoring_options.
             NOTE(review): the expected file is the same hisat_output_1.bam used by the tests that
             leave scoring untouched; confirm that the coefficient is really applied by this test. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" value="hisat_input_1_reverse.fastq" />
            <param name="adv|scoring_options|coefficient" value="-0.3"/>
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure built-in reference works: the reference source is "indexed" instead of a history
             FASTA, and both read files carry dbkey phiX. The result must match the BAM produced by
             the history-reference test. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="indexed" />
            <param name="input_1" ftype="fastqsanger" dbkey="phiX" value="hisat_input_1_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" dbkey="phiX" value="hisat_input_1_reverse.fastq" />
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure trimming works: advanced input options with trim3 and trim5 both set to 15, run on
             the second pair of fastqsanger test reads and compared against hisat_output_2.bam. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim3" value="15" />
            <param name="trim5" value="15" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_2_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" value="hisat_input_2_reverse.fastq" />
            <output name="output_alignments" file="hisat_output_2.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure paired options work: the trimming test settings plus advanced paired options with
             no_mixed and no_discordant both enabled; compared against hisat_output_3.bam. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim3" value="15" />
            <param name="trim5" value="15" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_2_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" value="hisat_input_2_reverse.fastq" />
            <param name="paired_options_selector" value="advanced" />
            <param name="no_mixed" value="True" />
            <param name="no_discordant" value="True" />
            <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure single unaligned output works: single-end FASTA reads with the unaligned reads
             output requested. Two datasets are expected (BAM plus unaligned reads), and the unaligned
             reads must equal the input FASTA. -->
        <test expect_num_outputs="2">
            <param name="type" value="single" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="output_options_selector" value="advanced" />
            <param name="unaligned_file" value="true" />
            <param name="input_1" ftype="fasta" value="test_unaligned_reads.fasta" />
            <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" />
        </test>
        <!-- Ensure paired unaligned/aligned output works: the same FASTA file is used for both mates
             and both extra read outputs are requested, giving five datasets (BAM, unaligned L/R,
             aligned L/R). Only the two unaligned datasets are compared, each against the input FASTA.
             NOTE(review): the aligned L/R datasets are counted but their content is not checked. -->
        <test expect_num_outputs="5">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="output_options_selector" value="advanced" />
            <param name="unaligned_file" value="true" />
            <param name="aligned_file" value="true" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fasta" value="test_unaligned_reads.fasta" />
            <param name="input_2" ftype="fasta" value="test_unaligned_reads.fasta" />
            <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" />
            <output name="output_unaligned_reads_r" file="test_unaligned_reads.fasta" />
        </test>
        <!-- Ensure fastqsanger.gz works: identical settings to the paired options test, but the reads
             are supplied gzip-compressed. The expected BAM is the same hisat_output_3.bam. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim3" value="15" />
            <param name="trim5" value="15" />
            <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_2_forward.fastq.gz" />
            <param name="input_2" ftype="fastqsanger.gz" value="hisat_input_2_reverse.fastq.gz" />
            <param name="paired_options_selector" value="advanced" />
            <param name="no_mixed" value="True" />
            <param name="no_discordant" value="True" />
            <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure fastqsanger.bz2 works: identical settings to the paired options test, but the reads
             are supplied bzip2-compressed. The expected BAM is the same hisat_output_3.bam. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim3" value="15" />
            <param name="trim5" value="15" />
            <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_2_forward.fastq.bz2" />
            <param name="input_2" ftype="fastqsanger.bz2" value="hisat_input_2_reverse.fastq.bz2" />
            <param name="paired_options_selector" value="advanced" />
            <param name="no_mixed" value="True" />
            <param name="no_discordant" value="True" />
            <output name="output_alignments" file="hisat_output_3.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure paired strandness works: the basic paired inputs with rna_strandness set to FR;
             compared against its own expected file, hisat_output_4.bam. -->
        <test expect_num_outputs="1">
            <param name="type" value="paired" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
            <param name="input_2" ftype="fastqsanger" value="hisat_input_1_reverse.fastq" />
            <param name="rna_strandness" value="FR" />
            <output name="output_alignments" file="hisat_output_4.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure summary file output works: single-end reads with strandness R, with both
             new_summary and summary_file enabled. Two datasets are expected (BAM plus summary); only
             the summary text is compared. -->
        <test expect_num_outputs="2">
            <param name="type" value="single" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_forward.fastq" />
            <param name="rna_strandness" value="R" />
            <param name="new_summary" value="true" />
            <param name="summary_file" value="true" />
            <output name="summary_file" file="hisat_output.summary" ftype="txt" />
        </test>
        <!-- Ensure interleaved input works: one interleaved fastqsanger dataset is given as input_1
             and must yield the same hisat_output_1.bam as the test with separate mate files. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired_interleaved" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_interleaved.fastq" />
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure interleaved bz input works: the interleaved reads are supplied bzip2-compressed
             (fastqsanger.bz2) and must yield the same hisat_output_1.bam. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired_interleaved" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger.bz2" value="hisat_input_1_interleaved.fastq.bz2" />
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure interleaved gz input works: the interleaved reads are supplied gzip-compressed
             (fastqsanger.gz) and must yield the same hisat_output_1.bam. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired_interleaved" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger.gz" value="hisat_input_1_interleaved.fastq.gz" />
            <output name="output_alignments" file="hisat_output_1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure interleaved fasta input works: the interleaved reads are supplied as FASTA, and the
             result is compared against a separate expected file, hisat_output_1_noqual.bam. -->
        <test expect_num_outputs="1" >
            <param name="type" value="paired_interleaved" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fasta" value="hisat_input_1_interleaved.fasta" />
            <output name="output_alignments" file="hisat_output_1_noqual.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Ensure novel splicesite file output works: single-end reads with the advanced spliced
             options selected and novel_splicesite_outfile enabled. Exactly two datasets are expected,
             the BAM and the tabular novel splice site list, and both are compared. -->
        <test expect_num_outputs="2">
            <param name="type" value="single" />
            <param name="source" value="history" />
            <param name="history_item" ftype="fasta" value="phiX.fa" />
            <param name="input_1" ftype="fastqsanger" value="hisat_input_1_split_forward.fastq" />
            <param name="rna_strandness" value="R" />
            <param name="adv|spliced_options|spliced_options_selector" value="advanced"/>
            <param name="adv|spliced_options|novel_splicesite_outfile" value="true" />
            <output name="output_alignments" file="hisat_output_spliced_1.bam" ftype="bam" lines_diff="2" />
            <output name="novel_splicesite_output" file="novel_splicesite_out.tab" ftype="tabular" />
        </test>
    </tests>

    <help><![CDATA[
Introduction
============

What is HISAT?
--------------

`HISAT <http://ccb.jhu.edu/software/hisat>`__ is a fast and sensitive
spliced alignment program. As part of HISAT, we have developed a new
indexing scheme based on the Burrows-Wheeler transform
(`BWT <http://en.wikipedia.org/wiki/Burrows-Wheeler_transform>`__) and
the `FM index <http://en.wikipedia.org/wiki/FM-index>`__, called
hierarchical indexing, that employs two types of indexes: (1) one global
FM index representing the whole genome, and (2) many separate local FM
indexes for small regions collectively covering the genome. Our
hierarchical index for the human genome (about 3 billion bp) includes
~48,000 local FM indexes, each representing a genomic region of
~64,000bp. As the basis for non-gapped alignment, the FM index is
extremely fast with a low memory footprint, as demonstrated by
`Bowtie <http://bowtie-bio.sf.net>`__. In addition, HISAT provides
several alignment strategies specifically designed for mapping different
types of RNA-seq reads. All these together, HISAT enables extremely fast
and sensitive alignment of reads, in particular those spanning two exons
or more. As a result, HISAT is much faster (>50 times) than
`TopHat2 <http://ccb.jhu.edu/software/tophat>`__ with better alignment
quality. Although it uses a large number of indexes, the memory
requirement of HISAT is still modest, approximately 4.3 GB for human.
HISAT uses the `Bowtie2 <http://bowtie-bio.sf.net/bowtie2>`__
implementation to handle most of the operations on the FM index. In
addition to spliced alignment, HISAT handles reads involving indels and
supports a paired-end alignment mode. Multiple processors can be used
simultaneously to achieve greater alignment speed. HISAT outputs
alignments in `SAM <http://samtools.sourceforge.net/SAM1.pdf>`__ format,
enabling interoperation with a large number of other tools (e.g.
`SAMtools <http://samtools.sourceforge.net>`__,
`GATK <http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit>`__)
that use SAM. HISAT is distributed under the `GPLv3
license <http://www.gnu.org/licenses/gpl-3.0.html>`__, and it runs on
the command line under Linux, Mac OS X and Windows.

Running HISAT
=============

Reporting
---------

The reporting mode governs how many alignments HISAT looks for, and how
to report them.

In general, when we say that a read has an alignment, we mean that it
has a `valid
alignment <#valid-alignments-meet-or-exceed-the-minimum-score-threshold>`__.
When we say that a read has multiple alignments, we mean that it has
multiple alignments that are valid and distinct from one another.

Distinct alignments map a read to different places
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Two alignments for the same individual read are "distinct" if they map
the same read to different places. Specifically, we say that two
alignments are distinct if there are no alignment positions where a
particular read offset is aligned opposite a particular reference offset
in both alignments with the same orientation. E.g. if the first
alignment is in the forward orientation and aligns the read character at
read offset 10 to the reference character at chromosome 3, offset
3,445,245, and the second alignment is also in the forward orientation
and also aligns the read character at read offset 10 to the reference
character at chromosome 3, offset 3,445,245, they are not distinct
alignments.

Two alignments for the same pair are distinct if either the mate 1s in
the two paired-end alignments are distinct or the mate 2s in the two
alignments are distinct or both.

Default mode: search for one or more alignments, report each
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

HISAT searches for up to N distinct, primary alignments for each read,
where N equals the integer specified with the ``-k`` parameter. Primary
alignments mean alignments whose alignment score is equal or higher than
any other alignments. It is possible that multiple distinct alignments
have the same score. That is, if ``-k 2`` is specified, HISAT will
search for at most 2 distinct alignments. The alignment score for a
paired-end alignment equals the sum of the alignment scores of the
individual mates. Each reported read or pair alignment beyond the first
has the SAM 'secondary' bit (which equals 256) set in its FLAGS field.
See the `SAM specification <http://samtools.sourceforge.net/SAM1.pdf>`__
for details.

HISAT does not "find" alignments in any specific order, so for reads
that have more than N distinct, valid alignments, HISAT does not
guarantee that the N alignments reported are the best possible in terms
of alignment score. Still, this mode can be effective and fast in
situations where the user cares more about whether a read aligns (or
aligns a certain number of times) than where exactly it originated.

Alignment summary
-----------------

When HISAT finishes running, it prints messages summarizing what happened. These messages are printed to the "standard error" ("stderr") filehandle and can be optionally printed to a file. Choose `--new-summary` under **Summary Options** for compatibility with `MultiQC <http://multiqc.info/docs/#hisat2>`_.
For datasets consisting of unpaired reads, the summary might look like this:

::

    20000 reads; of these:
      20000 (100.00%) were unpaired; of these:
        1247 (6.24%) aligned 0 times
        18739 (93.69%) aligned exactly 1 time
        14 (0.07%) aligned >1 times
    93.77% overall alignment rate

For datasets consisting of pairs, the summary might look like this:

::

    10000 reads; of these:
      10000 (100.00%) were paired; of these:
        650 (6.50%) aligned concordantly 0 times
        8823 (88.23%) aligned concordantly exactly 1 time
        527 (5.27%) aligned concordantly >1 times
        ----
        650 pairs aligned concordantly 0 times; of these:
          34 (5.23%) aligned discordantly 1 time
        ----
        616 pairs aligned 0 times concordantly or discordantly; of these:
          1232 mates make up the pairs; of these:
            660 (53.57%) aligned 0 times
            571 (46.35%) aligned exactly 1 time
            1 (0.08%) aligned >1 times
    96.70% overall alignment rate

The indentation indicates how subtotals relate to totals.


.. class:: infomark

**HISAT2 options**

Galaxy wrapper for HISAT2 implements most, but not all, options available through the command line. Supported options are described below.

-----

**Inputs**

HISAT2 accepts files in FASTQ or FASTA format (single-end or paired-end).

Note that if your reads are from a stranded library, you need to choose the appropriate setting under **Specify strand information** above. For single-end reads, use F or R. 'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript. For paired-end reads, use either FR or RF. With this option being used, every read alignment will have an XS attribute tag: '+' means a read belongs to a transcript on '+' strand of genome. '-' means a read belongs to a transcript on '-' strand of genome. (TopHat has a similar option, --library-type option, where fr-firststrand corresponds to R and RF; fr-secondstrand corresponds to F and FR.)

------

**Input options**::

    -s/--skip <int>
            Skip (i.e. do not align) the first `<int>` reads or pairs in the input.

    -u/--qupto <int>
            Align the first `<int>` reads or read pairs from the input (after the
            `-s`/`--skip` reads or pairs have been skipped), then stop.  Default: no limit.

    -5/--trim5 <int>
            Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0).

    -3/--trim3 <int>
            Trim `<int>` bases from 3' (right) end of each read before alignment (default: 0).

    --phred33
            Input qualities are ASCII chars equal to the Phred quality plus 33.  This is
            also called the "Phred+33" encoding, which is used by the very latest Illumina
            pipelines.

    --phred64
            Input qualities are ASCII chars equal to the Phred quality plus 64.  This is
            also called the "Phred+64" encoding.

    --solexa-quals
            Convert input qualities from Solexa Phred quality (which can be negative) to
            Phred quality (which can't).  This scheme was used in older Illumina GA
            Pipeline versions (prior to 1.3).  Default: off.

    --int-quals
            Quality values are represented in the read input file as space-separated ASCII integers, e.g., `40 40 30 40`..., rather than ASCII characters, e.g., `II?I`....
            Integers are treated as being on the Phred quality scale unless
            `--solexa-quals` is also specified. Default: off.

------

**Alignment options**::

    --n-ceil <func>
            Sets a function governing the maximum number of ambiguous characters (usually
            `N`s and/or `.`s) allowed in a read as a function of read length.  For instance,
            specifying `L,0,0.15` sets the N-ceiling function `f` to `f(x) = 0 + 0.15 * x`,
            where x is the read length.  Reads exceeding this ceiling are filtered out.
            Default: `L,0,0.15`.

    --ignore-quals
            When calculating a mismatch penalty, always consider the quality value at the
            mismatched position to be the highest possible, regardless of the actual value.
            I.e. input is treated as though all quality values are high.  This is also the
            default behavior when the input doesn't specify quality values (e.g. in `-f`,
            `-r`, or `-c` modes).

    --nofw/--norc
            If `--nofw` is specified, `hisat2` will not attempt to align unpaired reads to
            the forward (Watson) reference strand.  If `--norc` is specified, `hisat2` will not attempt to align unpaired reads against the reverse-complement (Crick)
            reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the
            fragments; i.e. specifying `--nofw` causes `hisat2` to explore only those
            paired-end configurations corresponding to fragments from the reverse-complement
            (Crick) strand.  Default: both strands enabled.

-----

**Scoring options**::

    --mp MX,MN
            Sets the maximum (`MX`) and minimum (`MN`) mismatch penalties, both integers. A number less than or equal to `MX` and greater than or equal to `MN` is
            subtracted from the alignment score for each position where a read character
            aligns to a reference character, the characters do not match, and neither is an
            `N`.  If `--ignore-quals` is specified, the number subtracted equals `MX`.
            Otherwise, the number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )`
            where Q is the Phred quality value.  Default: `MX` = 6, `MN` = 2.

    --sp MX,MN
            Sets the maximum (`MX`) and minimum (`MN`) penalties for soft-clipping per base, both integers. A number less than or equal to `MX` and greater than or equal to `MN` is subtracted from the alignment score for each position. The number subtracted is `MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) )` where Q is the Phred quality value. Default: `MX` = 2, `MN` = 1.

    --no-softclip
            Disallow soft-clipping.

    --np <int>
            Sets penalty for positions where the read, reference, or both, contain an
            ambiguous character such as `N`.  Default: 1.

    --rdg <int1>,<int2>
            Sets the read gap open (`<int1>`) and extend (`<int2>`) penalties.  A read gap of
            length N gets a penalty of `<int1>` + N * `<int2>`.  Default: 5, 3.

    --rfg <int1>,<int2>
            Sets the reference gap open (`<int1>`) and extend (`<int2>`) penalties.  A
            reference gap of length N gets a penalty of `<int1>` + N * `<int2>`.  Default:
            5, 3.

    --score-min <func>
            Sets a function governing the minimum alignment score needed for an alignment to
            be considered "valid" (i.e. good enough to report).  This is a function of read
            length. For instance, specifying `L,0,-0.6` sets the minimum-score function `f`
            to `f(x) = 0 + -0.6 * x`, where `x` is the read length. The default is `L,0,-0.2`.

-----

**Spliced alignment options**::

    --pen-cansplice <int>
            Sets the penalty for each pair of canonical splice sites (e.g. GT/AG). Default: 0.

    --pen-noncansplice <int>
            Sets the penalty for each pair of non-canonical splice sites (e.g. non-GT/AG). Default: 12.

    --pen-canintronlen <func>
            Sets the penalty for long introns with canonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1

    --pen-noncanintronlen <func>
            Sets the penalty for long introns with noncanonical splice sites so that alignments with shorter introns are preferred to those with longer ones. Default: G,-8,1

    --min-intronlen <int>
            Sets minimum intron length. Default: 20

    --max-intronlen <int>
            Sets maximum intron length. Default: 500000

    --no-spliced-alignment
            Disable spliced alignment.

    -I/--minins <int>
            The minimum fragment length for valid paired-end alignments. This option is valid only with `--no-spliced-alignment`. E.g. if `-I 60` is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as `-X` is also satisfied). A 19-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-I` constraint is applied with respect to the untrimmed mates.

            The larger the difference between `-I` and `-X`, the slower HISAT2 will run. This is because larger differences between `-I` and `-X` require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient.

            Default: 0 (essentially imposing no minimum)

    -X/--maxins <int>
            The maximum fragment length for valid paired-end alignments. This option is valid only with `--no-spliced-alignment`. E.g. if `-X 100` is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as `-I` is also satisfied). A 61-bp gap would not be valid in that case. If trimming options `-3` or `-5` are also used, the `-X` constraint is applied with respect to the untrimmed mates, not the trimmed mates.

            The larger the difference between `-I` and `-X`, the slower HISAT2 will run. This is because larger differences between `-I` and `-X` require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient.

            Default: 500.

    --known-splicesite-infile <path>
            With this mode, you can provide a list of known splice sites, which HISAT2 makes use of to align reads with small anchors. You can create such a list using `python hisat2_extract_splice_sites.py genes.gtf > splicesites.txt`, where hisat2_extract_splice_sites.py is included in the HISAT2 package, genes.gtf is a gene annotation file, and splicesites.txt is a list of splice sites with which you provide HISAT2 in this mode. Note that indexes built using annotated transcripts (such as genome_tran or genome_snp_tran) work better than this option. Providing splice sites that are already included in the indexes has no effect.

    --tmo/--transcriptome-mapping-only
            Report only those alignments within known transcripts.

    --dta/--downstream-transcriptome-assembly
            Report alignments tailored for transcript assemblers including StringTie. With this option, HISAT2 requires longer anchor lengths for de novo discovery of splice sites. This leads to fewer alignments with short-anchors, which helps transcript assemblers improve significantly in computation and memory usage.

    --dta-cufflinks
            Report alignments tailored specifically for Cufflinks. In addition to what HISAT2 does with the above option (`--dta`), with this option HISAT2 looks for novel splice sites with three signals (GT/AG, GC/AG, AT/AC), but all user-provided splice sites are used irrespective of their signals. HISAT2 produces an optional field, XS:A:[+-], for every spliced alignment.

    --no-templatelen-adjustment
            Disables template length adjustment for RNA-seq reads.

    --novel-splicesite-outfile <path>
            In this mode, HISAT2 reports a list of splice sites in the file at `<path>`:
                chromosome name <tab> genomic position of the flanking base on the left side of an intron <tab> genomic position of the flanking base on the right <tab> strand (+, -, and .). '.' indicates an unknown strand for non-canonical splice sites.

-----

**Reporting options**::

    -k <int>
            It searches for at most `<int>` distinct, primary alignments for each read. Primary alignments are alignments whose alignment score is equal to or higher than that of any other alignment. The search terminates when it can't find more distinct valid alignments, or when it finds `<int>`, whichever happens first. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. For reads that have more than `<int>` distinct, valid alignments, hisat2 does not guarantee that the `<int>` alignments reported are the best possible in terms of alignment score. Default: 5 (HFM) or 10 (HGFM)

            Note: HISAT2 is not designed with large values for `-k` in mind, and when aligning reads to long, repetitive genomes large `-k` can be very, very slow.

-----

**Paired-end options**::

    --fr/--rf/--ff
            The upstream/downstream mate orientations for a valid paired-end alignment
            against the forward reference strand.  E.g., if `--fr` is specified and there is
            a candidate paired-end alignment where mate 1 appears upstream of the reverse
            complement of mate 2 and the fragment length constraints (`-I` and `-X`) are
            met, that alignment is valid.  Also, if mate 2 appears upstream of the reverse
            complement of mate 1 and all other constraints are met, that too is valid.
            `--rf` likewise requires that an upstream mate 1 be reverse-complemented and a
            downstream mate 2 be forward-oriented. `--ff` requires both an upstream mate 1
            and a downstream mate 2 to be forward-oriented.  Default: `--fr` (appropriate
            for Illumina's Paired-end Sequencing Assay).

    --no-mixed
            By default, when `hisat2` cannot find a concordant or discordant alignment for
            a pair, it then tries to find alignments for the individual mates.  This option
            disables that behavior.

    --no-discordant
            By default, `hisat2` looks for discordant alignments if it cannot find any
            concordant alignments.  A discordant alignment is an alignment where both mates
            align uniquely, but that does not satisfy the paired-end constraints
            (`--fr`/`--rf`/`--ff`, `-I`, `-X`).  This option disables that behavior.


**Output options**::

    --un/--un-gz/--un-bz2
            Write unpaired reads that fail to align to file at `<path>`. These reads correspond to the SAM records with the FLAGS `0x4` bit set and neither the `0x40` nor `0x80` bits set. If `--un-gz` is specified, output will be gzip compressed. If `--un-bz2` is specified, output will be bzip2 compressed. Reads written in this way will appear exactly as they did in the input file, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the input.

    --al/--al-gz/--al-bz2
            Write unpaired reads that align at least once to file at `<path>`. These reads correspond to the SAM records with the FLAGS `0x4`, `0x40`, and `0x80` bits unset. If `--al-gz` is specified, output will be gzip compressed. If `--al-bz2` is specified, output will be bzip2 compressed. Reads written in this way will appear exactly as they did in the input file, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the input.

    --un-conc/--un-conc-gz/--un-conc-bz2
            Write paired-end reads that fail to align concordantly to file(s) at `<path>`. These reads correspond to the SAM records with the FLAGS `0x4` bit set and either the `0x40` or `0x80` bit set (depending on whether it's mate #1 or #2). .1 and .2 strings are added to the filename to distinguish which file contains mate #1 and mate #2. If a percent symbol, %, is used in `<path>`, the percent symbol is replaced with 1 or 2 to make the per-mate filenames. Otherwise, .1 or .2 are added before the final dot in `<path>` to make the per-mate filenames. Reads written in this way will appear exactly as they did in the input files, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the inputs.

    --al-conc/--al-conc-gz/--al-conc-bz2
            Write paired-end reads that align concordantly at least once to file(s) at `<path>`. These reads correspond to the SAM records with the FLAGS `0x4` bit unset and either the `0x40` or `0x80` bit set (depending on whether it's mate #1 or #2). .1 and .2 strings are added to the filename to distinguish which file contains mate #1 and mate #2. If a percent symbol, %, is used in <path>, the percent symbol is replaced with 1 or 2 to make the per-mate filenames. Otherwise, .1 or .2 are added before the final dot in `<path>` to make the per-mate filenames. Reads written in this way will appear exactly as they did in the input files, without any modification (same sequence, same name, same quality string, same quality encoding). Reads will not necessarily appear in the same order as they did in the inputs.


**Other options**::

    --seed <int>
            Use `<int>` as the seed for the pseudo-random number generator.  Default: 0.

    --non-deterministic
            Normally, HISAT2 re-initializes its pseudo-random generator for each read. It seeds the generator with a number derived from (a) the read name, (b) the nucleotide sequence, (c) the quality sequence, (d) the value of the `--seed` option. This means that if two reads are identical (same name, same nucleotides, same qualities) HISAT2 will find and report the same alignment(s) for both, even if there was ambiguity. When `--non-deterministic` is specified, HISAT2 re-initializes its pseudo-random generator for each read using the current time. This means that HISAT2 will not necessarily report the same alignment for two identical reads. This is counter-intuitive for some users, but might be more appropriate in situations where the input consists of many identical reads.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/nmeth.3317</citation>
    </citations>
</tool>
author	iuc
date	Sat, 03 Aug 2019 06:09:26 -0400
parents	3fb01a8c902d
children	a86e80d3c09c