Mercurial > repos > iuc > hisat2
view hisat2.xml @ 9:2dbb7f0ea66f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hisat2 commit 1baf0d1eb5d9cb012e4ce1385431461f26f7bc05
author | iuc |
---|---|
date | Tue, 04 Apr 2017 08:15:43 -0400 |
parents | 4d0a3173cde0 |
children | da8d655e2154 |
line wrap: on
line source
<?xml version="1.0"?>
<!--
    Galaxy tool wrapper for HISAT2.
    Builds (or selects) a HISAT2 index, aligns single or paired reads and
    pipes the alignments through "samtools sort" to produce a sorted BAM.
    Shared macros and tokens are imported from hisat2_macros.xml.
-->
<tool id="hisat2" name="HISAT2" version="2.0.5">
    <description>A fast and sensitive alignment program</description>
    <macros>
        <import>hisat2_macros.xml</import>
    </macros>
    <requirements>
        <!-- Conda dependency -->
        <requirement type="package" version="2.0.5">hisat2</requirement>
        <requirement type="package" version="1.4">samtools</requirement>
    </requirements>
    <!-- Any non-zero exit code, or one of the known failure messages, marks the job as failed. -->
    <stdio>
        <regex match="hisat2-align exited with value 1" source="both" level="fatal"/>
        <regex match="hisat2: not found" source="both" level="fatal"/>
        <exit_code range="1:" />
    </stdio>
    <version_command>hisat2 --version</version_command>
    <command><![CDATA[
        ## Convert the optional GTF of known splice sites into the HISAT2 splice site list.
        #if str($spliced_options.spliced_options_selector) == "advanced" and str($spliced_options.known_splice_gtf) != 'None':
            ln -s '${spliced_options.known_splice_gtf}' splice_sites.gtf &&
            hisat2_extract_splice_sites.py splice_sites.gtf > splice_sites.txt &&
        #end if

        ## Use a built-in index, or build one from the FASTA selected from the history.
        #if $reference_genome.reference_genome_source == "history":
            ln -s '$reference_genome.history_item' genome.fa &&
            hisat2-build -p \${GALAXY_SLOTS:-1} genome.fa genome &&
            #set index_path = 'genome'
        #else:
            #set index_path = $reference_genome.index.fields.path
        #end if

        hisat2 -p \${GALAXY_SLOTS:-1} -x '${index_path}'

        ## Read inputs: two datasets, a paired collection, or a single dataset.
        #if str($input_format.paired.paired_selector) == 'paired':
            -1 '${input_format.paired.reads_f}' -2 '${input_format.paired.reads_r}'
            @paired_end_options@
        #else if str($input_format.paired.paired_selector) == 'paired_collection':
            -1 '${input_format.paired.reads.forward}' -2 '${input_format.paired.reads.reverse}'
            @paired_end_options@
        #else:
            -U '${input_format.paired.reads}'
            #if str( $input_format.paired.unaligned_file ) == "true":
                --un '$output_unaligned_reads_l'
            #end if
            #if str( $input_format.paired.aligned_file ) == "true":
                --al '$output_aligned_reads_l'
            #end if
        #end if

        #if $input_format.input_format_selector == 'fasta':
            -f
        #end if

        ## Both parameters are optional integers; an empty value means "use the HISAT2 default".
        #if str($max_primary):
            -k ${max_primary}
        #end if

        #if str($max_seeds):
            --max-seeds $max_seeds
        #end if

        $secondary

        #if str($input_options.input_options_selector) == "advanced":
            #if int( $input_options.skip ) > 0:
                -s ${input_options.skip}
            #end if
            #if int( $input_options.stop_after ) > 0:
                -u ${input_options.stop_after}
            #end if
            -5 ${input_options.trim_five} -3 ${input_options.trim_three}
        #end if

        #if str($scoring_options.scoring_options_selector) == "advanced":
            --ma ${scoring_options.match_bonus}
            --mp ${scoring_options.max_mismatch},${scoring_options.min_mismatch}
            ${scoring_options.no_softclip}
            --np ${scoring_options.ambiguous_penalty}
            --rdg ${scoring_options.read_open_penalty},${scoring_options.read_extend_penalty}
            --rfg ${scoring_options.ref_open_penalty},${scoring_options.ref_extend_penalty}
            --sp ${scoring_options.soft_clip_penalty_max},${scoring_options.soft_clip_penalty_min}
            --score-min ${scoring_options.function_type},${scoring_options.constant_term},${scoring_options.coefficient}
        #end if

        #if str($alignment_options.alignment_options_selector) == "advanced":
            --n-ceil ${alignment_options.function_type},${alignment_options.constant_term},${alignment_options.coefficient}
            ${alignment_options.skip_forward}
            ${alignment_options.skip_reverse}
            ${alignment_options.ignore_quals}
        #end if

        #if str($spliced_options.spliced_options_selector) == "advanced":
            --pen-cansplice ${spliced_options.canonical_penalty}
            --pen-noncansplice ${spliced_options.noncanonical_penalty}
            --pen-canintronlen ${spliced_options.function_type},${spliced_options.constant_term},${spliced_options.coefficient}
            --pen-noncanintronlen ${spliced_options.nc_function_type},${spliced_options.nc_constant_term},${spliced_options.nc_coefficient}
            #if str($spliced_options.known_splice_gtf) != 'None':
                --known-splicesite-infile splice_sites.txt
            #end if
            ${spliced_options.no_spliced_alignment_options.no_spliced_alignment}
            #if str($spliced_options.no_spliced_alignment_options.no_spliced_alignment) == '--no-spliced-alignment':
                -I ${spliced_options.no_spliced_alignment_options.minins}
                -X ${spliced_options.no_spliced_alignment_options.maxins}
            #end if
            --min-intronlen ${spliced_options.min_intron}
            --max-intronlen ${spliced_options.max_intron}
            ${spliced_options.tma}
            @strandedness_parameters@
        #end if

        #if str($paired_options.paired_options_selector) == "advanced":
            --minins ${paired_options.minins}
            --maxins ${paired_options.maxins}
            ${paired_options.no_mixed}
            ${paired_options.no_discordant}
            ${paired_options.dovetail}
            ${paired_options.contain}
            ${paired_options.overlap}
        #end if

        | samtools sort - -@ \${GALAXY_SLOTS:-1} -l 6 -o '${output_alignments}'

        ## Rename any output fastq files
        ## For paired input the per-mate files are expected at <name>.1.dat and <name>.2.dat
        ## (derived from the left output path); move them onto the Galaxy output datasets.
        #if str($input_format.paired.paired_selector) == 'paired' or str($input_format.paired.paired_selector) == 'paired_collection':
            #if $output_unaligned_reads_l and $output_unaligned_reads_r:
                #set left = str($output_unaligned_reads_l).replace(".dat", ".1.dat")
                #set right = str($output_unaligned_reads_l).replace(".dat", ".2.dat")
                && mv '${left}' '${output_unaligned_reads_l}'
                && mv '${right}' '${output_unaligned_reads_r}'
            #end if
            #if $output_aligned_reads_l and $output_aligned_reads_r:
                #set left = str($output_aligned_reads_l).replace(".dat", ".1.dat")
                #set right = str($output_aligned_reads_l).replace(".dat", ".2.dat")
                && mv '${left}' '${output_aligned_reads_l}'
                && mv '${right}' '${output_aligned_reads_r}'
            #end if
        #end if
    ]]></command>
    <inputs>
        <!-- Read input: format selector wrapping the single/paired/collection conditional from the macros file. -->
        <conditional name="input_format">
            <param name="input_format_selector" type="select" label="Input data format">
                <option value="fastq" selected="true">FASTQ</option>
                <option value="fasta">FASTA</option>
            </param>
            <when value="fasta">
                <expand macro="paired_input_conditional" ftype="fasta" />
            </when>
            <when value="fastq">
                <expand macro="paired_input_conditional" ftype="fastq" />
            </when>
        </conditional>
        <!-- Reference: a pre-built index from the hisat2_indexes data table, or a FASTA from the history. -->
        <conditional name="reference_genome">
            <param name="reference_genome_source" type="select" label="Source for the reference genome to align against" help="Built-in references were created using default options">
                <option value="indexed" selected="True">Use a built-in genome</option>
                <option value="history">Use a genome from history</option>
            </param>
            <when value="indexed">
                <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
                    <options from_data_table="hisat2_indexes">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No genomes are available for the selected input dataset" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="history_item" type="data" format="fasta" label="Select the reference genome" />
            </when>
        </conditional>
        <param argument="-k" name="max_primary" type="integer" value="" optional="true" label="Primary alignments"
               help="Search for at most K distinct, primary alignments for each read. Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. The search terminates when it can't find more distinct valid alignments, or when it finds K, whichever happens first. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. For reads that have more than K distinct, valid alignments, hisat2 does not guarantee that the K alignments reported are the best possible in terms of alignment score. HISAT2 is not designed with large values for -k in mind, so when aligning reads to long repetitive genomes, a large K can be very, very slow. Default: 5 (HFM) or 10 (HGFM)" />
        <param argument="--max-seeds" name="max_seeds" type="integer" value="" optional="true" label="Maximum number of seeds that will be extended"
               help="HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to full-length alignments. HISAT2 extends up to these many seeds and skips the rest of the seeds. Large values for --max-seeds may improve alignment sensitivity, but HISAT2 is not designed with large values for --max-seeds in mind, and when aligning reads to long repetitive genomes, a large --max-seeds can be very, very slow. Default: 5 (HFM) or 10 (HGFM)" />
        <param argument="--secondary" type="boolean" truevalue="--secondary" falsevalue="" label="Report secondary alignments" />
        <conditional name="alignment_options">
            <param label="Alignment options" name="alignment_options_selector" type="select">
                <option value="defaults">Use default values</option>
                <option value="advanced">Specify alignment parameters</option>
            </param>
            <when value="defaults" />
            <when value="advanced">
                <expand macro="function" helptext="Sets a function governing the maximum number of ambiguous characters" />
                <param argument="--ignore-quals" name="ignore_quals" type="boolean" truevalue="--ignore-quals" falsevalue="" label="Ignore quality values"
                       help="When calculating a mismatch penalty, always consider the quality value at the mismatched position to be the highest possible, regardless of the actual value. I.e. input is treated as though all quality values are high. This is also the default behavior when the input doesn't specify quality values" />
                <param argument="--nofw" name="skip_forward" type="boolean" truevalue="--nofw" falsevalue="" label="Skip forward strand of reference"
                       help="If --nofw is specified, hisat2 will not attempt to align unpaired reads to the forward (Watson) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --nofw causes hisat2 to explore only those paired-end configurations corresponding to fragments from the reverse-complement (Crick) strand" />
                <param argument="--norc" name="skip_reverse" type="boolean" truevalue="--norc" falsevalue="" label="Skip reverse strand of reference"
                       help="If --norc is specified, hisat2 will not attempt to align unpaired reads against the reverse-complement (Crick) reference strand. In paired-end mode, --nofw and --norc pertain to the fragments; i.e. specifying --norc causes hisat2 to explore only those paired-end configurations corresponding to fragments from the forward (Watson) strand" />
            </when>
        </conditional>
        <conditional name="input_options">
            <param name="input_options_selector" type="select" label="Input options">
                <option value="defaults">Use default values</option>
                <option value="advanced">Specify input parameters</option>
            </param>
            <when value="defaults" />
            <when value="advanced">
                <param argument="-s" name="skip" type="integer" min="0" value="0" label="Skip the first N reads or pairs in the input" />
                <param argument="-u" name="stop_after" type="integer" min="0" value="0" label="Stop after aligning N reads" help="Align the first N reads or read pairs from the input (after the first N reads or pairs have been skipped), then stop" />
                <param argument="-5" name="trim_five" type="integer" min="0" value="0" label="Trim 5' end" help="Trim N bases from 5' (left) end of each read before alignment" />
                <param argument="-3" name="trim_three" type="integer" min="0" value="0" label="Trim 3' end" help="Trim N bases from 3' (right) end of each read before alignment" />
            </when>
        </conditional>
        <conditional name="scoring_options">
            <param name="scoring_options_selector" type="select" label="Scoring options">
                <option value="defaults">Use default values</option>
                <option value="advanced">Specify scoring parameters</option>
            </param>
            <when value="defaults" />
            <when value="advanced">
                <!-- Quotes inside an attribute value must be written as &quot; to keep the document well-formed. -->
                <expand macro="function" helptext="Sets a function governing the minimum alignment score needed for an alignment to be considered &quot;valid&quot; (i.e. good enough to report)" />
                <param argument="--ma" name="match_bonus" type="integer" value="2" label="Set match bonus"
                       help="In local mode N is added to the alignment score for each position where a read character aligns to a reference character and the characters match. Not used in end-to-end mode" />
                <param argument="--mp" name="max_mismatch" type="integer" value="6" label="Maximum mismatch penalty"
                       help="Sets the maximum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                <param argument="--mp" name="min_mismatch" type="integer" value="2" label="Minimum mismatch penalty"
                       help="Sets the minimum mismatch penalty. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position where a read character aligns to a reference character, the characters do not match, and neither is an N. If --ignore-quals is specified, the number subtracted equals MX. Otherwise, the number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                <param argument="--no-softclip" name="no_softclip" type="boolean" truevalue="--no-softclip" falsevalue="" label="Disallow soft-clipping" />
                <param argument="--np" name="ambiguous_penalty" type="integer" value="1" label="Ambiguous read penalty" help="Sets penalty for positions where the read, reference, or both, contain an ambiguous character such as N" />
                <param argument="--sp" name="soft_clip_penalty_max" type="integer" value="2" label="Maximum soft-clipping penalty"
                       help="Sets the maximum (MX) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                <param argument="--sp" name="soft_clip_penalty_min" type="integer" value="1" label="Minimum soft-clipping penalty"
                       help="Sets the minimum (MN) penalty for soft-clipping per base. A number less than or equal to MX and greater than or equal to MN is subtracted from the alignment score for each position. The number subtracted is MN + floor( (MX-MN)(MIN(Q, 40.0)/40.0) ) where Q is the Phred quality value" />
                <param argument="--rdg" name="read_open_penalty" type="integer" value="5" label="Read gap open penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                <param argument="--rdg" name="read_extend_penalty" type="integer" value="3" label="Read gap extend penalty" help="A read gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                <param argument="--rfg" name="ref_open_penalty" type="integer" value="5" label="Reference gap open penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
                <param argument="--rfg" name="ref_extend_penalty" type="integer" value="3" label="Reference gap extend penalty" help="A reference gap of length N gets a penalty of [open_penalty] + N * [extend_penalty]" />
            </when>
        </conditional>
        <conditional name="spliced_options">
            <param name="spliced_options_selector" type="select" label="Spliced alignment parameters">
                <option value="defaults">Use default values</option>
                <option value="advanced">Specify spliced alignment parameters</option>
            </param>
            <when value="defaults" />
            <when value="advanced">
                <param name="canonical_penalty" type="integer" value="0" label="Penalty for canonical splice sites" />
                <param name="noncanonical_penalty" type="integer" value="3" label="Penalty for non-canonical splice sites" />
                <param name="function_type" type="select" display="radio" label="Penalty for long introns with canonical splice sites">
                    <option value="C">Constant</option>
                    <option value="L">Linear [f(x) = y + z * x]</option>
                    <option value="S">Square root [f(x) = y + z * sqrt(x)]</option>
                    <option value="G">Natural logarithm [f(x) = y + z * log(x)]</option>
                </param>
                <param name="constant_term" type="integer" value="0" label="Constant term (y)" help="Constant term for long canonical introns" />
                <param name="coefficient" type="integer" value="0" label="Coefficient (z)" help="Coefficient for long canonical introns" />
                <param name="nc_function_type" type="select" display="radio" label="Penalty for long introns with noncanonical splice sites">
                    <option value="C">Constant</option>
                    <option value="L">Linear [f(x) = y + z * x]</option>
                    <option value="S">Square root [f(x) = y + z * sqrt(x)]</option>
                    <option value="G" selected="True">Natural logarithm [f(x) = y + z * log(x)]</option>
                </param>
                <param name="nc_constant_term" type="integer" value="-8" label="Constant term (y)" help="Constant term for long non-canonical introns" />
                <param name="nc_coefficient" type="integer" value="1" label="Coefficient (z)" help="Coefficient for long non-canonical introns" />
                <param name="min_intron" type="integer" value="20" label="Minimum intron length" />
                <param name="max_intron" type="integer" value="500000" label="Maximum intron length" />
                <param argument="--rna-strandness" name="rna_strandness" type="select" label="Specify strand-specific information" help="'F' means a read corresponds to a transcript. 'R' means a read corresponds to the reverse complemented counterpart of a transcript">
                    <option value="">FR Unstranded</option>
                    <option value="R">First Strand (R/RF)</option>
                    <option value="F">Second Strand (F/FR)</option>
                </param>
                <conditional name="no_spliced_alignment_options">
                    <param argument="--no-spliced-alignment" name="no_spliced_alignment" type="select" label="Disable spliced alignment">
                        <option value="--no-spliced-alignment">True</option>
                        <option value="">False</option>
                    </param>
                    <when value="--no-spliced-alignment">
                        <param argument="-I" name="minins" type="integer" value="0" label="Minimum fragment length for valid paired-end alignments"
                               help="E.g. if -I 60 is specified and a paired-end alignment consists of two 20-bp alignments in the appropriate orientation with a 20-bp gap between them, that alignment is considered valid (as long as -X is also satisfied). A 19-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -I constraint is applied with respect to the untrimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient" />
                        <param argument="-X" name="maxins" type="integer" value="500" label="Maximum fragment length for valid paired-end alignments"
                               help="E.g. if -X 100 is specified and a paired-end alignment consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, not the trimmed mates. The larger the difference between -I and -X, the slower HISAT2 will run. This is because larger differences between -I and -X require that HISAT2 scan a larger window to determine if a concordant alignment exists. For typical fragment length ranges (200 to 400 nucleotides), HISAT2 is very efficient" />
                    </when>
                    <when value="" />
                </conditional>
                <param name="known_splice_gtf" type="data" format="gtf" optional="true" label="GTF file with known splice sites" />
                <param name="tma" type="select" display="radio" label="Transcriptome assembly reporting">
                    <option value="">Use default reporting.</option>
                    <option value="--tmo">Report only those alignments within known transcripts.</option>
                    <option value="--dta">Report alignments tailored for transcript assemblers including StringTie.</option>
                    <option value="--dta-cufflinks">Report alignments tailored specifically for Cufflinks.</option>
                </param>
            </when>
        </conditional>
        <conditional name="paired_options">
            <param name="paired_options_selector" type="select" label="Paired alignment parameters">
                <option value="defaults">Use default values</option>
                <option value="advanced">Specify paired alignment parameters</option>
            </param>
            <when value="defaults" />
            <when value="advanced">
                <param argument="--minins" name="minins" type="integer" value="0" label="Minimum fragment length" help="The minimum fragment length for valid paired-end alignments. 0 sets no minimum" />
                <param argument="--maxins" name="maxins" type="integer" value="500" label="Maximum fragment length" help="The maximum fragment length for valid paired-end alignments" />
                <param argument="--no-mixed" name="no_mixed" type="boolean" truevalue="--no-mixed" falsevalue="" label="Disable finding alignments for individual mates"
                       help="By default, when hisat2 cannot find a concordant or discordant alignment for a pair, it then tries to find alignments for the individual mates" />
                <param argument="--no-discordant" name="no_discordant" type="boolean" truevalue="--no-discordant" falsevalue="" label="Disable looking for discordant alignments"
                       help="By default, hisat2 looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints" />
                <!-- Enabling this option passes the dovetail flag, i.e. dovetailed mates are treated as concordant. -->
                <param argument="--dovetail" name="dovetail" type="boolean" truevalue="--dovetail" falsevalue="" label="Allow mate dovetailing"
                       help="If the mates &quot;dovetail&quot;, that is if one mate alignment extends past the beginning of the other such that the wrong mate begins upstream, consider that to be concordant" />
                <param argument="--no-contain" name="contain" type="boolean" truevalue="--no-contain" falsevalue="" label="Mates cannot contain others" help="If one mate alignment contains the other, consider that to be non-concordant" />
                <param argument="--no-overlap" name="overlap" type="boolean" truevalue="--no-overlap" falsevalue="" label="Mates cannot overlap" help="If one mate alignment overlaps the other at all, consider that to be non-concordant" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Sorted BAM; dbkey is taken from the selected index or from the history FASTA. -->
        <data name="output_alignments" format="bam">
            <actions>
                <conditional name="reference_genome.reference_genome_source">
                    <when value="indexed">
                        <action type="metadata" name="dbkey">
                            <option type="from_data_table" name="hisat2_indexes" column="1" offset="0">
                                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
                                <filter type="param_value" ref="reference_genome.index" column="0"/>
                            </option>
                        </action>
                    </when>
                    <when value="history">
                        <action type="metadata" name="dbkey">
                            <option type="from_param" name="reference_genome.history_item" param_attribute="dbkey" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <!-- Optional read outputs; each inherits its datatype from the corresponding input reads. -->
        <data name="output_unaligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (L)" >
            <filter>input_format['paired']['unaligned_file'] is True</filter>
            <actions>
                <conditional name="input_format.paired.paired_selector">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads_f" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="forward.ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <data name="output_unaligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: unaligned reads (R)" >
            <filter>input_format['paired']['unaligned_file'] is True and input_format['paired']['paired_selector'] != 'single'</filter>
            <actions>
                <conditional name="input_format.paired.paired_selector">
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads_r" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <!-- The right-hand output follows the reverse mate of the collection. -->
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="reverse.ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <data name="output_aligned_reads_l" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads${' (L)' if str($input_format.paired.paired_selector) != 'single' else ''}" >
            <filter>input_format['paired']['aligned_file'] is True</filter>
            <actions>
                <conditional name="input_format.paired.paired_selector">
                    <when value="single">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads_f" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="forward.ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
        <data name="output_aligned_reads_r" format="fastqsanger" label="${tool.name} on ${on_string}: aligned reads (R)" >
            <filter>input_format['paired']['aligned_file'] is True and input_format['paired']['paired_selector'] != 'single'</filter>
            <actions>
                <conditional name="input_format.paired.paired_selector">
                    <when value="paired">
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads_r" param_attribute="ext" />
                        </action>
                    </when>
                    <when value="paired_collection">
                        <!-- The right-hand output follows the reverse mate of the collection. -->
                        <action type="format">
                            <option type="from_param" name="input_format.paired.reads" param_attribute="reverse.ext" />
                        </action>
                    </when>
                </conditional>
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- Paired FASTQ against a history reference, default options. -->
        <test>
            <param name="input_format_selector" value="fastq" />
            <param name="paired_selector" value="paired" />
            <param name="reference_genome_source" value="history" />
            <param ftype="fasta" name="history_item" value="phiX.fa" />
            <param ftype="fastqsanger" name="reads_f" value="hisat_input_1_forward.fastq" />
            <param ftype="fastqsanger" name="reads_r" value="hisat_input_1_reverse.fastq" />
            <output file="hisat_output_1.bam" ftype="bam" name="output_alignments" lines_diff="2" />
        </test>
        <!-- Paired FASTQ with 15 bases trimmed from both read ends. -->
        <test>
            <param name="input_format_selector" value="fastq" />
            <param name="paired_selector" value="paired" />
            <param name="reference_genome_source" value="history" />
            <param ftype="fasta" name="history_item" value="phiX.fa" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim_three" value="15" />
            <param name="trim_five" value="15" />
            <param ftype="fastqsanger" name="reads_f" value="hisat_input_2_forward.fastq" />
            <param ftype="fastqsanger" name="reads_r" value="hisat_input_2_reverse.fastq" />
            <output file="hisat_output_2.bam" ftype="bam" name="output_alignments" lines_diff="2" />
        </test>
        <!-- As above, additionally disabling mixed and discordant alignments. -->
        <test>
            <param name="input_format_selector" value="fastq" />
            <param name="paired_selector" value="paired" />
            <param name="reference_genome_source" value="history" />
            <param name="history_item" value="phiX.fa" ftype="fasta" />
            <param name="input_options_selector" value="advanced" />
            <param name="trim_three" value="15" />
            <param name="trim_five" value="15" />
            <param name="reads_f" ftype="fastqsanger" value="hisat_input_2_forward.fastq" />
            <param name="reads_r" ftype="fastqsanger" value="hisat_input_2_reverse.fastq" />
            <param name="paired_end_options_selector" value="advanced" />
            <param name="no_mixed" value="True" />
            <param name="no_discordant" value="True" />
            <output name="output_alignments" ftype="bam" file="hisat_output_3.bam" lines_diff="2" />
        </test>
        <test><!-- testing unaligned output (single dataset) -->
            <param name="input_format_selector" value="fasta" />
            <param name="paired_selector" value="single" />
            <param name="reference_genome_source" value="history" />
            <param name="history_item" value="phiX.fa" ftype="fasta" />
            <param name="unaligned_file" value="true" />
            <param name="aligned_file" value="true" />
            <param name="reads" value="test_unaligned_reads.fasta" ftype="fasta" />
            <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" />
        </test>
        <test><!-- testing unaligned output (paired datasets) -->
            <param name="input_format_selector" value="fasta" />
            <param name="paired_selector" value="paired" />
            <param name="reference_genome_source" value="history" />
            <param name="history_item" value="phiX.fa" ftype="fasta" />
            <param name="unaligned_file" value="true" />
            <param name="aligned_file" value="true" />
            <param name="reads_f" value="test_unaligned_reads.fasta" ftype="fasta" />
            <param name="reads_r" value="test_unaligned_reads.fasta" ftype="fasta" />
            <output name="output_unaligned_reads_l" file="test_unaligned_reads.fasta" />
            <output name="output_unaligned_reads_r" file="test_unaligned_reads.fasta" />
        </test>
    </tests>
    <help><![CDATA[
Introduction
============

What is HISAT?
--------------

`HISAT <http://ccb.jhu.edu/software/hisat>`__ is a fast and sensitive spliced alignment program. As part of HISAT, we have developed a new indexing scheme based on the Burrows-Wheeler transform (`BWT <http://en.wikipedia.org/wiki/Burrows-Wheeler_transform>`__) and the `FM index <http://en.wikipedia.org/wiki/FM-index>`__, called hierarchical indexing, that employs two types of indexes: (1) one global FM index representing the whole genome, and (2) many separate local FM indexes for small regions collectively covering the genome. Our hierarchical index for the human genome (about 3 billion bp) includes ~48,000 local FM indexes, each representing a genomic region of ~64,000bp. As the basis for non-gapped alignment, the FM index is extremely fast with a low memory footprint, as demonstrated by `Bowtie <http://bowtie-bio.sf.net>`__. In addition, HISAT provides several alignment strategies specifically designed for mapping different types of RNA-seq reads. All these together, HISAT enables extremely fast and sensitive alignment of reads, in particular those spanning two exons or more. As a result, HISAT is much faster (>50 times) than `TopHat2 <http://ccb.jhu.edu/software/tophat>`__ with better alignment quality. Although it uses a large number of indexes, the memory requirement of HISAT is still modest, approximately 4.3 GB for human.

HISAT uses the `Bowtie2 <http://bowtie-bio.sf.net/bowtie2>`__ implementation to handle most of the operations on the FM index. In addition to spliced alignment, HISAT handles reads involving indels and supports a paired-end alignment mode. Multiple processors can be used simultaneously to achieve greater alignment speed. HISAT outputs alignments in `SAM <http://samtools.sourceforge.net/SAM1.pdf>`__ format, enabling interoperation with a large number of other tools (e.g. `SAMtools <http://samtools.sourceforge.net>`__, `GATK <http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit>`__) that use SAM. HISAT is distributed under the `GPLv3 license <http://www.gnu.org/licenses/gpl-3.0.html>`__, and it runs on the command line under Linux, Mac OS X and Windows.

Running HISAT
=============

Reporting
---------

The reporting mode governs how many alignments HISAT looks for, and how to report them.

In general, when we say that a read has an alignment, we mean that it has a `valid alignment <#valid-alignments-meet-or-exceed-the-minimum-score-threshold>`__. When we say that a read has multiple alignments, we mean that it has multiple alignments that are valid and distinct from one another.

Distinct alignments map a read to different places
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Two alignments for the same individual read are "distinct" if they map the same read to different places. Specifically, we say that two alignments are distinct if there are no alignment positions where a particular read offset is aligned opposite a particular reference offset in both alignments with the same orientation. E.g. if the first alignment is in the forward orientation and aligns the read character at read offset 10 to the reference character at chromosome 3, offset 3,445,245, and the second alignment is also in the forward orientation and also aligns the read character at read offset 10 to the reference character at chromosome 3, offset 3,445,245, they are not distinct alignments.

Two alignments for the same pair are distinct if either the mate 1s in the two paired-end alignments are distinct or the mate 2s in the two alignments are distinct or both.

Default mode: search for one or more alignments, report each
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

HISAT searches for up to N distinct, primary alignments for each read, where N equals the integer specified with the ``-k`` parameter. Primary alignments mean alignments whose alignment score is equal or higher than any other alignments. It is possible that multiple distinct alignments have the same score. That is, if ``-k 2`` is specified, HISAT will search for at most 2 distinct alignments. The alignment score for a paired-end alignment equals the sum of the alignment scores of the individual mates. Each reported read or pair alignment beyond the first has the SAM 'secondary' bit (which equals 256) set in its FLAGS field. See the `SAM specification <http://samtools.sourceforge.net/SAM1.pdf>`__ for details.

HISAT does not "find" alignments in any specific order, so for reads that have more than N distinct, valid alignments, HISAT does not guarantee that the N alignments reported are the best possible in terms of alignment score. Still, this mode can be effective and fast in situations where the user cares more about whether a read aligns (or aligns a certain number of times) than where exactly it originated.

Alignment summary
-----------------

When HISAT finishes running, it prints messages summarizing what happened. These messages are printed to the "standard error" ("stderr") filehandle. For datasets consisting of unpaired reads, the summary might look like this:

::

    20000 reads; of these:
      20000 (100.00%) were unpaired; of these:
        1247 (6.24%) aligned 0 times
        18739 (93.69%) aligned exactly 1 time
        14 (0.07%) aligned >1 times
    93.77% overall alignment rate

For datasets consisting of pairs, the summary might look like this:

::

    10000 reads; of these:
      10000 (100.00%) were paired; of these:
        650 (6.50%) aligned concordantly 0 times
        8823 (88.23%) aligned concordantly exactly 1 time
        527 (5.27%) aligned concordantly >1 times
        ----
        650 pairs aligned concordantly 0 times; of these:
          34 (5.23%) aligned discordantly 1 time
        ----
        616 pairs aligned 0 times concordantly or discordantly; of these:
          1232 mates make up the pairs; of these:
            660 (53.57%) aligned 0 times
            571 (46.35%) aligned exactly 1 time
            1 (0.08%) aligned >1 times
    96.70% overall alignment rate

The indentation indicates how subtotals relate to totals.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/nmeth.3317</citation>
    </citations>
</tool>