Mercurial > repos > iuc > shasta

<?xml version="1.0"?>
<tool id="shasta" name="Shasta" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>De novo assembly of long read sequencing data</description>
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>

    <expand macro="version_command"/>

    <command detect_errors="exit_code"><![CDATA[
## Cheetah template that renders one `shasta --command assemble` invocation.
##
## Conventions used throughout:
##   * Optional numeric/text parameters are emitted only when str(value) is non-empty,
##     so unset options are left to the selected config file / Shasta itself.
##   * Boolean switches are modelled as selects whose option value IS the flag text,
##     so they are printed bare (never as "--flag '<value>'").
##   * All user-supplied values are single-quoted.

### Initialize input reads (either unzip or symlink to have the right file extensions for shasta)

@INIT_INPUT_READS@

## shasta is piped into tee below; without pipefail the pipeline would report
## tee's exit status and a failing assembly would go undetected by detect_errors="exit_code".
set -o pipefail &&

shasta --threads \${GALAXY_SLOTS:-4}

#if $use_config.use_config_select == "yes":
    --config '${__tool_directory__}/configs/${use_config.config_select}'
#elif $use_config.use_config_select == "file":
    ## in_config is declared inside the use_config conditional, so it must be qualified.
    --config '${use_config.in_config}'
#end if

--assemblyDirectory shasta_out
--command assemble

######################
### Reads Options: ###
######################

#if str($reads.min_read_length):
    --Reads.minReadLength '${reads.min_read_length}'
#end if
#if str($reads.desired_coverage):
    --Reads.desiredCoverage '${reads.desired_coverage}'
#end if

#######################################
### Reads.palindromicReads Options: ###
#######################################

#if $reads.palindromic_reads.skip_flagging:
    ## Boolean switch: the select value already is the flag, so print it bare.
    ${reads.palindromic_reads.skip_flagging}
#end if
#if str($reads.palindromic_reads.max_skip):
    --Reads.palindromicReads.maxSkip '${reads.palindromic_reads.max_skip}'
#end if
#if str($reads.palindromic_reads.max_drift):
    --Reads.palindromicReads.maxDrift '${reads.palindromic_reads.max_drift}'
#end if
#if str($reads.palindromic_reads.max_marker_frequency):
    --Reads.palindromicReads.maxMarkerFrequency '${reads.palindromic_reads.max_marker_frequency}'
#end if
#if str($reads.palindromic_reads.aligned_fraction_threshold):
    --Reads.palindromicReads.alignedFractionThreshold '${reads.palindromic_reads.aligned_fraction_threshold}'
#end if
#if str($reads.palindromic_reads.near_diagonal_fraction_threshold):
    --Reads.palindromicReads.nearDiagonalFractionThreshold '${reads.palindromic_reads.near_diagonal_fraction_threshold}'
#end if
#if str($reads.palindromic_reads.delta_threshold):
    --Reads.palindromicReads.deltaThreshold '${reads.palindromic_reads.delta_threshold}'
#end if

######################
### Kmers Options: ###
######################

#if $kmers.generation_method:
    --Kmers.generationMethod '${kmers.generation_method}'
#end if
#if str($kmers.k):
    --Kmers.k '${kmers.k}'
#end if
#if str($kmers.probability):
    --Kmers.probability '${kmers.probability}'
#end if
#if str($kmers.enrichment_threshold):
    --Kmers.enrichmentThreshold '${kmers.enrichment_threshold}'
#end if
#if $kmers.file:
    --Kmers.file '${kmers.file}'
#end if

########################
### MinHash Options: ###
########################

#if $minhash.version:
    --MinHash.version '${minhash.version}'
#end if
#if str($minhash.m):
    --MinHash.m '${minhash.m}'
#end if
#if str($minhash.hash_fraction):
    --MinHash.hashFraction '${minhash.hash_fraction}'
#end if
#if str($minhash.min_hash_iteration_count):
    --MinHash.minHashIterationCount '${minhash.min_hash_iteration_count}'
#end if
#if str($minhash.alignment_candidates_per_read):
    --MinHash.alignmentCandidatesPerRead '${minhash.alignment_candidates_per_read}'
#end if
#if str($minhash.min_bucket_size):
    --MinHash.minBucketSize '${minhash.min_bucket_size}'
#end if
#if str($minhash.max_bucket_size):
    --MinHash.maxBucketSize '${minhash.max_bucket_size}'
#end if
#if str($minhash.min_frequency):
    --MinHash.minFrequency '${minhash.min_frequency}'
#end if
#if $minhash.all_pairs:
    ${minhash.all_pairs}
#end if

######################
### Align Options: ###
######################

#if $align.align_method:
    --Align.alignMethod '${align.align_method}'
#end if
#if str($align.max_skip):
    --Align.maxSkip '${align.max_skip}'
#end if
#if str($align.max_drift):
    --Align.maxDrift '${align.max_drift}'
#end if
#if str($align.max_trim):
    --Align.maxTrim '${align.max_trim}'
#end if
#if str($align.max_marker_frequency):
    --Align.maxMarkerFrequency '${align.max_marker_frequency}'
#end if
#if str($align.min_aligned_marker_count):
    --Align.minAlignedMarkerCount '${align.min_aligned_marker_count}'
#end if
#if str($align.min_aligned_fraction):
    --Align.minAlignedFraction '${align.min_aligned_fraction}'
#end if
#if str($align.match_score):
    --Align.matchScore '${align.match_score}'
#end if
#if str($align.mismatch_score):
    --Align.mismatchScore '${align.mismatch_score}'
#end if
#if str($align.gap_score):
    --Align.gapScore '${align.gap_score}'
#end if
#if str($align.downsampling_factor):
    --Align.downsamplingFactor '${align.downsampling_factor}'
#end if
#if str($align.band_extend):
    --Align.bandExtend '${align.band_extend}'
#end if
#if str($align.max_band):
    --Align.maxBand '${align.max_band}'
#end if
#if $align.suppress_containments:
    ${align.suppress_containments}
#end if

###############################################
### Align.sameChannelReadAlignment Options: ###
###############################################

#if str($align.same_channel_read_alignment.suppress_delta_threshold):
    --Align.sameChannelReadAlignment.suppressDeltaThreshold '${align.same_channel_read_alignment.suppress_delta_threshold}'
#end if

##########################
### ReadGraph Options: ###
##########################

#if $read_graph.creation_method:
    --ReadGraph.creationMethod '${read_graph.creation_method}'
#end if
#if str($read_graph.max_alignment_count):
    --ReadGraph.maxAlignmentCount '${read_graph.max_alignment_count}'
#end if
#if str($read_graph.max_chimeric_read_distance):
    --ReadGraph.maxChimericReadDistance '${read_graph.max_chimeric_read_distance}'
#end if
#if str($read_graph.cross_strand_max_distance):
    --ReadGraph.crossStrandMaxDistance '${read_graph.cross_strand_max_distance}'
#end if
#if str($read_graph.contained_neighbor_count):
    --ReadGraph.containedNeighborCount '${read_graph.contained_neighbor_count}'
#end if
#if str($read_graph.uncontained_neighbor_count_per_direction):
    --ReadGraph.uncontainedNeighborCountPerDirection '${read_graph.uncontained_neighbor_count_per_direction}'
#end if
#if str($read_graph.marker_count_percentile):
    --ReadGraph.markerCountPercentile '${read_graph.marker_count_percentile}'
#end if
#if str($read_graph.aligned_fraction_percentile):
    --ReadGraph.alignedFractionPercentile '${read_graph.aligned_fraction_percentile}'
#end if
#if str($read_graph.max_skip_percentile):
    --ReadGraph.maxSkipPercentile '${read_graph.max_skip_percentile}'
#end if
#if str($read_graph.max_drift_percentile):
    --ReadGraph.maxDriftPercentile '${read_graph.max_drift_percentile}'
#end if
#if str($read_graph.max_trim_percentile):
    --ReadGraph.maxTrimPercentile '${read_graph.max_trim_percentile}'
#end if

############################
### MarkerGraph Options: ###
############################

#if str($marker_graph.min_coverage):
    --MarkerGraph.minCoverage '${marker_graph.min_coverage}'
#end if
#if str($marker_graph.max_coverage):
    --MarkerGraph.maxCoverage '${marker_graph.max_coverage}'
#end if
#if str($marker_graph.min_coverage_per_strand):
    --MarkerGraph.minCoveragePerStrand '${marker_graph.min_coverage_per_strand}'
#end if
#if str($marker_graph.low_coverage_threshold):
    --MarkerGraph.lowCoverageThreshold '${marker_graph.low_coverage_threshold}'
#end if
#if str($marker_graph.high_coverage_threshold):
    --MarkerGraph.highCoverageThreshold '${marker_graph.high_coverage_threshold}'
#end if
#if str($marker_graph.max_distance):
    --MarkerGraph.maxDistance '${marker_graph.max_distance}'
#end if
#if str($marker_graph.edge_marker_skip_threshold):
    --MarkerGraph.edgeMarkerSkipThreshold '${marker_graph.edge_marker_skip_threshold}'
#end if
#if str($marker_graph.prune_iteration_count):
    --MarkerGraph.pruneIterationCount '${marker_graph.prune_iteration_count}'
#end if
## NOTE: the parameter name "simplifiy_max_length" is misspelled in <inputs>; it is kept as-is
## because renaming it would change the tool's interface.
#if str($marker_graph.simplifiy_max_length):
    --MarkerGraph.simplifyMaxLength '${marker_graph.simplifiy_max_length}'
#end if
#if str($marker_graph.cross_edge_coverage_threshold):
    --MarkerGraph.crossEdgeCoverageThreshold '${marker_graph.cross_edge_coverage_threshold}'
#end if
#if str($marker_graph.refine_threshold):
    --MarkerGraph.refineThreshold '${marker_graph.refine_threshold}'
#end if
#if $marker_graph.reverse_transitive_reduction:
    ${marker_graph.reverse_transitive_reduction}
#end if

#######################################
### MarkerGraph.peakFinder Options: ###
#######################################

#if str($marker_graph.peak_finder.min_area_fraction):
    --MarkerGraph.peakFinder.minAreaFraction '${marker_graph.peak_finder.min_area_fraction}'
#end if
#if str($marker_graph.peak_finder.area_start_index):
    --MarkerGraph.peakFinder.areaStartIndex '${marker_graph.peak_finder.area_start_index}'
#end if

#########################
### Assembly Options: ###
#########################

## Distinct from --MarkerGraph.crossEdgeCoverageThreshold above; this one is declared in the assembly section.
#if str($assembly.cross_edge_coverage_threshold):
    --Assembly.crossEdgeCoverageThreshold '${assembly.cross_edge_coverage_threshold}'
#end if
#if str($assembly.marker_graph_edge_length_threshold_for_consensus):
    --Assembly.markerGraphEdgeLengthThresholdForConsensus '${assembly.marker_graph_edge_length_threshold_for_consensus}'
#end if
## "custom" points the Bayesian caller at a user-provided configuration file;
## "default" emits nothing; any other select value is passed through verbatim.
#if str($assembly.consensus_caller.consensus_caller_select) == "custom":
    --Assembly.consensusCaller 'Bayesian:${assembly.consensus_caller.consensus_caller_conf_file}'
#elif str($assembly.consensus_caller.consensus_caller_select) != "default":
    --Assembly.consensusCaller '${assembly.consensus_caller.consensus_caller_select}'
#end if
## ## The below option only can be meaningfully set with memoryMode set to filesystem, which requires root access. Therefore, removing this param
## #if str($assembly.store_coverage_data_csv_length_threshold):
##     --Assembly.storeCoverageDataCsvLengthThreshold '${assembly.store_coverage_data_csv_length_threshold}'
## #end if
${assembly.write_reads_by_assembled_segment}

##################################
### Assembly.detangle Options: ###
##################################

#if $assembly.detangle.detangle_method:
    --Assembly.detangleMethod '${assembly.detangle.detangle_method}'
#end if
#if str($assembly.detangle.diagonal_read_count_min):
    --Assembly.detangle.diagonalReadCountMin '${assembly.detangle.diagonal_read_count_min}'
#end if
#if str($assembly.detangle.off_diagonal_read_count_max):
    --Assembly.detangle.offDiagonalReadCountMax '${assembly.detangle.off_diagonal_read_count_max}'
#end if
#if str($assembly.detangle.off_diagonal_ratio):
    --Assembly.detangle.offDiagonalRatio '${assembly.detangle.off_diagonal_ratio}'
#end if

###################################
### Assembly.iterative Options: ###
###################################

#if $assembly.iterative.iterative:
    $assembly.iterative.iterative
#end if
#if str($assembly.iterative.iteration_count):
    --Assembly.iterative.iterationCount '${assembly.iterative.iteration_count}'
#end if
#if str($assembly.iterative.pseudo_path_align_match_score):
    --Assembly.iterative.pseudoPathAlignMatchScore '${assembly.iterative.pseudo_path_align_match_score}'
#end if
#if str($assembly.iterative.pseudo_path_align_mismatch_score):
    --Assembly.iterative.pseudoPathAlignMismatchScore '${assembly.iterative.pseudo_path_align_mismatch_score}'
#end if
#if str($assembly.iterative.pseudo_path_align_gap_score):
    --Assembly.iterative.pseudoPathAlignGapScore '${assembly.iterative.pseudo_path_align_gap_score}'
#end if
#if str($assembly.iterative.mismatch_square_factor):
    --Assembly.iterative.mismatchSquareFactor '${assembly.iterative.mismatch_square_factor}'
#end if
#if str($assembly.iterative.min_score):
    --Assembly.iterative.minScore '${assembly.iterative.min_score}'
#end if
#if str($assembly.iterative.max_alignment_count):
    --Assembly.iterative.maxAlignmentCount '${assembly.iterative.max_alignment_count}'
#end if
#if str($assembly.iterative.bridge_removal_iteration_count):
    --Assembly.iterative.bridgeRemovalIterationCount '${assembly.iterative.bridge_removal_iteration_count}'
#end if
#if str($assembly.iterative.bridge_removal_max_distance):
    --Assembly.iterative.bridgeRemovalMaxDistance '${assembly.iterative.bridge_removal_max_distance}'
#end if

################################################################
### List input files with their corresponding file extension ###
################################################################

## The ./input_<n>.<ext> names are presumably the files staged by @INIT_INPUT_READS@ (defined in macros.xml - confirm there).
## NOTE(review): $ext is only set for fasta/fastq datatypes; this relies on the input_reads macro restricting formats accordingly.
--input
#for $counter, $input in enumerate($in_data):
    #if $input.is_of_type("fasta","fasta.gz"):
        #set $ext = "fasta"
    #elif $input.is_of_type("fastq","fastqsanger","fastq.gz","fastqsanger.gz"):
        #set $ext = "fastq"
    #end if
    ./input_${counter}.${ext}
#end for
## Copy shasta's stdout into the log dataset while still forwarding it to the job's stderr stream.
| tee '$out_log' 1>&2
    ]]></command>
    <inputs>
        <expand macro="input_reads"/>
        <!-- Source of the base Shasta configuration: a packaged config, a user-supplied dataset, or none. -->
        <conditional name="use_config">
            <param name="use_config_select" type="select"
                   label="Config file options"
                   help="If you use a config file and specify any conflicting values in the other options, the values specified in the other options will take precedence.">
                <option value="yes" selected="true">Select a packaged config file</option>
                <option value="file">Provide my own config file</option>
                <option value="no">Do not provide config file</option>
            </param>
            <when value="yes">
                <!-- Macro from macros.xml; presumably declares the config_select parameter read by the command template. -->
                <expand macro="config_selection"/>
            </when>
            <when value="file">
                <param name="in_config" type="data" format="txt" label="Config file"/>
            </when>
            <!-- No additional inputs when running without a config file. -->
            <when value="no"/>
        </conditional>
        <!-- Toggles for optional result datasets. None of these is read by the command template; presumably they drive output filters defined further down the file. -->
        <section name="outputs" title="Output options" expanded="true">
            <param name="report_graphical_fragment_assembly" type="boolean" checked="true"
                   label="Report graphical fragment assembly?"
                   help="The graphical fragment assembly file describes the assembly and how contigs in the assembly relate to one another in the assembly graph"/>
            <param name="report_config" type="boolean" checked="true"
                   label="Report configuration file"
                   help="The configuration file contains all of the parameters used by shasta based on the options selected."/>
            <param name="report_log" type="boolean" checked="true"
                   label="Report log file?"
                   help="Log file contains basic information about the run"/>
        </section>
        <!-- Read filtering options (Reads.* and Reads.palindromicReads.*). Every parameter is optional; empty values are not passed to shasta. -->
        <section name="reads" expanded="true" title="Reads options">
            <param argument="--Reads.minReadLength" name="min_read_length" optional="true" type="integer" label="MinReadLength" help="Read length cutoff. Shorter reads are discarded" min="0"/>
            <param argument="--Reads.desiredCoverage" name="desired_coverage" optional="true" type="text" label="Desired coverage" help="Reduce coverage to desired value. If not zero, specifies desired coverage (number of bases). The read length cutoff specified via --Reads.minReadLength is increased to reduce coverage to the specified value. Power of 10 multipliers can be used, for example 120Gb to request 120 Gb of coverage">
                <!-- Accepts either an empty value (option not set) or digits optionally followed by a unit.
                     A unit without any digits (e.g. "Gb") is rejected because it is not a usable coverage value. -->
                <validator type="regex" message="Desired coverage must be an integer, optionally followed by a unit (Kb, Mb, Gb, Kbp, Mbp or Gbp)">^([0-9]+([KMG]bp?)?)?$</validator>
            </param>
            <!-- Decided not to expose the below parameter, can revisit this later if we change our minds on this-->
            <!-- <param name="no_cache" optional="true" type="boolean" /> -->

            <section name="palindromic_reads" expanded="true" title="Palindromic reads options">
                <!-- Boolean switch modelled as a select: the option value is the literal command line flag. -->
                <param argument="--Reads.palindromicReads.skipFlagging" name="skip_flagging" optional="true" type="select" label="Skip flagging palindromic reads?" help="Oxford Nanopore reads should be flagged for better results.">
                    <option value="">Do not skip flagging palindromic reads unless otherwise specified in provided config file</option>
                    <option value="--Reads.palindromicReads.skipFlagging">Skip flagging of palindromic reads</option>
                </param>
                <param argument="--Reads.palindromicReads.maxSkip" name="max_skip" optional="true" type="integer" label="maxSkip for palindromic read detection" min="0"/>
                <param argument="--Reads.palindromicReads.maxDrift" name="max_drift" optional="true" type="integer" label="maxDrift for palindromic read detection" min="0"/>
                <param argument="--Reads.palindromicReads.maxMarkerFrequency" name="max_marker_frequency" optional="true" type="integer" label="maxMarkerFrequency for palindromic read detection" min="0"/>
                <param argument="--Reads.palindromicReads.alignedFractionThreshold" name="aligned_fraction_threshold" optional="true" type="float" label="alignedFractionThreshold for palindromic read detection"/>
                <param argument="--Reads.palindromicReads.nearDiagonalFractionThreshold" name="near_diagonal_fraction_threshold" optional="true" type="float" label="nearDiagonalFractionThreshold for palindromic read detection"/>
                <param argument="--Reads.palindromicReads.deltaThreshold" name="delta_threshold" optional="true" type="integer" label="deltaThreshold for palindromic read detection"/>
            </section>

        </section>
        <!-- Marker k-mer selection options (Kmers.*). All parameters are optional. -->
        <section name="kmers" title="Kmers options" expanded="true">
            <param name="generation_method" argument="--Kmers.generationMethod" type="select" optional="true"
                   label="Method to generate marker k-mers">
                <option value="0">Random</option>
                <option value="1">Random, excluding globally overenriched</option>
                <option value="2">Random, excluding overenriched even in a single read</option>
                <option value="3">Read from file</option>
            </param>
            <param name="k" argument="--Kmers.k" type="integer" optional="true"
                   label="Length of marker k-mers (In run length space)"/>
            <param name="probability" argument="--Kmers.probability" type="float" optional="true"
                   label="Fraction k-mers used as a marker"/>
            <param name="enrichment_threshold" argument="--Kmers.enrichmentThreshold" type="float" optional="true"
                   label="Enrichment threshold for defining cutoff in Kmers.generationMethod that select based on enrichment"/>
            <!-- Dataset input; per its label it is only meaningful together with generation method 3. -->
            <param name="file" argument="--Kmers.file" type="data" format="txt" optional="true"
                   label="The file containing the k-mers to be used as markers, one kmer per line. Only used if the Kmers.generationMethod is 'Read from file'"/>
        </section>
        <!-- MinHash / LowHash alignment-candidate search options (MinHash.*). All parameters are optional. -->
        <section name="minhash" title="MinHash options" expanded="true">
            <param name="version" argument="--MinHash.version" type="select" optional="true"
                   label="Version of the LowHash algorithm to use">
                <option value="0">Default</option>
                <option value="1">Experimental</option>
            </param>
            <param name="m" argument="--MinHash.m" type="integer" optional="true"
                   label="The number of consecutive markers that define a MinHash/LowHash feature"/>
            <param name="hash_fraction" argument="--MinHash.hashFraction" type="float" optional="true"
                   label="Defines how low a hash has to be to be used with the LowHash algorithm"/>
            <param name="min_hash_iteration_count" argument="--MinHash.minHashIterationCount" type="integer" optional="true"
                   label="The number of MinHash/LowHash iterations"
                   help="Set to 0 to let --MinHash.alignmentCandidatesPerRead control the number of iterations"/>
            <param name="alignment_candidates_per_read" argument="--MinHash.alignmentCandidatesPerRead" type="float" optional="true"
                   label="If --MinHash.minHashIterationCount is 0, Min hash iteration is stopped when the average number of alignment candidates that each read is involved in reaches this value."
                   help="If --MinHash.minHashIterationCount is not 0, this is not used"/>
            <param name="min_bucket_size" argument="--MinHash.minBucketSize" type="integer" optional="true"
                   label="The minimum bucket size to be used by the LowHash algorithm"/>
            <param name="max_bucket_size" argument="--MinHash.maxBucketSize" type="integer" optional="true"
                   label="The maximum bucket size to be used by the LowHash algorithm"/>
            <param name="min_frequency" argument="--MinHash.minFrequency" type="integer" optional="true"
                   label="The minimum number of times a pair of reads must be found by the MinHash/LowHash algorithm in order to be considered a candidate alignment"/>
            <!-- Boolean switch modelled as a select: the option value is the literal flag printed by the command template. -->
            <param name="all_pairs" argument="--MinHash.allPairs" type="select" optional="true"
                   label="Skip the MinHash algorithm and mark all pairs of reads as alignment candidates with both orientation?"
                   help="This should only be used for experimentation on very small runs because it is very time consuming">
                <option value="">Do not skip the MinHash algorithm unless otherwise specified in provided config file</option>
                <option value="--MinHash.allPairs">Skip the MinHash algorithm and mark all pairs of reads as alignment candidates with both orientation</option>
            </param>
        </section>
        <!-- Marker alignment options (Align.* and Align.sameChannelReadAlignment.*). All parameters are optional. -->
        <section name="align" expanded="true" title="Align options">
            <param argument="--Align.alignMethod" name="align_method" optional="true" type="select" label="Alignment method" help="The alignment method to be used to create the read graph and the marker graph.">
                <option value="0">Old Shasta method</option>
                <option value="1">SeqAn (slow)</option>
                <option value="3">Banded SeqAn</option>
            </param>
            <param argument="--Align.maxSkip" name="max_skip" optional="true" type="integer" label="The maximum number of markers that an alignment is allowed to skip"/>
            <param argument="--Align.maxDrift" name="max_drift" optional="true" type="integer" label="The maximum amount of marker drift that an alignment is allowed to tolerate"/>
            <param argument="--Align.maxTrim" name="max_trim" optional="true" type="integer" label="The maximum number of unaligned markers tolerated at the beginning and end of an alignment"/>
            <param argument="--Align.maxMarkerFrequency" name="max_marker_frequency" optional="true" type="integer" label="Marker frequency threshold" help="Markers more frequent than this value in either of two oriented reads being aligned are discarded and not used to compute the alignment."/>
            <param argument="--Align.minAlignedMarkerCount" name="min_aligned_marker_count" optional="true" type="integer" label="The minimum number of aligned markers for an alignment to be used"/>
            <param argument="--Align.minAlignedFraction" name="min_aligned_fraction" optional="true" type="float" label="The minimum fraction of aligned markers for an alignment to be used"/>
            <param argument="--Align.matchScore" name="match_score" optional="true" type="integer" label="Match score for marker alignments" help="Only used for 'SeqAn' and 'Banded SeqAn' alignment methods"/>
            <param argument="--Align.mismatchScore" name="mismatch_score" optional="true" type="integer" label="Mismatch score for marker alignments" help="Only used for 'SeqAn' and 'Banded SeqAn' alignment methods"/>
            <param argument="--Align.gapScore" name="gap_score" optional="true" type="integer" label="Gap score for marker alignments" help="Only used for 'SeqAn' and 'Banded SeqAn' alignment methods"/>
            <param argument="--Align.downsamplingFactor" name="downsampling_factor" optional="true" type="float" label="Downsampling factor" help="Only used for 'Banded SeqAn' alignment method"/>
            <param argument="--Align.bandExtend" name="band_extend" optional="true" type="integer" label="Amount to extend the downsampled band" help="Only used for 'Banded SeqAn' alignment method"/>
            <param argument="--Align.maxBand" name="max_band" optional="true" type="integer" label="Maximum alignment band" help="Only used for 'Banded SeqAn' alignment method"/>

            <!-- Boolean switch modelled as a select: the option value is the literal flag printed by the command template. -->
            <param argument="--Align.suppressContainments" name="suppress_containments" optional="true" type="select" label="Suppress containment alignments?" help="Containment alignments are alignments in which one read is entirely contained in another read, except possibly for up to maxTrim markers at the beginning and end">
                <option value="">Do not suppress containment alignments, unless otherwise specified in provided config file</option>
                <option value="--Align.suppressContainments">Suppress containment alignments</option>
            </param>

            <section name="same_channel_read_alignment" expanded="true" title="Same channel read alignment options">
                <param argument="--Align.sameChannelReadAlignment.suppressDeltaThreshold" name="suppress_delta_threshold" optional="true" type="integer" label="Suppress delta threshold" help="If not zero, alignments between reads from the same nanopore channel and close in time are suppressed. The 'read' meta data fields from the FASTA or FASTQ header are checked. If their difference, in absolute value, is less than the value of this option, the alignment is suppressed. This can help avoid assembly artifact. This check is only done if the two reads have identical meta data fields 'runid', 'sampleid', and 'ch'. If any of these metadata fields are missing this check is suppressed and this option has no effect."/>
            </section>
        </section>
        <!-- Read graph construction options (ReadGraph.*). All parameters are optional. -->
        <section name="read_graph" title="ReadGraph options" expanded="true">
            <param name="creation_method" argument="--ReadGraph.creationMethod" type="select" optional="true"
                   label="The method used to create the read graph">
                <option value="0">Default</option>
                <option value="1">Experimental version 1</option>
                <option value="2">Experimental version 2</option>
            </param>
            <param name="max_alignment_count" argument="--ReadGraph.maxAlignmentCount" type="integer" optional="true"
                   label="The maximum number of alignments to be kept for each read"/>
            <!--The below parameter is currently ignored per the command line help, don't bother exposing it-->
            <!-- <param argument="ReadGraph.minComponentSize" name="min_component_size" optional="true" type="integer" label="The minimum size (number of oriented reads) of a connected component of the read graph to be kept. This is currently ignored."/> -->
            <param name="max_chimeric_read_distance" argument="--ReadGraph.maxChimericReadDistance" type="integer" optional="true"
                   label="Max chimeric read distance"
                   help="Used for chimeric read detection"/>
            <param name="cross_strand_max_distance" argument="--ReadGraph.crossStrandMaxDistance" type="integer" optional="true"
                   label="Maximum distance (edges) for flagCrossStrandReadGraphEdges"
                   help="Set this to zero to entirely suppress flagCrossStrandReadGraphEdges"/>

            <!-- The next two parameters apply to creation method "Experimental version 1" (see their help text). -->
            <param name="contained_neighbor_count" argument="--ReadGraph.containedNeighborCount" type="integer" optional="true"
                   label="Maximum number of alignments to be kept for each contained read"
                   help="Only used when creation method for the read graph is Experimental version 1"/>
            <param name="uncontained_neighbor_count_per_direction" argument="--ReadGraph.uncontainedNeighborCountPerDirection" type="integer" optional="true"
                   label="Maximum number of alignments to be kept in each direction (forward, backward) for each uncontained read"
                   help="Only used when creation method for the read graph is Experimental version 1"/>
            <!--The below parameter is currently experimental and advised not to be used, dont bother exposing it-->
            <!-- <param argument="ReadGraph.removeConflicts" name="remove_conflicts" optional="true" type="select"/> -->

            <!-- The percentile parameters apply to creation method "Experimental version 2" (see their help text). -->
            <param name="marker_count_percentile" argument="--ReadGraph.markerCountPercentile" type="float" optional="true"
                   label="Percentile for marker Count"
                   help="Only used when creation method for the read graph is Experimental version 2"/>
            <param name="aligned_fraction_percentile" argument="--ReadGraph.alignedFractionPercentile" type="float" optional="true"
                   label="Percentile for alignedFraction"
                   help="Only used when creation method for the read graph is Experimental version 2"/>
            <param name="max_skip_percentile" argument="--ReadGraph.maxSkipPercentile" type="float" optional="true"
                   label="Percentile for maxSkip"
                   help="Only used when creation method for the read graph is Experimental version 2"/>
            <param name="max_drift_percentile" argument="--ReadGraph.maxDriftPercentile" type="float" optional="true"
                   label="Percentile for maxDrift"
                   help="Only used when creation method for the read graph is Experimental version 2"/>
            <param name="max_trim_percentile" argument="--ReadGraph.maxTrimPercentile" type="float" optional="true"
                   label="Percentile for maxTrim"
                   help="Only used when creation method for the read graph is Experimental version 2"/>
        </section>
        <section name="marker_graph" expanded="true" title="MarkerGraph options">
            <param argument="--MarkerGraph.minCoverage" name="min_coverage" optional="true" type="integer" label="Minimum coverage (number of supporting oriented reads) for a marker graph vertex to be created." help="Specifying 0 causes a suitable value of this parameter to be selected automatically"/>
            <param argument="--MarkerGraph.maxCoverage" name="max_coverage" optional="true" type="integer" label="Maximum coverage (number of supporting oriented reads) for a marker graph vertex"/>
            <param argument="--MarkerGraph.minCoveragePerStrand" name="min_coverage_per_strand" optional="true" type="integer" label="Minimum coverage (number of supporting oriented reads) for each strand for a marker graph vertex"/>
            <param argument="--MarkerGraph.lowCoverageThreshold" name="low_coverage_threshold" optional="true" type="integer" label="Low coverage threshold" help="Used during approximate transitive reduction. Marker graph edges with coverage lower than this value are always marked as removed regardless of reachability"/>
            <param argument="--MarkerGraph.highCoverageThreshold" name="high_coverage_threshold" optional="true" type="integer" label="High coverage threshold" help="Used during approximate transitive reduction. Marker graph edges with coverage higher than this value are never marked as removed regardless of reachability"/>
            <param argument="--MarkerGraph.maxDistance" name="max_distance" optional="true" type="integer" label="Max distance" help="Used during approximate transitive reduction"/>
            <param argument="--MarkerGraph.edgeMarkerSkipThreshold" name="edge_marker_skip_threshold" optional="true" type="integer" label="Edge marker skip threshold" help="Used during approximate transitive reduction"/>
            <param argument="--MarkerGraph.pruneIterationCount" name="prune_iteration_count" optional="true" type="integer" label="Number of prune iterations"/>
            <param argument="--MarkerGraph.simplifyMaxLength" name="simplifiy_max_length" optional="true" type="text" label="Simplify max length" help="Maximum lengths (in markers) used at each iteration of simplifyMarkerGraph. Ex.) 10,100,1000">
                <validator type="regex" message="Must be a comma separated list of integers">^[0-9]+(,[0-9]+)*$</validator>
            </param>
            <param argument="--MarkerGraph.crossEdgeCoverageThreshold" name="cross_edge_coverage_threshold" optional="true" type="float" label="Cross edge coverage threshold" help="Experimental. If this is not zero, assembly graph cross-edges with average edge coverage less than this value are removed, together with the corresponding marker graph edges. A cross edge is defined as an edge v0=&gt;v1 with out-degree(v0)&gt;1, in-degree(v1)&gt;1"/>
            <param argument="--MarkerGraph.refineThreshold" name="refine_threshold" optional="true" type="integer" label="Refine threshold" help="Length threshold, in markers, for the marker graph refinement step, or 0 to turn off refinement step"/>
            <!-- Two-way select whose option values are either empty or the literal command line flag; per the option text, the empty choice defers to any provided config file. -->
            <param argument="--MarkerGraph.reverseTransitiveReduction" name="reverse_transitive_reduction" optional="true" type="select" label="Perform reverse transitive reduction?">
                <option value="">Don't perform approximate reverse transitive reduction of the marker graph unless otherwise specified in provided config file</option>
                <option value="--MarkerGraph.reverseTransitiveReduction">Perform approximate reverse transitive reduction of the marker graph</option>
            </param>

            <!-- Peak finder settings; per the help text they only matter when the marker graph minimum coverage is set to 0 (automatic selection). -->
            <section name="peak_finder" title="Peak Finder options" expanded="true">
                <param argument="--MarkerGraph.peakFinder.minAreaFraction"
                       name="min_area_fraction"
                       type="float"
                       optional="true"
                       label="Peak finding min area fraction"
                       help="Used in the automatic selection of --MarkerGraph.minCoverage when --MarkerGraph.minCoverage is set to 0"/>
                <param argument="--MarkerGraph.peakFinder.areaStartIndex"
                       name="area_start_index"
                       type="integer"
                       optional="true"
                       label="Peak finding area start index"
                       help="Used in the automatic selection of --MarkerGraph.minCoverage when --MarkerGraph.minCoverage is set to 0"/>
            </section>

        </section>
        <!-- Assembly options: cross-edge removal, consensus caller selection, the optional per-segment read report, detangling, and the experimental iterative assembly settings. -->
        <section name="assembly" expanded="true" title="Assembly options">
            <param argument="--Assembly.crossEdgeCoverageThreshold" name="cross_edge_coverage_threshold" optional="true" type="integer" label="Maximum average edge coverage for a cross edge of the assembly graph to be removed"/>
            <param argument="--Assembly.markerGraphEdgeLengthThresholdForConsensus" name="marker_graph_edge_length_threshold_for_consensus" optional="true" type="integer" label="Marker graph edge length threshold for consensus" help="Controls assembly of long marker graph edges"/>
            <!-- Only the "custom" choice exposes an extra input (a csv dataset); every other choice has an empty "when" branch. -->
            <conditional name="consensus_caller">
                <param argument="--Assembly.consensusCaller" name="consensus_caller_select" type="select" label="Consensus caller for repeat counts">
                    <option value="default" selected="true">Default or caller in provided config file</option>
                    <option value="Modal"/>
                    <option value="Median"/>
                    <option value="Bayesian:guppy-2.3.1-a"/>
                    <option value="Bayesian:guppy-2.3.5-a"/>
                    <option value="Bayesian:guppy-3.0.5-a"/>
                    <option value="Bayesian:guppy-3.4.4-a"/>
                    <option value="Bayesian:guppy-3.6.0-a"/>
                    <option value="Bayesian:r10-guppy-3.4.8-a"/>
                    <option value="custom">Custom config file</option>
                </param>
                <when value="default"/>
                <when value="Modal"/>
                <when value="Median"/>
                <when value="Bayesian:guppy-2.3.1-a"/>
                <when value="Bayesian:guppy-2.3.5-a"/>
                <when value="Bayesian:guppy-3.0.5-a"/>
                <when value="Bayesian:guppy-3.4.4-a"/>
                <when value="Bayesian:guppy-3.6.0-a"/>
                <when value="Bayesian:r10-guppy-3.4.8-a"/>
                <when value="custom">
                    <param name="consensus_caller_conf_file" type="data" format="csv" label="Custom config file for consensus caller"/>
                </when>
            </conditional>
            <!--TODO - param below outputs coverage data in a binary format, but that format is not specified anywhere in the Shasta documentation so I don't know what format I'd assign it in Galaxy. For now, don't expose this parameter and look into changing this later-->
            <!-- <param argument="Assembly.storeCoverageData" name="store_coverage_data" type="boolean" label="Store coverage data in binary format?" checked="false" truevalue="Assembly.storeCoverageData" falsevalue=""/> -->
            <!-- The below option only can be meaningfully set with memoryMode set to filesystem, which requires root access. Therefore, removing this param-->
            <!-- <param argument="Assembly.storeCoverageDataCsvLengthThreshold" name="store_coverage_data_csv_length_threshold" optional="true" type="integer" label="CSV coverage data length threshold" help="Used to specify the minimum length of an assembled segment for which coverage data in csv format should be stored. If 0, no coverage data in csv format is stored"/> -->
            <!-- This boolean also gates the "Reads by assembled segment" csv output through that output's filter. -->
            <param argument="--Assembly.writeReadsByAssembledSegment" name="write_reads_by_assembled_segment" optional="true" type="boolean" label="Write the reads that contributed to assembling each segment?" checked="false" truevalue="--Assembly.writeReadsByAssembledSegment" falsevalue=""/>

            <section name="detangle" expanded="true" title="Detangle options">
                <param argument="--Assembly.detangleMethod" name="detangle_method" optional="true" type="select" label="Method used to detangle the assembly graph" help="Controlled by Assembly.detangle.* options (experimental)">
                    <option value="0">No detangling</option>
                    <option value="1">Strict detangling</option>
                    <option value="2">Less strict detangling</option>
                </param>
                <param argument="--Assembly.detangle.diagonalReadCountMin" name="diagonal_read_count_min" optional="true" type="integer" label="Minimum number of reads on detangle matrix diagonal elements required for detangling"/>
                <param argument="--Assembly.detangle.offDiagonalReadCountMax" name="off_diagonal_read_count_max" optional="true" type="integer" label="Maximum number of reads on detangle matrix off-diagonal elements allowed for detangling"/>
                <param argument="--Assembly.detangle.offDiagonalRatio" name="off_diagonal_ratio" optional="true" type="float" label="Maximum ratio of total off-diagonal elements over diagonal element allowed for detangling"/>
            </section>

            <!-- Collapsed by default; every option in this section is labelled experimental. -->
            <section name="iterative" expanded="false" title="Iterative assembly options (experimental)">
                <param argument="--Assembly.iterative" name="iterative" optional="true" type="select" label="Request iterative assembly? (experimental)">
                    <option value="">Do not perform iterative assembly unless otherwise specified in provided config file</option>
                    <option value="--Assembly.iterative">Perform iterative assembly</option>
                </param>
                <param argument="--Assembly.iterative.iterationCount" name="iteration_count" optional="true" type="integer" label="Number of iterations for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.pseudoPathAlignMatchScore" name="pseudo_path_align_match_score" optional="true" type="integer" label="Pseudopath alignment match score for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.pseudoPathAlignMismatchScore" name="pseudo_path_align_mismatch_score" optional="true" type="integer" label="Pseudopath alignment mismatch score for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.pseudoPathAlignGapScore" name="pseudo_path_align_gap_score" optional="true" type="integer" label="Pseudopath alignment gap score for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.mismatchSquareFactor" name="mismatch_square_factor" optional="true" type="float" label="Mismatch square factor for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.minScore" name="min_score" optional="true" type="float" label="Minimum pseudo-alignment score for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.maxAlignmentCount" name="max_alignment_count" optional="true" type="integer" label="Maximum number of read graph neighbors for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.bridgeRemovalIterationCount" name="bridge_removal_iteration_count" optional="true" type="integer" label="Number of read graph bridge removal iterations for iterative assembly (experimental)"/>
                <param argument="--Assembly.iterative.bridgeRemovalMaxDistance" name="bridge_removal_max_distance" optional="true" type="integer" label="Maximum distance for read graph bridge removal for iterative assembly (experimental)"/>
            </section>

        </section>
    </inputs>
    <outputs>
        <!-- Optional reports: each filter reads the matching report_* parameter of the "outputs" input section. -->
        <data name="out_gfa"
              format="gfa1"
              from_work_dir="shasta_out/Assembly.gfa"
              label="${tool.name} on ${on_string} (Graphical Fragment Assembly)">
            <filter>outputs["report_graphical_fragment_assembly"]</filter>
        </data>
        <data name="out_conf"
              format="txt"
              from_work_dir="shasta_out/shasta.conf"
              label="${tool.name} on ${on_string} (Config file)">
            <filter>outputs["report_config"]</filter>
        </data>
        <!-- No from_work_dir on the log; presumably the command template writes this dataset directly (TODO confirm). -->
        <data name="out_log"
              format="txt"
              label="${tool.name} on ${on_string} (Log file)">
            <filter>outputs["report_log"]</filter>
        </data>
        <!-- Coverage CSV collection is disabled: it can only be produced with memoryMode set to filesystem, which requires root access. -->
        <!-- <collection name="out_coverage_csvs" type="list" label="${tool.name} on ${on_string} (Coverage CSVs)">
            <discover_datasets directory="shasta_out/Coverage" pattern="__name_and_ext__" format="csv" visible="false"/>
            <filter>not ( assembly["store_coverage_data_csv_length_threshold"] == 0 or assembly["store_coverage_data_csv_length_threshold"] == "" )</filter>
        </collection> -->
        <!-- Emitted only when the assembly section asks for the per-segment read report. -->
        <data name="out_reads_csv"
              format="csv"
              from_work_dir="shasta_out/ReadsBySegment.csv"
              label="${tool.name} on ${on_string} (Reads by assembled segment)">
            <filter>assembly["write_reads_by_assembled_segment"]</filter>
        </data>
        <!-- The assembled FASTA carries no filter, so it is always produced. -->
        <data name="out_fasta"
              format="fasta"
              from_work_dir="shasta_out/Assembly.fasta"
              label="${tool.name} on ${on_string} (FASTA)"/>
    </outputs>
    <tests>
        <!-- Full run with a packaged config: all five outputs are expected and each one is checked. -->
        <test expect_num_outputs="5">
            <param name="in_data" value="nanopore.fasta.gz" ftype="fasta.gz"/>
            <conditional name="use_config">
                <param name="use_config_select" value="yes"/>
                <param name="config_select" value="Nanopore-Dec2019.conf"/>
            </conditional>
            <section name="assembly">
                <!-- Coverage CSV threshold stays disabled: it needs memoryMode set to filesystem, which requires root access. -->
                <!-- <param name="store_coverage_data_csv_length_threshold" value="1000"/> -->
                <param name="write_reads_by_assembled_segment" value="true"/>
            </section>
            <output name="out_fasta" file="out_fasta.fasta">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="out_gfa" file="out_gfa.gfa1">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <!-- The log contains timestamps and platform-specific output, so only selected lines are asserted instead of comparing against a stored file. -->
            <output name="out_log">
                <assert_contents>
                    <has_line line="Input files: ./input_0.fasta "/>
                    <has_line line="assemblyDirectory = shasta_out"/>
                </assert_contents>
            </output>
            <output name="out_conf" file="out_config.txt">
                <assert_contents>
                    <has_n_lines n="100"/>
                </assert_contents>
            </output>
            <output name="out_reads_csv" file="out_reads.csv">
                <assert_contents>
                    <has_n_lines n="59"/>
                </assert_contents>
            </output>
            <!-- Coverage CSV collection check stays disabled for the same root-access reason as the output itself. -->
            <!-- <output_collection name="out_coverage_csvs" type="list">
                <element name="0" ftype="csv">
                    <assert_contents>
                        <has_n_lines n="11714"/>
                    </assert_contents>
                </element>
            </output_collection> -->
        </test>
        <!-- Switching off the three optional reports must leave the FASTA as the only output. -->
        <test expect_num_outputs="1">
            <param name="in_data" value="nanopore.fasta.gz" ftype="fasta.gz"/>
            <conditional name="use_config">
                <param name="use_config_select" value="yes"/>
                <param name="config_select" value="Nanopore-Dec2019.conf"/>
            </conditional>
            <section name="outputs">
                <param name="report_graphical_fragment_assembly" value="false"/>
                <param name="report_config" value="false"/>
                <param name="report_log" value="false"/>
            </section>
            <output name="out_fasta" file="out_fasta.fasta">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
        <!-- A config file supplied as a dataset (instead of a packaged one) must yield the same FASTA. -->
        <test expect_num_outputs="1">
            <param name="in_data" value="nanopore.fasta.gz" ftype="fasta.gz"/>
            <conditional name="use_config">
                <param name="use_config_select" value="file"/>
                <param name="in_config" value="out_config2.txt" ftype="txt"/>
            </conditional>
            <section name="outputs">
                <param name="report_graphical_fragment_assembly" value="false"/>
                <param name="report_config" value="false"/>
                <param name="report_log" value="false"/>
            </section>
            <output name="out_fasta" file="out_fasta.fasta">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
        <!--
            Test ability to not provide config and pass equivalent options through the parameters.
            Only the FASTA and the config report are requested (hence two expected outputs); the
            config report is compared against out_config2.txt, the same file the previous test
            feeds in as its config.
        -->
        <test expect_num_outputs="2">
            <param name="in_data" ftype="fasta.gz" value="nanopore.fasta.gz"/>
            <conditional name="use_config">
                <param name="use_config_select" value="no"/>
            </conditional>
            <section name="outputs">
                <param name="report_graphical_fragment_assembly" value="false"/>
                <param name="report_config" value="true"/>
                <param name="report_log" value="false"/>
            </section>
            <section name="reads">
                <param name="min_read_length" value="10000"/>
                <param name="desired_coverage" value="0"/>
                <section name="palindromic_reads">
                    <param name="skip_flagging" value=""/>
                    <param name="max_skip" value="100"/>
                    <param name="max_drift" value="100"/>
                    <param name="max_marker_frequency" value="10"/>
                    <param name="aligned_fraction_threshold" value="0.1"/>
                    <param name="near_diagonal_fraction_threshold" value="0.1"/>
                    <param name="delta_threshold" value="100"/>
                </section>
            </section>
            <section name="kmers">
                <param name="generation_method" value="0"/>
                <param name="k" value="10"/>
                <param name="probability" value="0.1"/>
                <!-- NOTE(review): every other param name in this test is lowercase snake_case; confirm that "enrichmentThreshold" matches the name declared in the kmers input section, otherwise this value may not reach the tool. -->
                <param name="enrichmentThreshold" value="100"/>
                <!-- <param name="file" value=""/> -->
            </section>
            <section name="minhash">
                <param name="version" value="0"/>
                <param name="m" value="4"/>
                <param name="hash_fraction" value="0.01"/>
                <param name="min_hash_iteration_count" value="10"/>
                <param name="alignment_candidates_per_read" value="20"/>
                <param name="min_bucket_size" value="5"/>
                <param name="max_bucket_size" value="30"/>
                <param name="min_frequency" value="5"/>
                <param name="all_pairs" value=""/>
            </section>
            <section name="align">
                <param name="align_method" value="3"/>
                <param name="max_skip" value="30"/>
                <param name="max_drift" value="30"/>
                <param name="max_trim" value="30"/>
                <param name="max_marker_frequency" value="10"/>
                <param name="min_aligned_marker_count" value="100"/>
                <param name="min_aligned_fraction" value="0.4"/>
                <param name="match_score" value="6"/>
                <param name="mismatch_score" value="-1"/>
                <param name="gap_score" value="-1"/>
                <param name="downsampling_factor" value="0.1"/>
                <param name="band_extend" value="10"/>
                <param name="max_band" value="1000"/>
                <param name="suppress_containments" value=""/>
                <section name="same_channel_read_alignment">
                    <param name="suppress_delta_threshold" value="0"/>
                </section>
            </section>
            <section name="read_graph">
                <param name="creation_method" value="0"/>
                <param name="max_alignment_count" value="6"/>
                <param name="max_chimeric_read_distance" value="2"/>
                <param name="cross_strand_max_distance" value="6"/>
                <param name="contained_neighbor_count" value="6"/>
                <param name="uncontained_neighbor_count_per_direction" value="3"/>
                <param name="marker_count_percentile" value="0.015"/>
                <param name="aligned_fraction_percentile" value="0.12"/>
                <param name="max_skip_percentile" value="0.12"/>
                <param name="max_drift_percentile" value="0.12"/>
                <param name="max_trim_percentile" value="0.015"/>
            </section>
            <section name="marker_graph">
                <param name="min_coverage" value="10"/>
                <param name="max_coverage" value="100"/>
                <param name="min_coverage_per_strand" value="0"/>
                <param name="low_coverage_threshold" value="0"/>
                <param name="high_coverage_threshold" value="256"/>
                <param name="max_distance" value="30"/>
                <param name="edge_marker_skip_threshold" value="100"/>
                <param name="prune_iteration_count" value="6"/>
                <!-- The spelling "simplifiy" is intentional here: it matches the name declared on the corresponding input parameter. -->
                <param name="simplifiy_max_length" value="10,100,1000"/>
                <param name="cross_edge_coverage_threshold" value="0"/>
                <param name="refine_threshold" value="0"/>
                <param name="reverse_transitive_reduction" value=""/>
                <section name="peak_finder">
                    <param name="min_area_fraction" value="0.08"/>
                    <param name="area_start_index" value="2"/>
                </section>
            </section>
            <section name="assembly">
                <param name="cross_edge_coverage_threshold" value="3"/>
                <param name="marker_graph_edge_length_threshold_for_consensus" value="1000"/>
                <conditional name="consensus_caller">
                    <param name="consensus_caller_select" value="Bayesian:guppy-3.0.5-a"/>
                </conditional>
                <!-- The below option only can be meaningfully set with memoryMode set to filesystem, which requires root access. Therefore, removing this param-->
                <!-- <param name="store_coverage_data_csv_length_threshold" value="0"/> -->
                <param name="write_reads_by_assembled_segment" value="false"/>
                <section name="detangle">
                    <param name="detangle_method" value="0"/>
                    <param name="diagonal_read_count_min" value="1"/>
                    <param name="off_diagonal_read_count_max" value="2"/>
                    <param name="off_diagonal_ratio" value="0.3"/>
                </section>
                <section name="iterative">
                    <param name="iterative" value=""/>
                    <param name="iteration_count" value="3"/>
                    <param name="pseudo_path_align_match_score" value="1"/>
                    <param name="pseudo_path_align_mismatch_score" value="-1"/>
                    <param name="pseudo_path_align_gap_score" value="-1"/>
                    <param name="mismatch_square_factor" value="3"/>
                    <param name="min_score" value="0"/>
                    <param name="max_alignment_count" value="6"/>
                    <param name="bridge_removal_iteration_count" value="3"/>
                    <param name="bridge_removal_max_distance" value="2"/>
                </section>
            </section>
            <output name="out_fasta" file="out_fasta.fasta">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
            <output name="out_conf" file="out_config2.txt">
                <assert_contents>
                    <has_n_lines n="100"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@WID@

**Input**

- fasta / fastq data

- config file (optional)


Example config files can be found `here <https://github.com/chanzuckerberg/shasta/tree/master/conf>`_ (these files correspond to the packaged config files available for this tool).

**Output**

- Assembled genome (FASTA)
- Graphical fragment assembly (optional) (.gfa1)
- Log file (optional) (.txt)
- Config file (optional) (.txt)
- Read CSV detailing which reads align to which assembly fragments (optional) (.csv)
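- Coverage CSV List (list:.csv) - currently not produced by this wrapper, because it requires the Shasta filesystem memory mode, which needs root access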

**References**

@REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>