view segalign.xml @ 17:52a13f003b2d draft

planemo upload for repository https://github.com/richard-burhans/galaxytools/tree/main/tools/segalign commit b5d08d89e75e2dbfa03f74e45aa6baa465582eee
author richard-burhans
date Tue, 30 Jul 2024 17:25:08 +0000
parents 4966e095e3c3
children cefa7625d6cb
line wrap: on
line source

<tool id="segalign" name="SegAlign" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>A Scalable GPU System for Pairwise Whole Genome Alignments based on LASTZ's seed-filter-extend paradigm</description>
    <!--
        Macro files imported by this tool. The expand elements elsewhere in
        this file (edam_ontology, requirements, citations and the *_options
        groups) are resolved against these imports.
        NOTE(review): which file defines which macro is not visible from
        this wrapper; presumably each *_options macro lives in the file of
        the same name. Confirm against the macro files.
    -->
    <macros>
        <import>macros.xml</import>
        <import>sequence_options.xml</import>
        <import>scoring_options.xml</import>
        <import>seeding_options.xml</import>
        <import>ungapped_extension_options.xml</import>
        <import>gapped_extension_options.xml</import>
        <import>output_options.xml</import>
        <import>system_options.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="requirements"/>
    <!--
        Files from the tool directory that must be available to the job.
        runner.py and package_output.py are invoked directly by the command
        template below.
        NOTE(review): diagonal_partition.py and lastz-cmd.ini are not
        referenced in this wrapper; presumably the Python scripts locate
        them through the tool_directory argument. Confirm.
    -->
    <required_files>
        <include path="diagonal_partition.py"/>
        <include path="lastz-cmd.ini"/>
        <include path="package_output.py"/>
        <include path="runner.py"/>
    </required_files>
    <!--
        Command template (Cheetah). Three shell steps are chained so that a
        failure in any step aborts the job (errors are detected by exit code):
          1. convert the target and query FASTA inputs to 2bit under ./work
          2. run runner.py with the options selected in the tool form
          3. run package_output.py
    -->
    <command detect_errors="exit_code"><![CDATA[
## Convert input sequences to 2bit -------------------------------------

## gzip -cdfq writes the decompressed input to stdout and copies data that
## is not gzip-compressed through unchanged, so both fasta and fasta.gz
## inputs can be fed to faToTwoBit via process substitution.
mkdir -p "\$(pwd)/work" &&
faToTwoBit <(gzip -cdfq '$target') "\$(pwd)/work/ref.2bit" &&
faToTwoBit <(gzip -cdfq '$query') "\$(pwd)/work/query.2bit" &&

## Run SegAlign --------------------------------------------------------

## explicitly calling python to bypass a pulsar bug
## https://github.com/galaxyproject/pulsar/issues/341
python '$__tool_directory__/runner.py'
    --output-type tarball
    --output-file '$segalign_output'
    --diagonal-partition
    --num-cpu \${GALAXY_SLOTS:-2}
    --tool_directory '$__tool_directory__'
    '$target'
    '$query'

## Sequence Options ----------------------------------------------------

    --strand '$sequence_options.strand_selector'

## Scoring Options -----------------------------------------------------

## An unset optional scoring file stringifies to "None"; pass the option
## only when a file was actually supplied.
#set $scoring_pathname = str($scoring_options.scoring)
#if $scoring_pathname != "None":
    --scoring '$scoring_pathname'
#end if
## The selector value "x" means no ambiguous option is passed at all.
#if str($scoring_options.ambiguous_selector) != "x"
    #if str($scoring_options.set_ambiguous_params_selector) == "true"
## str.join accepts a single iterable of strings, so the three values are
## converted with str() and collected in a list before joining.
        #set $argument_value = ','.join([str($scoring_options.ambiguous_selector), str($scoring_options.ambiguous_reward), str($scoring_options.ambiguous_penalty)])
    --ambiguous '$argument_value'
    #else
## The selector lives in the scoring_options section, so it must be
## referenced through that section (a bare name is undefined here).
    --ambiguous '$scoring_options.ambiguous_selector'
    #end if
#end if

## Seeding Options -----------------------------------------------------

#if str($seeding_options.seed.seed_selector) == "custom"
    --seed '$seeding_options.seed.custom_seed'
#else
    --seed '$seeding_options.seed.seed_selector'
#end if
    --step '$seeding_options.step'
#if str($seeding_options.notransition) == "true"
    --notransition
#end if

## Ungapped Extension Options ------------------------------------------

    --xdrop '$ungapped_extension_options.xdrop'
    --hspthresh '$ungapped_extension_options.hspthresh'
#if str($ungapped_extension_options.noentropy) == "true"
    --noentropy
#end if

## Gapped Extension Options --------------------------------------------

    --ydrop '$gapped_extension_options.ydrop'
## gappedthresh is passed only when the form field is non-empty.
#if str($gapped_extension_options.gappedthresh) != ""
    --gappedthresh '$gapped_extension_options.gappedthresh'
#end if
#if str($gapped_extension_options.notrivial) == "true"
    --notrivial
#end if

## Output Options -----------------------------------------------------

## Exactly one format argument is emitted, chosen by format_selector; an
## unrecognized selector value emits none.
#if str($output_options.format.format_selector) == "bam"
    --format '$output_options.format.bam_options'
#else if str($output_options.format.format_selector) == "general_def"
    --format general-
#else if str($output_options.format.format_selector) == "general_full"
    --format 'general-:${output_options.format.fields}'

#else if str($output_options.format.format_selector) == "maf"
    --format '$output_options.format.maf_type'
#else if str($output_options.format.format_selector) == "blastn"
    --format=BLASTN-
#else if str($output_options.format.format_selector) == "differences"
    --format=differences
#end if
## todo :: rplot, bam
##  --action:target=multiple
##  $output_format.rplot
##  .if str( $output_format.out.format ) == "bam":
##      | samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '${output}'
##  .else:
##      > '${output}'
##  .end if
##  .if $output_format.rplot:
##      &&
##      Rscript $r_plot > /dev/null 2>&1
##  .end if

## System Options -----------------------------------------------------

    --wga_chunk_size '$system_options.wga_chunk_size'
    --lastz_interval_size '$system_options.lastz_interval_size'
    --seq_block_size '$system_options.seq_block_size'
    --num_gpu '$system_options.num_gpu'
#if str($system_options.debug) == "true"
    --debug
#end if

## Package Output ----------------------------------------------------

&&
python '$__tool_directory__/package_output.py'
    --tool_directory '$__tool_directory__'
    --format_selector '$output_options.format.format_selector'

    ]]></command>
    <inputs>
        <!-- The two sequences to align; plain or gzip-compressed FASTA datasets are accepted. -->
        <param name="target"
               type="data"
               format="fasta,fasta.gz"
               label="Target sequence file in FASTA format"/>
        <param name="query"
               type="data"
               format="fasta,fasta.gz"
               label="Query sequence file in FASTA format"/>
        <!-- Option groups expanded from the imported macros; the command template reads them by these names. -->
        <expand macro="sequence_options"/>
        <expand macro="scoring_options"/>
        <expand macro="seeding_options"/>
        <expand macro="ungapped_extension_options"/>
        <expand macro="gapped_extension_options"/>
        <expand macro="output_options"/>
        <expand macro="system_options"/>
    </inputs>
    <outputs>
        <!-- Single tgz dataset, collected from data_package.tgz in the job working directory. -->
        <data name="segalign_output"
              format="tgz"
              from_work_dir="data_package.tgz"
              label="SegAlign on ${on_string}"/>
    </outputs>
    <tests>
        <!--
            Runs the tool with default options on the two bundled gzip-compressed
            FASTA chunks and asserts that the resulting tgz archive contains the
            member galaxy/commands.json.
            NOTE(review): expect_test_failure="true" marks this test as expected
            to fail; presumably this is because test hosts lack the GPU that
            SegAlign needs. Confirm, and drop the attribute once the test can pass.
        -->
        <test expect_num_outputs="1" expect_test_failure="true">
            <param name="target" value="hg38.chr20.chunk.fa.gz" ftype="fasta.gz"/>
            <param name="query" value="mm39.chr2.chunk.fa.gz" ftype="fasta.gz"/>
            <output name="segalign_output" ftype="tgz">
                <assert_contents>
                    <has_archive_member path="galaxy/commands.json"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
    SegAlign is a scalable, GPU-accelerated system for computing pairwise whole genome alignments (WGA). It is based on the standard seed-filter-extend heuristic, in which the filtering stage dominates the runtime (e.g. 98% for human-mouse WGA); SegAlign accelerates that stage using GPU(s).

    https://github.com/gsneha26/SegAlign
    ]]></help>
    <expand macro="citations"/>
</tool>