<!-- Mercurial > repos > iuc > amas_split -->

<tool id="amas_split" name="AMAS split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>split multiple alignments</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Cross-reference linking this wrapper to the "amas" entry of the bio.tools registry -->
    <xrefs>
        <xref type="bio.tools">amas</xref>
    </xrefs>

    <!-- Software requirements and the version command are expanded from imported macros (see the macros import above) -->
    <expand macro="requirements" />
    <expand macro="version_command" />

    <command detect_errors="exit_code"><![CDATA[
        #import re
        ## Abort on the first failing command (-e) and on unset shell variables (-u)
        set -eu;

        ## The format handed to the helper script is derived from the Galaxy
        ## datatype of the input dataset; the 'nex' extension is renamed to 'nexus'
        #set $in_format = $input_file.ext
        #if $in_format == 'nex'
            #set $in_format = 'nexus'
        #end if

        ## Check if inputs are interleaved: the helper script's stdout is captured
        ## in IN_FORMAT and later passed to AMAS as the input format.
        ## The statement is terminated with a semicolon instead of being chained
        ## with a logical AND, because 'set -e' does not abort on a failure on the
        ## left-hand side of an AND list; a failing helper would otherwise be
        ## ignored and AMAS would run with an empty input format.
        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py'
            '${input_file}'
            --format '${in_format}');

        ## Create a symlink named after the (sanitised) element identifier for
        ## consistent tests, because input filenames are used as str vars.
        ## Every character outside word characters, dash, underscore and dot is
        ## replaced by an underscore.
        #set $safename_input = re.sub('[^\w\-_\.]', '_', $input_file.element_identifier)
        ln -s '${input_file}' '${safename_input}';

        ## Paths and option values are single-quoted; the two boolean flags stay
        ## unquoted so that an unset flag does not add an empty argument
        python -m amas.AMAS
        split
        --split-by '$split_by'
        $remove_empty
        --out-format '$out_format'
        --in-files '$safename_input'
        --in-format "\${IN_FORMAT}"
        --data-type '$data_type'
        --cores "\${GALAXY_SLOTS:-1}"
        $check_align
    ]]></command>

    <inputs>
        <!-- Alignment to split; the command block reads this dataset's datatype to choose the input format -->
        <param name="input_file" type="data" format="fasta,phylip,nex" label="Sequence to split" multiple="false" help="Provide pre-aligned FASTA/PHYLIP/NEXUS file (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
        <expand macro="output_format" label="Select output format for split alignments" />
        <!-- Partitions file: only plain-text (txt) datasets are accepted.
             If amas updates split to handle NEXUS format include nex format here -->
        <param name="split_by" type="data" format="txt" label="Partitions file for splitting. Note: needs to be a partitions file in the Unspecified format (See help section for more information)"
               help="A file defining how to split the concatenated alignment into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' for unspecified format). See the help section for more information about partitions." />
        <!-- Boolean flag: expands to the literal option when checked and to an empty string otherwise -->
        <param argument="--remove-empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" />
        <expand macro="data_type" />
        <expand macro="check_align" />
    </inputs>

    <outputs>
        <!-- Split alignments are collected through the collection_outputs macro, using the base name "split_alignments" -->
        <expand macro="collection_outputs" name="split_alignments" />
    </outputs>

    <tests>
        <!-- Split a PHYLIP alignment with an Unspecified-format partitions file into FASTA output,
             with remove-empty enabled and check_align disabled; two collection elements are expected -->
        <test expect_num_outputs="1">
            <param name="input_file" value="inputs/concat_result.phylip" />
            <param name="split_by" value="inputs/partitions_concat_unspecified.txt" />
            <param name="remove_empty" value="true" />
            <param name="out_format" value="fasta" />
            <param name="data_type" value="dna" />
            <param name="check_align" value="false" />
            <output_collection name="split_alignments_fasta" type="list">
                <element name="concat_result_p1_concat_1-out.fas" file="outputs/expected_split_partition1.fas" ftype="fasta" />
                <element name="concat_result_p2_concat_2-out.fas" file="outputs/expected_split_partition2.fas" ftype="fasta" />
            </output_collection>
        </test>
    </tests>

    <help><![CDATA[
        **What it does**

        AMAS Split divides a concatenated alignment back into separate gene/locus files using a partitions file. This is the reverse operation of AMAS Concat.

        **Inputs**

        - **Concatenated alignment**: A single alignment file containing multiple genes/loci joined end-to-end
        - **Partitions file**: Defines the boundaries of each gene/locus (a .txt file in the Unspecified format)
        - **Input format**: Detected automatically from the datatype of the input dataset (FASTA, PHYLIP or NEXUS); make sure the dataset has the correct datatype assigned
        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
        - **Output format**: Select the desired format for the split alignment files
        - **Remove empty sequences**: Optionally exclude taxa with only gaps/missing data in a partition

        **Outputs**

        A collection of alignment files, one per partition/gene defined in your partitions file.

        @PARTITIONS_HELP@

        **IMPORTANT**: Partitions files produced by AMAS Concat in RAxML or NEXUS format (a .txt file with RAxML or NEXUS formatting, or a .nex file with NEXUS formatting) will not work; only the Unspecified format is supported.

        **Tip:** An example partitions file for your data can be generated using the AMAS Concat tool.

        **Use cases**

        - Extract individual gene alignments from a concatenated dataset
        - Analyze genes separately after joint phylogenetic analysis
        - Apply gene-specific filtering or trimming
        - Recover original locus alignments from published concatenated datasets

        @AMAS_SHARED_HELP@
    ]]></help>

    <expand macro="citations" />
</tool>