view mauve_contig_mover.xml @ 6:4931255f2fcc draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/mauve_contig_mover commit 25b7aeca541e258a2bff5d0e34e9298996b4a47b"
author brinkmanlab
date Fri, 20 Nov 2020 18:58:52 +0000
parents f8b09693c9c1
children 4c4838703fa8
line wrap: on
line source

<tool id="mauve-contig-mover" name="Mauve Contig Mover" version="1.0" profile="16.04">
    <description>Reorder a multi-contig dataset against a reference genome</description>
    <!-- EDAM ontology topic identifiers classifying this tool.
         NOTE(review): believed to be topic_0196 = "Sequence assembly" and
         topic_0091 = "Bioinformatics"; confirm the labels against the EDAM ontology. -->
    <edam_topics>
        <edam_topic>topic_0196</edam_topic>
        <edam_topic>topic_0091</edam_topic>
    </edam_topics>
    <!-- EDAM ontology operation identifiers.
         NOTE(review): believed to be operation_0495 = "Local alignment" and
         operation_0491 = "Pairwise sequence alignment"; confirm against the EDAM ontology. -->
    <edam_operations>
        <edam_operation>operation_0495</edam_operation>
        <edam_operation>operation_0491</edam_operation>
    </edam_operations>
    <!-- The mauve package, pinned to one snapshot version; the command template below calls its MauveCM executable. -->
    <requirements>
        <requirement type="package" version="2.4.0.snapshot_2015_02_13">mauve</requirement>
    </requirements>
    <!-- Command whose output is recorded as the tool's underlying software version. -->
    <version_command>mauveAligner --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Resolve the reference genome. The "db", "meta" and "key" sources each yield a
        ## dbkey that is looked up in the all_fasta data table further down; the remaining
        ## source ("file") uses a history dataset directly.
        #if $ref_select.ref_source == "db"
            #set global $index = $ref_select.ref_db
        #else if $ref_select.ref_source == "meta"
            ## Read the dbkey from the draft dataset, which is the only data input
            ## available in this branch.
            #set global $index = $draft.metadata.dbkey
        #else if $ref_select.ref_source == "key"
            #set global $index = $ref_select.key
        #else
            #set global $index = None
            #set $ref = $ref_select.ref
            #if $ref.is_of_type("genbank")
                ## Link the dataset into the working directory under a .gbk name
                ## (presumably so Mauve recognises the GenBank format by extension).
                #set $ref_link = $os.path.basename(str($ref)) + ".gbk"
                ln -sf '$ref' '$ref_link' &&
                #set $ref = $ref_link
            #end if
        #end if

        #if $index
            ## Get path to database file
            #try
                #set $ref = next(record for record in $__app__.tool_data_tables['all_fasta'].get_fields() if str( record[1] ) == str( $index ))[-1]
            #except StopIteration
                #raise ValueError('Reference not found: '+str($index))
            #end try
        #else if $ref_select.ref_source != "file"
            ## Without a dbkey no reference path can be resolved; fail with a clear
            ## message instead of an undefined-variable error further down.
            #raise ValueError('No reference dbkey available for the selected reference source')
        #end if

        #if $draft.is_of_type("genbank")
            ## Same .gbk link convention as used for a GenBank reference above.
            #set $draft_path = $os.path.basename(str($draft)) + ".gbk"
            ln -sf '$draft' '$draft_path' &&
        #else
            #set $draft_path = str($draft)
        #end if

        ## Paths are single-quoted so that spaces or shell metacharacters in them cannot split or alter the command.
        test -r '$ref' && MauveCM -output output -ref '$ref' -draft '$draft_path' && '$__tool_directory__/mcm_final.sh'
    ]]></command>
    <inputs>
        <!-- Draft assembly to be reordered; accepted as GenBank or FASTA. -->
        <param name="draft" type="data" format="genbank,fasta" label="Draft" />
        <!-- Reference selection: the command template branches on the value of ref_source. -->
        <conditional name="ref_select">
            <param name="ref_source"
                   type="select"
                   label="Select the source of the reference to align the draft to">
                <option value="db">Data manager</option>
                <option value="file">History</option>
                <option value="meta" selected="true">Input Metadata</option>
                <option value="key">Key</option>
            </param>
            <!-- History: a reference dataset chosen by the user. -->
            <when value="file">
                <param name="ref" type="data" format="genbank,fasta" label="Reference" />
            </when>
            <!-- Data manager: choose an entry of the all_fasta data table;
                 column 2 is displayed and column 1 is the submitted value. -->
            <when value="db">
                <param name="ref_db"
                       type="select"
                       label="Using reference genome"
                       help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <column name="name" index="2" />
                        <column name="value" index="1" />
                    </options>
                    <validator type="no_options"
                               message="A built-in reference genome is not available for the build associated with the selected input file" />
                </param>
            </when>
            <!-- Input Metadata: no extra parameters; the command template reads a dbkey from dataset metadata. -->
            <when value="meta" />
            <when value="key">
                <!-- this is a workaround until https://github.com/galaxyproject/galaxy/issues/7496 is resolved -->
                <param name="key" type="text" label="Provide a dbkey to use" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each output is collected from a final.* file in the job working directory. -->
        <data name="reordered"
              format="fasta"
              from_work_dir="final.reordered"
              label="${tool.name} on ${on_string}: Reordered" />
        <data name="backbone"
              format="tabular"
              from_work_dir="final.backbone"
              label="${tool.name} on ${on_string}: Backbone" />
        <data name="contigs"
              format="tabular"
              from_work_dir="final.contigs.tab"
              label="${tool.name} on ${on_string}: Contig Order" />
        <!-- Created only when the draft input is a GenBank dataset (see the filter expression). -->
        <data name="features"
              format="tabular"
              from_work_dir="final.features.tab"
              label="${tool.name} on ${on_string}: Features">
            <filter>hasattr(draft, 'is_of_type') and draft.is_of_type("genbank")</filter>
        </data>
    </outputs>
    <tests>
        <!-- Case 1: GenBank draft against a GenBank reference from the history; currently expected to fail. -->
        <test expect_failure="true">
            <!-- Mauve uses biojava which currently has a bug: https://github.com/biojava/biojava/issues/843 -->
            <param name="draft" value="test-data/draft.gbff" ftype="genbank" />
            <conditional name="ref_select">
                <param name="ref_source" value="file" />
                <param name="ref" value="test-data/ref.gbff" ftype="genbank" />
            </conditional>
        </test>
        <!-- Case 2: the *_bioperl GenBank fixtures; all four outputs are expected. -->
        <test expect_num_outputs="4">
            <param name="draft" value="test-data/draft_bioperl.gbff" ftype="genbank" />
            <conditional name="ref_select">
                <param name="ref_source" value="file" />
                <param name="ref" value="test-data/ref_bioperl.gbff" ftype="genbank" />
            </conditional>
            <output name="reordered"
                    ftype="fasta"
                    checksum="sha256:2a5e621227f4967337cefd68273f78d3390c94f5fd7ce4dcbf8024a342aeb976" />
            <output name="backbone"
                    ftype="tabular"
                    checksum="sha256:f46966bcaed3324e43cad91161f380427a7d3cc52bbf823ae811d3f5dfaf2927" />
            <output name="contigs"
                    ftype="tabular"
                    file="test-data/contig_order.tabular" />
            <!-- NOTE(review): this checksum is the SHA-256 of empty input, so the test expects an
                 empty Features file; confirm that this is intended. -->
            <output name="features"
                    ftype="tabular"
                    checksum="sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" />
        </test>
    </tests>
    <help><![CDATA[
        The Mauve Contig Mover (MCM) can be used to order a draft genome relative to a related reference genome. 
        The functionality of this software module has been described in Rissman et al. 2009, a publication in Bioinformatics. 
        The Mauve Contig Mover can ease a comparative study between draft and reference sequences by ordering draft contigs according to the reference genome. 
        In many cases, true rearrangements in the draft relative to the reference can be identified. The quality of the reorder is limited by the distance between 
        the sequences, as indicated by the amount of shared gene content among the two organisms. A more distant reference will usually yield fewer ordered draft genome contigs, 
        and may also induce erroneous placements of draft contigs. In addition to ordering contigs, MCM also orients them in the most likely orientation, and, if annotated sequence 
        features are specified in an input file (e.g. with GenBank format input for the draft), MCM will output adjusted coordinate ranges for the features.
    
        Outputs:
        
        "Backbone" is the backbone output by mauveAligner representing the alignments.
        
        "Reordered" is a fasta file with the contigs reordered. Contigs aligned in reverse are output as their complement sequence.
        
        "Contig Order" acts as an index to the fasta as the contig orders and orientations change (even if the draft was originally 
        input as a genbank, after the first alignment, it will be converted to a fasta with annotation information preserved in a file described below). 
        The file is divided into 3 sections, each containing a list of contigs. The data for each contig includes its label (name), 
        its location in the genome (numbered in pseudocoordinates from the first to last contig), and whether it is oriented the same as originally input 
        or was complemented. 
        
        The three sections are described below:
        
        Contigs to reverse: 
            This section contains contigs whose order is reversed with respect to the previous iteration. 
            Note that contigs in this section may be oriented the same as originally input; this can be determined from the forward or complement designation.
        
        Ordered Contigs:
            This is a list of all the contigs in the order and orientation they appear in the fasta for the draft of this iteration of the reorder. 
            Since these include all the contigs in the original input, those with no ordering information (no aligned region) will be clustered at the end. 
            These will appear as contigs with no LCBs at the end of the draft genome.
        
        Contigs with Conflicting Order information:
            This is a list of contigs containing LCBs suggesting multiple possible locations. 
            These may be of interest to verify positioning, or to look at points of potential rearrangement or misassembly.
        
        If the draft was input as an annotated genbank file, a second file will appear called "Features". This file will contain a line for each annotation, 
        information about its current orientation and location (which will change if the contig is inverted), coordinates from the previous iteration (indicating 
        relative orientation), and whether it is reversed from the original input. It will also have a label field used to identify each feature. 
        This label is taken from the annotation, checked in the following order: db_xref, label, gene, and locus_tag.
    ]]></help>
    <!-- References offered to users for citation. -->
    <citations>
        <!-- NOTE(review): Zenodo record; presumably the archive of this tool wrapper repository (confirm). -->
        <citation type="doi">10.5281/zenodo.3364789</citation>
        <!-- Bioinformatics article; presumably the Rissman et al. 2009 publication mentioned in the help text (confirm). -->
        <citation type="doi">10.1093/bioinformatics/btp356</citation>
    </citations>
</tool>