diff mauve_contig_mover.xml @ 0:c14690ec0c9f draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/mauve_contig_mover commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
author brinkmanlab
date Fri, 24 Jan 2020 17:43:19 -0500
parents
children f8b09693c9c1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mauve_contig_mover.xml	Fri Jan 24 17:43:19 2020 -0500
@@ -0,0 +1,151 @@
+<tool id="mauve-contig-mover" name="Mauve Contig Mover" version="1.0" profile="16.04">
+    <description>Reorder a multi-contig dataset against a reference genome</description>
+    <edam_topics>
+        <edam_topic>topic_0196</edam_topic>
+        <edam_topic>topic_0091</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_0495</edam_operation>
+        <edam_operation>operation_0491</edam_operation>
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="2.4.0.snapshot_2015_02_13">mauve</requirement>
+    </requirements>
+    <version_command>mauveAligner --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        #if $ref_select.ref_source == "db"
+            #set global $index = $ref_select.ref_db
+        #else if $ref_select.ref_source == "meta"
+            #set global $index = $data.metadata.dbkey
+        #else if $ref_select.ref_source == "key"
+            #set global $index = $ref_select.key
+        #else
+            #set global $index = None
+            #set $ref = $ref_select.ref
+            #if $ref.is_of_type("genbank")
+                ln -sf $ref `basename $ref`.gbk &&
+                #set $ref = $os.path.basename(str($ref)) + ".gbk"
+            #end if
+        #end if
+        
+        #if $index
+            ## Get path to database file
+            #try
+                #set $ref = next(record for record in $__app__.tool_data_tables['all_fasta'].get_fields() if str( record[1] ) == str( $index ))[-1]
+            #except StopIteration
+                #raise ValueError('Reference not found: '+str($index))
+            #end try
+        #end if
+
+        #if $draft.is_of_type("genbank")
+            ln -sf $draft `basename ${draft}.gbk` &&
+            #set $draft_path = $os.path.basename(str($draft)) + ".gbk"
+        #else
+            #set $draft_path = str($draft)
+        #end if
+        
+        MauveCM -output output -ref $ref -draft $draft_path && $__tool_directory__/mcm_final.sh
+    ]]></command>
+    <inputs>
+        <param name="draft" type="data" format="genbank,fasta" label="Draft" />
+        <conditional name="ref_select">
+            <param name="ref_source" type="select" label="Select the source of the reference to align the draft to">
+                <option value="db">Data manager</option>
+                <option value="file">History</option>
+                <option value="meta" selected="true">Input Metadata</option>
+                <option value="key">Key</option>
+            </param>
+            <when value="file">
+                <param name="ref" type="data" format="genbank,fasta" label="Reference" />
+            </when>
+            <when value="db">
+                <param name="ref_db" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <filter type="sort_by" column="2" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="meta">
+            </when>
+            <when value="key">
+                <!-- this is a workaround until https://github.com/galaxyproject/galaxy/issues/7496 is resolved -->
+                <param name="key" type="text" label="Provide a dbkey to use" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="reordered" format="fasta" from_work_dir="final.reordered" label="${tool.name} on ${on_string}: Reordered" />
+        <data name="backbone" format="tabular" from_work_dir="final.backbone" label="${tool.name} on ${on_string}: Backbone" />
+        <data name="contigs" format="tabular" from_work_dir="final.contigs.tab" label="${tool.name} on ${on_string}: Contig Order" />
+        <data name="features" format="tabular" from_work_dir="final.features.tab" label="${tool.name} on ${on_string}: Features">
+            <filter>hasattr(draft, 'is_of_type') and draft.is_of_type("genbank")</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_failure="true">
+            <!-- Mauve uses biojava which currently has a bug: https://github.com/biojava/biojava/issues/843 -->
+            <param name="draft" value="test-data/draft.gbff" ftype="genbank" />
+            <conditional name="ref_select">
+                <param name="ref_source" value="file" />
+                <param name="ref" value="test-data/ref.gbff" ftype="genbank" />
+            </conditional>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="draft" value="test-data/draft_bioperl.gbff" ftype="genbank" />
+            <conditional name="ref_select">
+                <param name="ref_source" value="file" />
+                <param name="ref" value="test-data/ref_bioperl.gbff" ftype="genbank" />
+            </conditional>
+            <output name="reordered" checksum="sha256:2a5e621227f4967337cefd68273f78d3390c94f5fd7ce4dcbf8024a342aeb976" ftype="fasta" />
+            <output name="backbone" checksum="sha256:f46966bcaed3324e43cad91161f380427a7d3cc52bbf823ae811d3f5dfaf2927" ftype="tabular" />
+            <output name="contigs" file="test-data/contig_order.tabular" ftype="tabular" />
+            <output name="features" checksum="sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        The Mauve Contig Mover (MCM) can be used to order a draft genome relative to a related reference genome. 
+        The functionality of this software module has been described in Rissman et al. 2009, a publication in Bioinformatics. 
+        The Mauve Contig Mover can ease a comparative study between draft and reference sequences by ordering draft contigs according to the reference genome. 
+        In many cases, true rearrangements in the draft relative to the reference can be identified. The quality of the reorder is limited by the distance between 
+        the sequences, as indicated by the amount of shared gene content among the two organisms. A more distant reference will usually yield fewer ordered draft genome contigs, 
+        and may also induce erroneous placements of draft contigs. In addition to ordering contigs, MCM also orient them in the most likely orientation, and, if annotated sequence 
+        features are specified in an input file (e.g. with GenBank format input for the draft), MCM will output adjusted coordinates ranges for the features.
+    
+        Outputs:
+        
+        "Backbone" is the backbone output by mauveAligner representing the alignements.
+        
+        "Reordered" is a fasta file with the contigs reordered. Contigs aligned in reverse will be thier compliment sequence.
+        
+        "Contig Order" acts as an index to the fasta as the contig orders and orientations change (even if the draft was originally 
+        input as a genbank, after the first alignment, it will be converted to a fasta with annotation information preserved in a file described below). 
+        The file is divided into 3 sections, each containing a list of contigs. The data for each contig includes its label (name), 
+        its location in the genome (numbered in pseudocoordinates from the first to last contig, and whether it is oriented the same as originally input, 
+        or was complemented. 
+        
+        The three sections are described below:
+        
+        Contigs to reverse: 
+            This section contains contigs whose order is reversed with respect to the previous iteration. 
+            Note that contigs in this section may be oriented the same as originally input, this can be determined from the forward orcomplement designation.
+        
+        Ordered Contings:
+            This is a list of all the contigs in the order and orientation they appear in the fasta for the draft of this iteration of the reorder. 
+            Since these include all the contigs in the original input, those with no ordering information (no aligned region) will be clustered at the end. 
+            These will appear as contigs with no LCBs at the end of the draft genome.
+        
+        Contigs with Conflicting Order information:
+            This is a list of contigs containing LCBs suggesting multiple possible locations. 
+            These may be of interest to verify positioning, or to look at points of potential rearrangement or misassembly.
+        
+        If the draft was input as an annotated genbank file, a second file will appear called "Features". This file will contain a line for each annotation, 
+        information about its current orientation and location (which will change if the contig is inverted), coordinates from the previous iteration (indicating 
+        relative orientation), and whether it is reversed from the original input. It will also have a label field used to identify each feature. 
+        This will be gotten from the annotation, as checked in the following order: db_xref, label, gene, and locus_tag.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.3364789</citation>
+        <citation type="doi">10.1093/bioinformatics/btp356</citation>
+    </citations>
+</tool>