comparison mauve_contig_mover.xml @ 0:c14690ec0c9f draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/mauve_contig_mover commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
author brinkmanlab
date Fri, 24 Jan 2020 17:43:19 -0500
parents
children f8b09693c9c1
comparison
equal deleted inserted replaced
-1:000000000000 0:c14690ec0c9f
1 <tool id="mauve-contig-mover" name="Mauve Contig Mover" version="1.0" profile="16.04"> <!-- Galaxy tool wrapper; its command section runs MauveCM -->
2 <description>Reorder a multi-contig dataset against a reference genome</description>
3 <edam_topics> <!-- EDAM ontology topic identifiers for this tool -->
4 <edam_topic>topic_0196</edam_topic>
5 <edam_topic>topic_0091</edam_topic>
6 </edam_topics>
7 <edam_operations> <!-- EDAM ontology operation identifiers for this tool -->
8 <edam_operation>operation_0495</edam_operation>
9 <edam_operation>operation_0491</edam_operation>
10 </edam_operations>
11 <requirements>
12 <requirement type="package" version="2.4.0.snapshot_2015_02_13">mauve</requirement> <!-- pinned snapshot; presumably provides MauveCM and mauveAligner (confirm) -->
13 </requirements>
14 <version_command>mauveAligner --version</version_command> <!-- version is queried from mauveAligner, while the job itself invokes MauveCM -->
15 <command detect_errors="exit_code"><![CDATA[
16 #if $ref_select.ref_source == "db"
17 #set global $index = $ref_select.ref_db
18 #else if $ref_select.ref_source == "meta"
19 #set global $index = $draft.metadata.dbkey
20 #else if $ref_select.ref_source == "key"
21 #set global $index = $ref_select.key
22 #else
23 #set global $index = None
24 #set $ref = $ref_select.ref
25 #if $ref.is_of_type("genbank")
26 ln -sf $ref `basename $ref`.gbk &&
27 #set $ref = $os.path.basename(str($ref)) + ".gbk"
28 #end if
29 #end if
30 ## $index holds the lookup key for the db, meta and key sources (meta uses the dbkey of the draft input, the only dataset outside the conditional); it is None for a history reference
31 #if $index
32 ## Resolve $index to a path: take the last field of the first all_fasta row whose second field equals $index
33 #try
34 #set $ref = next(record for record in $__app__.tool_data_tables['all_fasta'].get_fields() if str( record[1] ) == str( $index ))[-1]
35 #except StopIteration
36 #raise ValueError('Reference not found: '+str($index))
37 #end try
38 #end if
39 ## GenBank inputs are symlinked into the working directory under a ".gbk" name and passed by that name; other drafts are passed by dataset path
40 #if $draft.is_of_type("genbank")
41 ln -sf $draft `basename ${draft}.gbk` &&
42 #set $draft_path = $os.path.basename(str($draft)) + ".gbk"
43 #else
44 #set $draft_path = str($draft)
45 #end if
46 ## Run the reorder into ./output, then run mcm_final.sh from the tool directory
47 MauveCM -output output -ref $ref -draft $draft_path && $__tool_directory__/mcm_final.sh
48 ]]></command>
49 <inputs> <!-- one draft dataset plus a conditional that selects where the reference comes from -->
50 <param name="draft" type="data" format="genbank,fasta" label="Draft" /> <!-- contigs to reorder; handed to MauveCM via -draft -->
51 <conditional name="ref_select"> <!-- the command section branches on ref_source to obtain the reference -->
52 <param name="ref_source" type="select" label="Select the source of the reference to align the draft to">
53 <option value="db">Data manager</option> <!-- choose an entry of the all_fasta data table -->
54 <option value="file">History</option> <!-- use a history dataset directly as the reference -->
55 <option value="meta" selected="true">Input Metadata</option> <!-- default; the command section looks up a dbkey read from dataset metadata in all_fasta -->
56 <option value="key">Key</option> <!-- look up a dbkey typed in by the user in all_fasta -->
57 </param>
58 <when value="file">
59 <param name="ref" type="data" format="genbank,fasta" label="Reference" />
60 </when>
61 <when value="db">
62 <param name="ref_db" type="select" label="Using reference genome" help="Select genome from the list">
63 <options from_data_table="all_fasta">
64 <filter type="sort_by" column="2" />
65 </options>
66 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
67 </param>
68 </when>
69 <when value="meta"> <!-- intentionally empty: no extra parameter is needed for this source -->
70 </when>
71 <when value="key">
72 <!-- this is a workaround until https://github.com/galaxyproject/galaxy/issues/7496 is resolved -->
73 <param name="key" type="text" label="Provide a dbkey to use" />
74 </when>
75 </conditional>
76 </inputs>
77 <outputs> <!-- every dataset is collected from a final.* file in the job working directory -->
78 <data name="reordered" format="fasta" from_work_dir="final.reordered" label="${tool.name} on ${on_string}: Reordered" />
79 <data name="backbone" format="tabular" from_work_dir="final.backbone" label="${tool.name} on ${on_string}: Backbone" />
80 <data name="contigs" format="tabular" from_work_dir="final.contigs.tab" label="${tool.name} on ${on_string}: Contig Order" />
81 <data name="features" format="tabular" from_work_dir="final.features.tab" label="${tool.name} on ${on_string}: Features"> <!-- created only when the filter below is true -->
82 <filter>hasattr(draft, 'is_of_type') and draft.is_of_type("genbank")</filter> <!-- true only for a GenBank draft; the hasattr check guards values lacking is_of_type -->
83 </data>
84 </outputs>
85 <tests>
86 <test expect_failure="true"> <!-- GenBank draft and GenBank history reference; this case is expected to fail (reason below) -->
87 <!-- Mauve uses biojava which currently has a bug: https://github.com/biojava/biojava/issues/843 -->
88 <param name="draft" value="test-data/draft.gbff" ftype="genbank" />
89 <conditional name="ref_select">
90 <param name="ref_source" value="file" />
91 <param name="ref" value="test-data/ref.gbff" ftype="genbank" />
92 </conditional>
93 </test>
94 <test expect_num_outputs="4"> <!-- same scenario using the *_bioperl.gbff fixtures; all four outputs must be produced -->
95 <param name="draft" value="test-data/draft_bioperl.gbff" ftype="genbank" />
96 <conditional name="ref_select">
97 <param name="ref_source" value="file" />
98 <param name="ref" value="test-data/ref_bioperl.gbff" ftype="genbank" />
99 </conditional>
100 <output name="reordered" checksum="sha256:2a5e621227f4967337cefd68273f78d3390c94f5fd7ce4dcbf8024a342aeb976" ftype="fasta" />
101 <output name="backbone" checksum="sha256:f46966bcaed3324e43cad91161f380427a7d3cc52bbf823ae811d3f5dfaf2927" ftype="tabular" />
102 <output name="contigs" file="test-data/contig_order.tabular" ftype="tabular" />
103 <output name="features" checksum="sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" ftype="tabular" /> <!-- NOTE(review): this digest is the SHA-256 of zero bytes, so the test expects an empty features file; confirm that is intended -->
104 </test>
105 </tests>
106 <help><![CDATA[
107 The Mauve Contig Mover (MCM) can be used to order a draft genome relative to a related reference genome.
108 The functionality of this software module has been described in Rissman et al. 2009, a publication in Bioinformatics.
109 The Mauve Contig Mover can ease a comparative study between draft and reference sequences by ordering draft contigs according to the reference genome.
110 In many cases, true rearrangements in the draft relative to the reference can be identified. The quality of the reorder is limited by the distance between
111 the sequences, as indicated by the amount of shared gene content among the two organisms. A more distant reference will usually yield fewer ordered draft genome contigs,
112 and may also induce erroneous placements of draft contigs. In addition to ordering contigs, MCM also orients them in the most likely orientation, and, if annotated sequence
113 features are specified in an input file (e.g. with GenBank format input for the draft), MCM will output adjusted coordinates ranges for the features.
114
115 Outputs:
116
117 "Backbone" is the backbone output by mauveAligner representing the alignments.
118
119 "Reordered" is a fasta file with the contigs reordered. Contigs aligned in reverse will be their complement sequence.
120
121 "Contig Order" acts as an index to the fasta as the contig orders and orientations change (even if the draft was originally
122 input as a genbank, after the first alignment, it will be converted to a fasta with annotation information preserved in a file described below).
123 The file is divided into 3 sections, each containing a list of contigs. The data for each contig includes its label (name),
124 its location in the genome (numbered in pseudocoordinates from the first to last contig), and whether it is oriented the same as originally input
125 or was complemented.
126
127 The three sections are described below:
128
129 Contigs to reverse:
130 This section contains contigs whose order is reversed with respect to the previous iteration.
131 Note that contigs in this section may be oriented the same as originally input; this can be determined from the forward or complement designation.
132
133 Ordered Contigs:
134 This is a list of all the contigs in the order and orientation they appear in the fasta for the draft of this iteration of the reorder.
135 Since these include all the contigs in the original input, those with no ordering information (no aligned region) will be clustered at the end.
136 These will appear as contigs with no LCBs at the end of the draft genome.
137
138 Contigs with Conflicting Order information:
139 This is a list of contigs containing LCBs suggesting multiple possible locations.
140 These may be of interest to verify positioning, or to look at points of potential rearrangement or misassembly.
141
142 If the draft was input as an annotated genbank file, a second file will appear called "Features". This file will contain a line for each annotation,
143 information about its current orientation and location (which will change if the contig is inverted), coordinates from the previous iteration (indicating
144 relative orientation), and whether it is reversed from the original input. It will also have a label field used to identify each feature.
145 The label is taken from the annotation, checking the following qualifiers in order: db_xref, label, gene, and locus_tag.
146 ]]></help>
147 <citations>
148 <citation type="doi">10.5281/zenodo.3364789</citation> <!-- NOTE(review): Zenodo record; presumably the archived wrapper collection (confirm) -->
149 <citation type="doi">10.1093/bioinformatics/btp356</citation> <!-- NOTE(review): presumably the Rissman et al. 2009 Bioinformatics paper named in the help text (confirm) -->
150 </citations>
151 </tool>