comparison orthogroups_tool.xml @ 0:d33ad52f59bc draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:25:10 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d33ad52f59bc
1 <tool name="Orthogroups_Tool" id="orthogroups_tool" version="1.0.2">
2
3 <description>
4 Writes orthogroups found by OrthoFinder in fasta files (with their sequences).
5 </description>
6
7 <macros>
8 <import>macros.xml</import>
9 </macros>
10
11 <requirements>
12 <requirement type="package" version="2.7">python</requirement>
13 <requirement type="package" version="1.12.0">numpy</requirement>
14 <requirement type="package" version="0.20.0">pandas</requirement>
15 </requirements>
16 <!-- Links each FASTA input under its element identifier, rewrites the orthogroup headers, then runs filter_orthofinder.py with its stdout captured in the "output" dataset. -->
17 <command>
18 <![CDATA[
19 #set $infiles = ""
20 #for $input in $inputs_fasta
21 ln -s '$input' '$input.element_identifier' &&
22 #set $infiles = $infiles + $input.element_identifier + ","
23 #end for
24 #set $infiles = $infiles[:-1]
25 ## Every shell step is chained with && so that a failed step stops the job instead of being silently ignored.
26 '$__tool_directory__/scripts/format_transdecoder_headers.sh' '$orthogroups' tmp_orthogroups.txt
27 &&
28 python '$__tool_directory__/scripts/filter_orthofinder.py' tmp_orthogroups.txt '$infiles' $nbseq
29 #if $verbosity=="T":
30 -v
31 #end if
32 #if $paralogs=="T":
33 -p
34 #end if
35
36 > '$output'
37 ]]>
38 </command>
39 <!-- User-facing parameters; each one is referenced by name in the Cheetah template of the command section. -->
40 <inputs>
41 <param name="orthogroups" type="data" format="txt" label="Select a file of Orthogroups" help="The output from OrthoFinder, 'Orthogroups.txt'."/>
42 <param name="inputs_fasta" type="data" format="fasta" multiple="true" label="Outputs from Filter Assemblies" help="Contains the nucleic sequences to retrieve."/>
43 <param name="nbseq" type="integer" value="3" label="Minimal number of sequences per orthogroup" help="Drop orthogroups with less than x sequences :" />
44 <param name="verbosity" type="boolean" checked="True" truevalue="T" falsevalue="F" label="verbosity" help="Turn on this parameter to display an additional summary table (computed before the paralogous filtering)" />
45 <param name="paralogs" type="boolean" checked="False" truevalue="T" falsevalue="F" label="paralogs" help="Turn on this parameter to obtain an additional output (orthogroups files before paralogous filtering)" />
46 </inputs>
47 <!-- One text dataset fed by the command's stdout redirect, plus two list collections discovered from the filtered_orthogroups and orthogroups_withParalogs directories; the second collection is only produced when "paralogs" is enabled. -->
48 <outputs>
49 <data format="txt" name="output" label="orthogroups_tool.output" />
50 <collection name="orthogroups_fasta" type="list" label="Orthogroups_fasta_files" >
51 <discover_datasets pattern="__name_and_ext__" directory="filtered_orthogroups" />
52 </collection>
53 <collection name="orthogroups_fasta_paralogs" type="list" label="Orthogroups_with_paralogs_fasta_files" >
54 <discover_datasets pattern="__name_and_ext__" directory="orthogroups_withParalogs" />
55 <filter>paralogs == True</filter>
56 </collection>
57 </outputs>
58 <!-- Two tests on the same four FASTA inputs with nbseq=2: the first enables "paralogs" and checks both collections, the second disables it and checks only the filtered collection. -->
59 <tests>
60 <test>
61 <param name="orthogroups" value="Orthogroups.txt"/>
62 <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
63 <param name="nbseq" value="2"/>
64 <param name="verbosity" value="True"/>
65 <param name="paralogs" value="True"/>
66 <output name="output" value="orthogroups_tool.output" lines_diff="2"/>
67 <output_collection name="orthogroups_fasta" type="list">
68 <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" />
69 <element name="orthogroup_2_4_sequences" value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" />
70 <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" />
71 <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" />
72 <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" />
73 <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" />
74 <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" />
75 </output_collection>
76 <output_collection name="orthogroups_fasta_paralogs" type="list">
77 <element name="orthogroup_1_8_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_1_8_sequences_withParalogs.fasta" />
78 <element name="orthogroup_2_5_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_2_5_sequences_withParalogs.fasta" />
79 <element name="orthogroup_3_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_3_3_sequences_withParalogs.fasta" />
80 <element name="orthogroup_4_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_4_3_sequences_withParalogs.fasta" />
81 <element name="orthogroup_5_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_5_3_sequences_withParalogs.fasta" />
82 <element name="orthogroup_6_3_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_6_3_sequences_withParalogs.fasta" />
83 <element name="orthogroup_7_2_sequences_withParalogs" value="orthogroups_withParalogs/orthogroup_7_2_sequences_withParalogs.fasta" />
84 </output_collection>
85 </test>
86 <test>
87 <param name="orthogroups" value="Orthogroups.txt"/>
88 <param name="inputs_fasta" value="output_filterassemblies/AcAcaud_trinity.fasta,output_filterassemblies/AmAmphi_trinity.fasta,output_filterassemblies/ApApomp_trinity.fasta,output_filterassemblies/AsAsp1_trinity.fasta"/>
89 <param name="nbseq" value="2"/>
90 <param name="verbosity" value="True"/>
91 <param name="paralogs" value="False"/>
92 <output name="output" value="2_orthogroups_tool.output" lines_diff="2"/>
93 <output_collection name="orthogroups_fasta" type="list">
94 <element name="orthogroup_1_3_sequences" value="filtered_orthogroups/orthogroup_1_3_sequences.fasta" />
95 <element name="orthogroup_2_4_sequences" value="filtered_orthogroups/orthogroup_2_4_sequences.fasta" />
96 <element name="orthogroup_3_2_sequences" value="filtered_orthogroups/orthogroup_3_2_sequences.fasta" />
97 <element name="orthogroup_4_3_sequences" value="filtered_orthogroups/orthogroup_4_3_sequences.fasta" />
98 <element name="orthogroup_5_3_sequences" value="filtered_orthogroups/orthogroup_5_3_sequences.fasta" />
99 <element name="orthogroup_6_3_sequences" value="filtered_orthogroups/orthogroup_6_3_sequences.fasta" />
100 <element name="orthogroup_7_2_sequences" value="filtered_orthogroups/orthogroup_7_2_sequences.fasta" />
101 </output_collection>
102 </test>
103 </tests>
104
105 <help>
106
107 @HELP_AUTHORS@
108
109 <![CDATA[
110
111 **Description**
112
113 This tool takes the orthogroups found by OrthoFinder, retrieves the corresponding nucleic sequences, then writes each orthogroup to its own fasta file.
114
115 -------------------------------------------
116
117 **Step 1 : re-writing headers**
118
119 .. class:: warningmark
120
121 This tool is configured to work within the AdaptSearch toolsuite, which implies a specific format of headers. Indeed, due to the use of external tools (like TransDecoder), the gene IDs might have been modified. A regular expression rewrites all the gene IDs in order to have, for instance, this format : >Pf1004_1/1_1.000_369
122
123 -------------------------------------------
124
125 **Step 2 : reading and re-writing orthogroups from OrthoFinder**
126
127 OrthoFinder (Emms, D.M. and Kelly, S., 2015) is a fast, accurate and comprehensive analysis tool for comparative genomics. It finds orthologues and orthogroups, infers gene trees for all orthogroups and infers a rooted species tree for the species being analysed. OrthoFinder also provides comprehensive statistics for comparative genomic analyses.
128
129 Our tool focuses on the orthogroups.txt file created before gene trees. The script proceeds to split each orthogroup into its own fasta file and, with the use of the output of Filter_Assemblies, to re-associate each ID with its sequence.
130
131 -------------------------------------------
132
133 **Inputs and parameters**
134
135 - File of Orthogroups : the orthogroups.txt file from OrthoFinder.
136 - Output from Filter_Assemblies (either as multiple datasets or dataset collection)
137 - Minimal number of sequences : Orthogroups with fewer sequences than the specified number won't be recorded.
138 - Verbose : If 'Yes', a supplementary table will be displayed in the outputs (counts of species and sequences in orthogroups before the removal of paralogs).
139 - Paralogs : if 'Yes', there will be a supplementary output of orthogroups file, before the removal of paralogs.
140
141 -------------------------------------------
142
143 **Outputs**
144
145 - Dataset collection of fasta files : each file represents an orthogroup; each gene within the group is written as an ID / nucleic sequence pair.
146 - If the --paralogs option is checked : another dataset collection of fasta files with all the paralogous genes.
147 - The tool log.
148
149 ---------
150
151 **The AdaptSearch Pipeline**
152
153 .. image:: adaptsearch_picture_helps.png
154
155 Changelog
156 ---------
157
158 **Version 1.0 - 11/01/2018**
159
160 ]]>
161 </help>
162
163 <citations>
164 <citation type="doi">10.1186/s13059-015-0721-2</citation>
165 </citations>
166
167 </tool>