comparison filter_assembly.xml @ 0:7a813e633d1c draft default tip

planemo upload for repository https://github.com/abims-sbr/adaptsearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author abims-sbr
date Fri, 01 Feb 2019 10:22:32 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7a813e633d1c
1 <tool name="Filter assemblies" id="filter_assemblies" version="2.0.3">
2
3 <description>
4 Filter the outputs of Velvet or Trinity assemblies
5 </description>
6 <!-- Shared macros are imported from macros.xml; the "python_required" macro expanded below is expected to be defined there (not visible in this file). -->
7 <macros>
8 <import>macros.xml</import>
9 </macros>
10 <!-- Runtime dependencies: whatever "python_required" expands to, plus the pinned fastx_toolkit 0.0.14 and cap3 10.2011 packages. -->
11 <requirements>
12 <expand macro="python_required" />
13 <requirement type="package" version="0.0.14">fastx_toolkit</requirement>
14 <requirement type="package" version="10.2011">cap3</requirement>
15 </requirements>
16
17 <command>
18 <![CDATA[
19 #set $infiles = ""
20 #for $input in $inputs
21 ln -s '$input' '$input.element_identifier' &&
22 #set $infiles = $infiles + $input.element_identifier + ","
23 #end for
24 #set $infiles = $infiles[:-1]
25 ## infiles now holds the comma-separated element identifiers of the symlinked inputs (trailing comma dropped). Every step is chained with a logical AND so a failed symlink stops the job before the pipeline runs.
26 ln -s '$__tool_directory__/scripts/S02a_remove_redondancy_from_velvet_oases.py' . &&
27 ln -s '$__tool_directory__/scripts/S02b_format_fasta_name_trinity.py' . &&
28 ln -s '$__tool_directory__/scripts/S03_choose_one_variants_per_locus_trinity.py' . &&
29 ln -s '$__tool_directory__/scripts/S04_find_orf.py' . &&
30 ln -s '$__tool_directory__/scripts/S05_filter.py' . &&
31 ## The helper scripts above are linked into the working directory, presumably so the S01 driver can call them by relative name (verify against S01_script_to_choose.py).
32 python '$__tool_directory__/scripts/S01_script_to_choose.py'
33 ## Positional arguments, in order: input list, sequence length threshold, cap3 percent identity cutoff, cap3 overlap length cutoff. Standard output is captured as the summary log.
34 '$infiles'
35 $length_seq_max
36 $percent_identity
37 $overlap_length
38 > '$log'
39 ]]>
40 </command>
41
42 <inputs> <!-- One or more FASTA assemblies plus three integer thresholds. The cap3 cutoffs are range-checked in the form so out-of-range values are rejected before the job starts (percent identity above 65 and at most 100, overlap length above 15). -->
43 <param name="inputs" type="data" format="fasta" multiple="true" label="Input files" />
44 <param name="percent_identity" type="integer" value="100" min="66" max="100" label="Overlap percent identity cutoff" help="Cap3 parameter (-p N); minimum percent identity of an overlap. The specified value should be more than 65%." />
45 <param name="overlap_length" type="integer" value="60" min="16" label="Overlap length cutoff" help="Cap3 parameter (-o N); minimum length of an overlap (in base pairs). The specified value should be more than 15 base pairs." />
46 <param name="length_seq_max" type="integer" value="100" label="Minimum sequence length" help="Keep sequences which length is higher than the minimum sequence length " />
47 </inputs>
48
49 <outputs> <!-- A list collection filled from the files found in the "outputs" directory (element name and extension taken from each file name), plus the text log that receives the command's standard output. -->
50 <collection type="list" name="output_fasta" label="Filter Assemblies outputs">
51 <discover_datasets directory="outputs" pattern="__name_and_ext__" />
52 </collection>
53 <data name="log" format="txt" label="Filter Assemblies Summary" />
54 </outputs>
55
56 <tests> <!-- Three functional tests, all using the default thresholds (100 / 60 / 100); each compares the log and every collection element against a fixture file. Expected element names carry a two-letter prefix in front of the input file name (e.g. Acaud_trinity gives AcAcaud_trinity). -->
57 <test> <!-- Mixed run: four Trinity and three Velvet assemblies in a single job. -->
58 <param name="inputs" ftype="fasta" value="trinity/Pfiji_trinity.fasta,trinity/Apomp_trinity.fasta,trinity/Amphi_trinity.fasta,trinity/Acaud_trinity.fasta,velvet/Pg_transcriptome_90109.fasta,velvet/Ap_transcriptome_35099.fasta,velvet/Ac_transcriptome_25591.fasta" />
59 <param name="percent_identity" value="100" />
60 <param name="overlap_length" value="60" />
61 <param name="length_seq_max" value="100" />
62 <output name="log" value="trinity_and_velvet_up.output" />
63 <output_collection name="output_fasta" type="list">
64 <element name="AcAc_transcriptome_25591" value="velvet_out/AcAc_transcriptome_25591.fasta" />
65 <element name="ApAp_transcriptome_35099" value="velvet_out/ApAp_transcriptome_35099.fasta" />
66 <element name="PgPg_transcriptome_90109" value="velvet_out/PgPg_transcriptome_90109.fasta" />
67 <element name="AcAcaud_trinity" value="trinity_out/AcAcaud_trinity.fasta" />
68 <element name="AmAmphi_trinity" value="trinity_out/AmAmphi_trinity.fasta" />
69 <element name="ApApomp_trinity" value="trinity_out/ApApomp_trinity.fasta" />
70 <element name="PfPfiji_trinity" value="trinity_out/PfPfiji_trinity.fasta" />
71 </output_collection>
72 </test>
73 <test> <!-- Trinity-only run: the four Trinity assemblies. -->
74 <param name="inputs" ftype="fasta" value="trinity/Pfiji_trinity.fasta,trinity/Apomp_trinity.fasta,trinity/Amphi_trinity.fasta,trinity/Acaud_trinity.fasta" />
75 <param name="percent_identity" value="100" />
76 <param name="overlap_length" value="60" />
77 <param name="length_seq_max" value="100" />
78 <output name="log" value="trinity_up.output" />
79 <output_collection name="output_fasta" type="list">
80 <element name="AcAcaud_trinity" value="trinity_out/AcAcaud_trinity.fasta" />
81 <element name="AmAmphi_trinity" value="trinity_out/AmAmphi_trinity.fasta" />
82 <element name="ApApomp_trinity" value="trinity_out/ApApomp_trinity.fasta" />
83 <element name="PfPfiji_trinity" value="trinity_out/PfPfiji_trinity.fasta" />
84 </output_collection>
85 </test>
86 <test> <!-- Velvet-only run: the three Velvet assemblies. -->
87 <param name="inputs" ftype="fasta" value="velvet/Pg_transcriptome_90109.fasta,velvet/Ap_transcriptome_35099.fasta,velvet/Ac_transcriptome_25591.fasta" />
88 <param name="percent_identity" value="100" />
89 <param name="overlap_length" value="60" />
90 <param name="length_seq_max" value="100" />
91 <output name="log" value="velvet_up.output" />
92 <output_collection name="output_fasta" type="list">
93 <element name="AcAc_transcriptome_25591" value="velvet_out/AcAc_transcriptome_25591.fasta" />
94 <element name="ApAp_transcriptome_35099" value="velvet_out/ApAp_transcriptome_35099.fasta" />
95 <element name="PgPg_transcriptome_90109" value="velvet_out/PgPg_transcriptome_90109.fasta" />
96 </output_collection>
97 </test>
98
99 </tests>
100
101 <help>
102
103 @HELP_AUTHORS@
104
105 <![CDATA[
106
107 **Description**
108
109 This tool reformats Velvet Oases or Trinity assemblies for the AdaptSearch galaxy suite and selects only one variant per gene according to its length and quality check.
110
111 ---------
112
113 **Input format**
114
115 (1) Sequences are in the sequential format:
116
117 | >seqname1
118 | AAAGAGAGACCACATGTCAGTAGC -on one or several lines -
119 | >seqname2
120 | AAGGCCTGACCACATGAGTTAAGC -on one or several lines -
121 | etc ...
122 |
123
124 (2) The file name should begin with a two-letter abbreviation of the species name (for instance, 'Ap' if the species is Alvinella pompejana).
125
126 **For Velvet Oases assemblies input**
127
128 The headers must be as follows : *>Locus_i_Transcript_i/j_Confidence_x.xxx_Length_N* where i is the locus number, j the transcript variant among all versions of the transcript, x.xxx the confidence value and N the length.
129
130 **For Trinity assemblies inputs**
131
132 The headers must be as follows : *>cj_gj_ij Len=j path=[j:0-j]* where all the j are integers (locus number, transcript variant, length, position...)
133
134 **The tool handles the case if input files come from both assemblers (there is no need for input files to be exclusively from one or another assembler).**
135
136 ---------
137
138 **Parameters**
139
140 - 'Input files' : a collection of fasta files (one file per species).
141 - 'Overlap percent identity cutoff' : cap3 -p parameter : minimum percent identity of an overlap.
142 must be > 65 ; default : 100.
143 - 'Overlap length cutoff' (integer) : cap3 -o parameter : minimum length of an overlap (in base pairs).
144 must be > 15 ; default : 60.
145 - 'Minimum sequence length' (integer) : only keep sequences which are longer than the specified value.
146 default : 100.
147
148 ---------
149
150 **Steps**:
151
152 The tool:
153 1) Modifies the sequence name to add the species abbreviation, using the first 2 letters of the name of the transcriptome file; note that each species abbreviation must be unique
154 2) Selects one allelic sequence from each transcript (c or locus) using the length of the sequence and its level of confidence
155 3) Selects the best ORF from the sequence between two stop codons
156 4) Performs a CAP3 from the full set of ORFs to minimize redundancy
157 5) Retrieves the initial transcript sequences from the remaining set of processed ORF sequences
158
159 **Outputs**
160
161 - 'Filter Assemblies Summary' : the log file.
162 - 'Filter Assemblies outputs' : the main results.
163
164 ---------
165
166 **The AdaptSearch Pipeline**
167
168 .. image:: adaptsearch_picture_helps.png
169
170 ---------
171
172 Changelog
173 ---------
174
175 **Version 2.1 - 15/01/2018**
176
177 - Input files can be a mix from files coming either from Trinity or Velvet Oases assemblers
178
179 **Version 2.0 - 14/04/2017**
180
181 - NEW: Replace the zip between tools by Dataset Collection
182
183 **Version 1.0 - 13/04/2017**
184
185 - TEST: Add functional test with planemo
186 - IMPROVEMENT: Use conda dependencies for cap3, fastaformatter and python
187
188 ]]>
189 </help>
190
191 </tool>