comparison sampcomp.xml @ 0:557cf45ff2c8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
author iuc
date Tue, 05 May 2020 06:57:10 -0400
parents
children c43f4b80f5a9
comparison
equal deleted inserted replaced
-1:000000000000 0:557cf45ff2c8
1 <?xml version="1.0"?>
2 <tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
3 <description>to compare Nanopolished datasets</description>
4 <macros>
5 <import>macros.xml</import>
6 </macros>
7 <expand macro="requirements"/>
8 <version_command><![CDATA[nanocompore --version]]></version_command>
9 <command detect_errors="exit_code"><![CDATA[
10 ## initialize: derive the thread count and stage the input files
11 ## requires a minimum of 3 threads: use GALAXY_SLOTS (default 3) but never fewer than 3
12 threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
13 ## same name pattern required: link each data file as sample_<condition>_<i>.tsv and its index as the same name plus .idx
14 #for $i, $current in enumerate($file1_rep)
15 ln -s '$current.file' 'sample_1_${i}.tsv' &&
16 ln -s '$current.index' 'sample_1_${i}.tsv.idx' &&
17 #end for
18 #for $i, $current in enumerate($file2_rep)
19 ln -s '$current.file' 'sample_2_${i}.tsv' &&
20 ln -s '$current.index' 'sample_2_${i}.tsv.idx' &&
21 #end for
22
23 ## run the comparison
24 nanocompore sampcomp
25 ## required arguments; each file list is the comma-joined set of symlink names created above
26 --label1 '$label1'
27 #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))])
28 --file_list1 '$files1'
29 --label2 '$label2'
30 #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))])
31 --file_list2 '$files2'
32 --fasta '$fasta'
33 ## optional arguments from the advanced section; --bed is passed only when a BED dataset was supplied
34 #if $ap.bed
35 --bed '$ap.bed'
36 #end if
37 --max_invalid_kmers_freq $ap.max_invalid_kmers_freq
38 --min_coverage $ap.min_coverage
39 --min_ref_length $ap.min_ref_length
40 --comparison_methods '$ap.comparison_methods'
41 --sequence_context $ap.sequence_context
42 --sequence_context_weights '$ap.sequence_context_weights'
43 --pvalue_thr $ap.pvalue_thr
44 $ap.logit
45 $ap.allow_warnings
46 --outpath 'results'
47 --nthreads \$threads
48 --log_level debug
49 ## bundle the three database files (.bak, .dir, .dat) into results/db.tar, which is collected as one output
50 && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat'
51 ]]></command>
52 <inputs> <!-- required: a label and at least one data/index pair per condition, plus the reference FASTA -->
53 <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/>
54 <repeat name="file1_rep" min="1" title="First condition files">
55 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/>
56 <param name="index" type="data" format="tabular" label="Select index file"/>
57 </repeat>
58 <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/>
59 <repeat name="file2_rep" min="1" title="Second condition files">
60 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/>
61 <param name="index" type="data" format="tabular" label="Select index file"/>
62 </repeat>
63 <param argument="--fasta" type="data" format="fasta" label="Select mapping file"/>
64
65 <section name="ap" title="Advanced parameters"> <!-- optional tuning; also holds the output selector read by the output filters -->
66 <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/>
67 <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/>
68 <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/>
69 <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/>
70 <param argument="--comparison_methods" type="select" multiple="true" optional="false" label="Select comparison methods"> <!-- at least one method must stay selected, otherwise the literal None would reach the command line -->
71 <option value="GMM" selected="true">GMM</option>
72 <option value="KS" selected="true">KS</option>
73 <option value="TT">TT</option>
74 <option value="MW">MW</option>
75 </param>
76 <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/>
77 <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values">
78 <option value="uniform" selected="true">Uniform</option>
79 <option value="harmonic">Harmonic</option>
80 </param>
81 <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/>
82 <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/>
83 <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests instead of raising an error?"/> <!-- ticking the box passes the allow_warnings flag, so the label is phrased as permission -->
84 <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
85 <option value="results" selected="true">Results</option>
86 <option value="shift" selected="true">Shift stats</option>
87 <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option>
88 <option value="log">Log</option>
89 </param>
90 </section>
91 </inputs>
92 <outputs> <!-- each dataset is created only when its key is selected in the advanced "out" parameter -->
93 <data name="out_results" format="tabular" label="${tool.name} on ${on_string}: Results" from_work_dir="results/out_nanocompore_results.tsv">
94 <filter>'results' in ap['out']</filter>
95 </data>
96 <data name="out_shift" format="tabular" label="${tool.name} on ${on_string}: Shift stats" from_work_dir="results/out_nanocompore_shift_stats.tsv">
97 <filter>'shift' in ap['out']</filter>
98 </data>
99 <data name="out_db" format="tar" label="${tool.name} on ${on_string}: Database" from_work_dir="results/db.tar">
100 <filter>'db' in ap['out']</filter>
101 </data>
102 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="results/out_SampComp.log">
103 <filter>'log' in ap['out']</filter>
104 </data>
105 </outputs>
106 <tests>
107 <!-- #1: default parameters; expects the results, shift stats and database outputs -->
108 <test expect_num_outputs="3">
109 <repeat name="file1_rep">
110 <param name="file" value="sample1.tsv"/>
111 <param name="index" value="sample1.tsv.idx"/>
112 </repeat>
113 <repeat name="file2_rep">
114 <param name="file" value="sample2.tsv"/>
115 <param name="index" value="sample2.tsv.idx"/>
116 </repeat>
117 <param name="fasta" value="reference.fa"/>
118 <output name="out_results">
119 <assert_contents>
120 <has_n_lines n="3"/>
121 <has_text_matching expression="pos&#09;chr.+"/>
122 <has_text_matching expression="22102&#09;NA.+"/>
123 </assert_contents>
124 </output>
125 <output name="out_shift">
126 <assert_contents>
127 <has_n_lines n="3"/>
128 <has_text_matching expression="ref\_id&#09;pos.+"/>
129 <has_text_matching expression="chr&#09;22102.+"/>
130 </assert_contents>
131 </output>
132 <output name="out_db">
133 <assert_contents>
134 <has_size value="5408256"/>
135 </assert_contents>
136 </output>
137 </test>
138 <!-- #2: custom labels and every advanced parameter set; additionally requests the log output -->
139 <test expect_num_outputs="4">
140 <param name="label1" value="C1"/>
141 <repeat name="file1_rep">
142 <param name="file" value="sample1.tsv"/>
143 <param name="index" value="sample1.tsv.idx"/>
144 </repeat>
145 <param name="label2" value="C2"/>
146 <repeat name="file2_rep">
147 <param name="file" value="sample2.tsv"/>
148 <param name="index" value="sample2.tsv.idx"/>
149 </repeat>
150 <param name="fasta" value="reference.fa"/>
151 <section name="ap">
152 <param name="max_invalid_kmers_freq" value="0.2"/>
153 <param name="min_coverage" value="31"/>
154 <param name="min_ref_length" value="101"/>
155 <param name="comparison_methods" value="GMM,KS,TT,MW"/>
156 <param name="sequence_context" value="1"/>
157 <param name="sequence_context_weights" value="harmonic"/>
158 <param name="pvalue_thr" value="0.06"/>
159 <param name="logit" value="true"/>
160 <param name="allow_warnings" value="true"/>
161 <param name="out" value="results,shift,db,log"/>
162 </section>
163 <output name="out_results">
164 <assert_contents>
165 <has_n_lines n="3"/>
166 <has_text_matching expression="pos&#09;chr.+"/>
167 <has_text_matching expression="22102&#09;NA.+"/>
168 </assert_contents>
169 </output>
170 <output name="out_shift">
171 <assert_contents>
172 <has_n_lines n="3"/>
173 <has_text_matching expression="ref\_id&#09;pos.+"/>
174 <has_text_matching expression="chr&#09;22102.+"/>
175 </assert_contents>
176 </output>
177 <output name="out_db">
178 <assert_contents>
179 <has_size value="5410304"/>
180 </assert_contents>
181 </output>
182 <output name="out_log">
183 <assert_contents>
184 <has_n_lines n="31"/>
185 <has_text_matching expression=".+package\_name.+"/>
186 </assert_contents>
187 </output>
188 </test>
189 </tests>
190 <help><![CDATA[
191 .. class:: infomark
192
193 **What it does**
194
195 @WID@
196
197 SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters.
198
199 First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage.
200
201 **Input**
202
203 SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per condition are highly recommended.
204
205 A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step.
206
207 Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space.
208
209 **Output**
210
211 The database object returned by SampComp is a Python GDBM object database indexed by reference id and can be used with SampCompDB.
212
213 .. class:: infomark
214
215 **References**
216
217 @REFERENCES@
218 ]]></help>
219 <expand macro="citations"/>
220 </tool>