Mercurial > repos > iuc > nanocompore_sampcomp
comparison sampcomp.xml @ 0:557cf45ff2c8 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
author | iuc |
---|---|
date | Tue, 05 May 2020 06:57:10 -0400 |
parents | |
children | c43f4b80f5a9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:557cf45ff2c8 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@"> | |
3 <description>to compare Nanopolished datasets</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements"/> | |
8 <version_command><![CDATA[nanocompore --version]]></version_command> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 ## initialize: compute the thread count and stage the inputs in the working directory | |
11 ## requires a minimum of 3 threads: use GALAXY_SLOTS (default 3) but never pass fewer than 3 | |
12 threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) && | |
13 ## same name pattern required: symlink each data file as <name>.tsv and its index as <name>.tsv.idx | |
14 #for $i, $current in enumerate($file1_rep) | |
15 ln -s '$current.file' 'sample_1_${i}.tsv' && | |
16 ln -s '$current.index' 'sample_1_${i}.tsv.idx' && | |
17 #end for | |
18 #for $i, $current in enumerate($file2_rep) | |
19 ln -s '$current.file' 'sample_2_${i}.tsv' && | |
20 ln -s '$current.index' 'sample_2_${i}.tsv.idx' && | |
21 #end for | |
22 | |
23 ## run the comparison; output goes to the 'results' directory | |
24 nanocompore sampcomp | |
25 ## required: condition labels, comma-separated lists of the staged file names, and the reference FASTA | |
26 --label1 '$label1' | |
27 #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))]) | |
28 --file_list1 '$files1' | |
29 --label2 '$label2' | |
30 #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))]) | |
31 --file_list2 '$files2' | |
32 --fasta '$fasta' | |
33 ## optional: values from the 'ap' section (--bed is only passed when a dataset is selected) | |
34 #if $ap.bed | |
35 --bed '$ap.bed' | |
36 #end if | |
37 --max_invalid_kmers_freq $ap.max_invalid_kmers_freq | |
38 --min_coverage $ap.min_coverage | |
39 --min_ref_length $ap.min_ref_length | |
40 --comparison_methods '$ap.comparison_methods' | |
41 --sequence_context $ap.sequence_context | |
42 --sequence_context_weights '$ap.sequence_context_weights' | |
43 --pvalue_thr $ap.pvalue_thr | |
44 $ap.logit | |
45 $ap.allow_warnings | |
46 --outpath 'results' | |
47 --nthreads \$threads | |
48 --log_level debug | |
49 | |
50 && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat' | |
51 ]]></command> | |
52 <inputs> <!-- two conditions, each a repeat of (NanopolishComp file, index file) pairs, plus the reference FASTA --> | |
53 <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/> | |
54 <repeat name="file1_rep" min="1" title="First condition files"> | |
55 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/> | |
56 <param name="index" type="data" format="tabular" label="Select index file"/> | |
57 </repeat> | |
58 <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/> | |
59 <repeat name="file2_rep" min="1" title="Second condition files"> | |
60 <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/> | |
61 <param name="index" type="data" format="tabular" label="Select index file"/> | |
62 </repeat> | |
63 <param argument="--fasta" type="data" format="fasta" label="Select mapping file"/> | |
64 | |
65 <section name="ap" title="Advanced parameters"> <!-- every value here except --bed is always passed on the command line --> | |
66 <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/> | |
67 <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/> | |
68 <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/> | |
69 <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/> | |
70 <param argument="--comparison_methods" type="select" multiple="true" optional="false" label="Select comparison methods"> <!-- optional="false": the command always passes this option, so an empty selection must be rejected --> | |
71 <option value="GMM" selected="true">GMM</option> | |
72 <option value="KS" selected="true">KS</option> | |
73 <option value="TT">TT</option> | |
74 <option value="MW">MW</option> | |
75 </param> | |
76 <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/> | |
77 <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values"> | |
78 <option value="uniform" selected="true">Uniform</option> | |
79 <option value="harmonic">Harmonic</option> | |
80 </param> | |
81 <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/> | |
82 <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/> | |
83 <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests without raising an error?"/> <!-- checked = the flag is passed, i.e. warnings are tolerated --> | |
84 <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)"> <!-- keys are matched by the output filters --> | |
85 <option value="results" selected="true">Results</option> | |
86 <option value="shift" selected="true">Shift stats</option> | |
87 <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option> | |
88 <option value="log">Log</option> | |
89 </param> | |
90 </section> | |
91 </inputs> | |
92 <outputs> <!-- each dataset below is created only when its key is selected in the ap.out multi-select --> | |
93 <data format="tabular" name="out_results" label="${tool.name} on ${on_string}: Results" from_work_dir="results/out_nanocompore_results.tsv"> | |
94 <filter>'results' in ap['out']</filter> | |
95 </data> | |
96 <data format="tabular" name="out_shift" label="${tool.name} on ${on_string}: Shift stats" from_work_dir="results/out_nanocompore_shift_stats.tsv"> | |
97 <filter>'shift' in ap['out']</filter> | |
98 </data> | |
99 <data format="tar" name="out_db" label="${tool.name} on ${on_string}: Database" from_work_dir="results/db.tar"> | |
100 <filter>'db' in ap['out']</filter> | |
101 </data> | |
102 <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log" from_work_dir="results/out_SampComp.log"> | |
103 <filter>'log' in ap['out']</filter> | |
104 </data> | |
105 </outputs> | |
106 <tests> | |
107 <!-- #1: default advanced parameters, one replicate per condition; the default output selection (results, shift stats, database) yields three datasets --> | |
108 <test expect_num_outputs="3"> | |
109 <repeat name="file1_rep"> | |
110 <param name="file" value="sample1.tsv"/> | |
111 <param name="index" value="sample1.tsv.idx"/> | |
112 </repeat> | |
113 <repeat name="file2_rep"> | |
114 <param name="file" value="sample2.tsv"/> | |
115 <param name="index" value="sample2.tsv.idx"/> | |
116 </repeat> | |
117 <param name="fasta" value="reference.fa"/> | |
118 <output name="out_results"> | |
119 <assert_contents> | |
120 <has_n_lines n="3"/> | |
121 <has_text_matching expression="pos	chr.+"/> | |
122 <has_text_matching expression="22102	NA.+"/> | |
123 </assert_contents> | |
124 </output> | |
125 <output name="out_shift"> | |
126 <assert_contents> | |
127 <has_n_lines n="3"/> | |
128 <has_text_matching expression="ref\_id	pos.+"/> | |
129 <has_text_matching expression="chr	22102.+"/> | |
130 </assert_contents> | |
131 </output> | |
132 <output name="out_db"> | |
133 <assert_contents> | |
134 <has_size value="5408256"/> | |
135 </assert_contents> | |
136 </output> | |
137 </test> | |
138 <!-- #2: custom labels and every advanced parameter set explicitly; all four outputs (including the log) are requested --> | |
139 <test expect_num_outputs="4"> | |
140 <param name="label1" value="C1"/> | |
141 <repeat name="file1_rep"> | |
142 <param name="file" value="sample1.tsv"/> | |
143 <param name="index" value="sample1.tsv.idx"/> | |
144 </repeat> | |
145 <param name="label2" value="C2"/> | |
146 <repeat name="file2_rep"> | |
147 <param name="file" value="sample2.tsv"/> | |
148 <param name="index" value="sample2.tsv.idx"/> | |
149 </repeat> | |
150 <param name="fasta" value="reference.fa"/> | |
151 <section name="ap"> | |
152 <param name="max_invalid_kmers_freq" value="0.2"/> | |
153 <param name="min_coverage" value="31"/> | |
154 <param name="min_ref_length" value="101"/> | |
155 <param name="comparison_methods" value="GMM,KS,TT,MW"/> | |
156 <param name="sequence_context" value="1"/> | |
157 <param name="sequence_context_weights" value="harmonic"/> | |
158 <param name="pvalue_thr" value="0.06"/> | |
159 <param name="logit" value="true"/> | |
160 <param name="allow_warnings" value="true"/> | |
161 <param name="out" value="results,shift,db,log"/> | |
162 </section> | |
163 <output name="out_results"> | |
164 <assert_contents> | |
165 <has_n_lines n="3"/> | |
166 <has_text_matching expression="pos	chr.+"/> | |
167 <has_text_matching expression="22102	NA.+"/> | |
168 </assert_contents> | |
169 </output> | |
170 <output name="out_shift"> | |
171 <assert_contents> | |
172 <has_n_lines n="3"/> | |
173 <has_text_matching expression="ref\_id	pos.+"/> | |
174 <has_text_matching expression="chr	22102.+"/> | |
175 </assert_contents> | |
176 </output> | |
177 <output name="out_db"> | |
178 <assert_contents> | |
179 <has_size value="5410304"/> | |
180 </assert_contents> | |
181 </output> | |
182 <output name="out_log"> | |
183 <assert_contents> | |
184 <has_n_lines n="31"/> | |
185 <has_text_matching expression=".+package\_name.+"/> | |
186 </assert_contents> | |
187 </output> | |
188 </test> | |
189 </tests> | |
190 <help><![CDATA[ | |
191 .. class:: infomark | |
192 | |
193 **What it does** | |
194 | |
195 @WID@ | |
196 | |
197 SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters. | |
198 | |
199 First, SampComp parses the sample eventalign collapse files and then the observed results are piled up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage. | |
200 | |
201 **Input** | |
202 | |
203 SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per condition are highly recommended. | |
204 | |
205 A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step. | |
206 | |
207 Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space. | |
208 | |
209 **Output** | |
210 | |
211 The database object returned by Sampcomp is a Python GDBM object database indexed by reference id and can be used with SampCompDB. | |
212 | |
213 .. class:: infomark | |
214 | |
215 **References** | |
216 | |
217 @REFERENCES@ | |
218 ]]></help> | |
219 <expand macro="citations"/> | |
220 </tool> |