comparison stacks_kmerfilter.xml @ 2:8a55d29c8fcf draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Thu, 16 Jul 2020 07:28:45 -0400
parents 38c9f9a680f0
children 1544278c272e
comparison
equal deleted inserted replaced
1:38c9f9a680f0 2:8a55d29c8fcf
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <expand macro="version_cmd"/> 7 <expand macro="version_cmd"/>
8 <command detect_errors="aggressive"><![CDATA[ 8 <command detect_errors="aggressive"><![CDATA[
9 @FASTQ_INPUT_FUNCTIONS@ 9 @FASTQ_INPUT_FUNCTIONS@
10 10 trap ">&2 cat '$output_log'" err exit &&
11 mkdir stacks_inputs stacks_outputs && 11 mkdir stacks_inputs stacks_outputs &&
12 12
13 #set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select) 13 #set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
14 $link_command 14 $link_command
15 15
37 #end if 37 #end if
38 #if str($options_normalization.normalize)!="": 38 #if str($options_normalization.normalize)!="":
39 --normalize $options_normalization.normalize 39 --normalize $options_normalization.normalize
40 #end if 40 #end if
41 #if $options_kmer_char.write_k_freq 41 #if $options_kmer_char.write_k_freq
42 --read_k_freq $kfreq 42 --write-k-freq $kfreqdist
43 #end if 43 #end if
44 ## TODO read_k_freq
44 $options_kmer_char.k_dist 45 $options_kmer_char.k_dist
45 #if $options_kmer_char.k_dist 46 #if $options_kmer_char.k_dist
46 | sed 's/KmerFrequency/# KmerFrequency/' > $kfreqdist 47 > '$kfreq'
47 #end if 48 #end if
48 @TEE_APPEND_LOG@ 49 @TEE_APPEND_LOG@
49 @CAT_LOG_TO_STDERR@ 50
50 51 #if $options_kmer_char.k_dist
51 ## move outputs such that Galaxy can find them 52 && sed -i -e 's/KmerFrequency/# KmerFrequency/' '$kfreq'
52 ## if filtering is on then ...filt...fq is created 53 #elif $options_kmer_char.write_k_freq
53 ## if normalization is on then ...norm...fq is created 54 && sed -i -e 's/# Kmer Count/#Kmer\tCount/; s/ /\t/' '$kfreqdist';
54 ## if both are active then both files are created, but only norm is needed 55 #else
55 #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="": 56 ## move outputs such that Galaxy can find them
56 #if str($options_normalization.normalize)!="": 57 ## if filtering is on then ...filt...fq is created
57 #set infix="norm" 58 ## if normalization is on then ...norm...fq is created
58 #else 59 ## if both are active then both files are created, but only norm is needed
59 #set infix="fil" 60 #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
60 #end if 61 #if str($options_normalization.normalize)!="":
61 #if $capture: 62 #set infix="norm"
63 #else
64 #set infix="fil"
65 #end if
66 #if $capture:
67 #if $input_type.input_type_select == "single"
68 && mv stacks_outputs/*.discards.fastq '$discarded'
69 #else
70 && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
71 && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
72 #end if
73 #end if
62 #if $input_type.input_type_select == "single" 74 #if $input_type.input_type_select == "single"
63 && mv stacks_outputs/*.discards.fastq '$discarded' 75 && mv stacks_outputs/*.${infix}.fastq '$clean'
64 #else 76 #else
65 && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward' 77 && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
66 && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse' 78 && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
67 #end if 79 #end if
68 #end if 80 #end if
69 #if $input_type.input_type_select == "single" 81 #end if
70 && mv stacks_outputs/*.${infix}.fastq '$clean'
71 #else
72 && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
73 && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
74 #end if
75 #end if
76
77 ]]></command> 82 ]]></command>
78 <inputs> 83 <inputs>
79 <expand macro="fastq_input_bc"/> 84 <expand macro="fastq_input_bc"/>
80 <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" /> 85 <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/>
81 <section name="options_filtering" title="Filtering options" expanded="False"> 86 <section name="options_filtering" title="Filtering options" expanded="False">
82 <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers" /> 87 <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers"/>
83 <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers" /> 88 <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers"/>
84 <param argument="--k_len" type="integer" value="15" label="K-mer size" /> 89 <param argument="--k_len" type="integer" value="15" label="K-mer size"/>
85 </section> 90 </section>
86 <section name="options_advanced_filtering" title="Advanced filtering options" expanded="False"> 91 <section name="options_advanced_filtering" title="Advanced filtering options" expanded="False">
87 <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant" /> 92 <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant"/>
88 <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occurring in a row required to discard a read" help="(default: 80% of the k-mer length)." /> 93 <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occurring in a row required to discard a read" help="(default: 80% of the k-mer length)."/>
89 <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" /> 94 <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)"/>
90 </section> 95 </section>
91 <section name="options_normalization" title="Normalization options" expanded="False"> 96 <section name="options_normalization" title="Normalization options" expanded="False">
92 <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage" /> 97 <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage"/>
93 </section> 98 </section>
94 <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False"> 99 <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
95 <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit" /> 100 <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit"/>
96 <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit" /> 101 <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit"/>
97 </section> 102 </section>
98 <!--<section name="options_advanced_input" title="Advanced input options" expanded="False"> 103 <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
99 <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files" /> 104 <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files"/>
100 </section>--> 105 </section>-->
101 <expand macro="in_log"/> 106 <expand macro="in_log"/>
102 </inputs> 107 </inputs>
103 <outputs> 108 <outputs>
104 <expand macro="out_log"/> 109 <expand macro="out_log"/>
105 <data name="clean" format="fastqsanger" label="${tool.name} on ${on_string}"> 110 <expand macro="fastq_output_filter">
106 <filter>input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter> 111 <filter>not options_kmer_char['k_dist'] and not options_kmer_char['write_k_freq']</filter>
107 </data> 112 </expand>
108 <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}">
109 <filter>input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
110 </collection>
111 <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads">
112 <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
113 </data>
114 <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads">
115 <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
116 </collection>
117 <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies"> 113 <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies">
118 <filter>options_kmer_char['write_k_freq']</filter> 114 <filter>options_kmer_char['k_dist']</filter>
119 </data> 115 </data>
120 <data format="tabular" name="kfreqdist" label="${tool.name} on ${on_string} kmer frequency distribution"> 116 <data format="tabular" name="kfreqdist" label="${tool.name} on ${on_string} kmer frequency distribution">
121 <filter>options_kmer_char['k_dist']</filter> 117 <filter>options_kmer_char['write_k_freq']</filter>
122 </data> 118 </data>
123 </outputs> 119 </outputs>
124 <tests> 120 <tests>
125 <!-- default output for filtering --> 121 <!-- default output for filtering -->
126 <test> 122 <test expect_num_outputs="2">
127 <conditional name="input_type"> 123 <conditional name="input_type">
128 <param name="input_type_select" value="single" /> 124 <param name="input_type_select" value="single"/>
129 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" /> 125 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
130 </conditional> 126 </conditional>
131 <param name="add_log" value="yes" /> 127 <param name="add_log" value="yes"/>
132 <output name="output_log" ftype="txt" file="kmerfilter/kmerfilter.log" lines_diff="8"/> 128 <output name="output_log" ftype="txt" file="kmerfilter/kmerfilter.log" lines_diff="8"/>
133 <param name="rare" value="--rare"/> 129 <param name="rare" value="--rare"/>
134 <param name="abundant" value="--abundant" /> 130 <param name="abundant" value="--abundant"/>
135 <param name="k_len" value="16" /> 131 <param name="k_len" value="16"/>
136 <assert_command> 132 <assert_command>
137 <has_text text="--rare" /> 133 <has_text text="--rare"/>
138 <has_text text="--abundant" /> 134 <has_text text="--abundant"/>
139 <has_text text="--k_len 16" /> 135 <has_text text="--k_len 16"/>
140 </assert_command> 136 </assert_command>
141 <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/> 137 <param name="add_log" value="yes"/>
142 </test> 138 <output name="output_log"><assert_contents><has_text text="5 retained reads."/></assert_contents></output>
143 <test> 139 <output name="clean" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed1_0001.1.1.fq.single.gz"/>
144 <conditional name="input_type"> 140 </test>
145 <param name="input_type_select" value="paired" /> 141 <test expect_num_outputs="7">
142 <conditional name="input_type">
143 <param name="input_type_select" value="paired"/>
146 <param name="fqinputs"> 144 <param name="fqinputs">
147 <collection type="paired"> 145 <collection type="paired">
148 <element name="forward" value="clonefilter/R1_0001.1.fq.gz" /> 146 <element name="forward" value="clonefilter/R1_0001.1.fq.gz"/>
149 <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" /> 147 <element name="reverse" value="clonefilter/R2_0001.2.fq.gz"/>
150 </collection> 148 </collection>
151 </param> 149 </param>
152 </conditional> 150 </conditional>
153 <param name="capture" value="-D" /> 151 <param name="capture" value="-D"/>
154 <param name="normalize" value="1" /> 152 <param name="normalize" value="1"/>
155 <assert_command> 153 <assert_command>
156 <has_text text="--normalize 1" /> 154 <has_text text="--normalize 1"/>
157 </assert_command> 155 </assert_command>
156 <param name="add_log" value="yes"/>
157 <output name="output_log"><assert_contents><has_text text="8 retained reads."/></assert_contents></output>
158 <output_collection name="clean_pair" type="paired"> 158 <output_collection name="clean_pair" type="paired">
159 <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" /> 159 <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed1_0001.1.1.fq.gz"/>
160 <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" /> 160 <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed2_0001.2.2.fq.gz"/>
161 </output_collection> 161 </output_collection>
162 <output_collection name="discarded_pair" type="paired"> 162 <output_collection name="discarded_pair" type="paired">
163 <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" /> 163 <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Discarded1_0001.1.1.fq.gz"/>
164 <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" /> 164 <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Discarded2_0001.2.2.fq.gz"/>
165 </output_collection> 165 </output_collection>
166 </test> 166 </test>
167 <!-- kfreq output --> 167 <!-- kfreq output -->
168 <test> 168 <test expect_num_outputs="2">
169 <conditional name="input_type"> 169 <conditional name="input_type">
170 <param name="input_type_select" value="single" /> 170 <param name="input_type_select" value="single"/>
171 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" /> 171 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
172 </conditional> 172 </conditional>
173 <section name="options_kmer_char"> 173 <section name="options_kmer_char">
174 <param name="write_k_freq" value="--write_k_freq" /> 174 <param name="write_k_freq" value="--write_k_freq"/>
175 </section> 175 </section>
176 <param name="add_log" value="yes"/>
177 <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output>
178 <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
179 </test>
180 <!-- kfreqdist output -->
181 <test expect_num_outputs="1">
182 <conditional name="input_type">
183 <param name="input_type_select" value="single"/>
184 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
185 </conditional>
186 <section name="options_kmer_char">
187 <param name="k_dist" value="--k_dist"/>
188 </section>
189 <param name="add_log" value="no"/>
190 <assert_stderr><has_text text="Generating kmer distribution..."/></assert_stderr>
176 <output name="kfreq" file="kmerfilter/kfreq.tsv"/> 191 <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
177 </test>
178 <!-- kfreqdist output -->
179 <test>
180 <conditional name="input_type">
181 <param name="input_type_select" value="single" />
182 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
183 </conditional>
184 <section name="options_kmer_char">
185 <param name="k_dist" value="--k_dist" />
186 </section>
187 <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
188 </test> 192 </test>
189 </tests> 193 </tests>
190 <help> 194 <help>
191 <![CDATA[ 195 <![CDATA[
192 .. class:: infomark 196 .. class:: infomark
194 Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data. 198 Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.
195 199
196 @STACKS_INFOS@ 200 @STACKS_INFOS@
197 ]]> 201 ]]>
198 </help> 202 </help>
199 <expand macro="citation" /> 203 <expand macro="citation"/>
200 </tool> 204 </tool>