comparison prinseq.xml @ 2:74afc47f326c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prinseq/ commit 626b990b38e0585abfb6a06a2516ff498dc2257b
author iuc
date Fri, 03 Mar 2017 14:58:49 -0500
parents 6b865dde1baa
children 02befcb391f5
comparison
equal deleted inserted replaced
1:6b865dde1baa 2:74afc47f326c
1 <tool id="prinseq" name="PRINSEQ" version="0.1.0"> 1 <tool id="prinseq" name="PRINSEQ" version="0.20.4">
2 <description>to process quality of sequences</description> 2 <description>to process quality of sequences</description>
3
4 <requirements> 3 <requirements>
5 <requirement type="package" version="5.18.1">perl</requirement>
6 <!--<requirement type="package" version="2.90">perl-json</requirement>
7 <requirement type="package" version="1.106">perl-cairo</requirement>-->
8 <requirement type="package" version="0.20.4">prinseq</requirement> 4 <requirement type="package" version="0.20.4">prinseq</requirement>
9 </requirements> 5 </requirements>
10 6
11 <stdio> 7 <stdio>
12 <exit_code range="1:" level="fatal" description="" /> 8 <exit_code range="1:" level="fatal" description="" />
32 28
33 && 29 &&
34 30
35 prinseq-lite.pl 31 prinseq-lite.pl
36 #if $seq_type.seq_type_opt == "single": 32 #if $seq_type.seq_type_opt == "single":
37 -fastq "$seq_type.input_singles" 33 -fastq '$seq_type.input_singles'
38 #if $seq_type.input_singles.is_of_type('fastqillumina'): 34 #if $seq_type.input_singles.is_of_type('fastqillumina'):
39 -phred64 35 -phred64
40 #end if 36 #end if
41 #else: 37 #else:
42 -fastq "$seq_type.input_mate1" 38 -fastq '$seq_type.input_mate1'
43 -fastq2 "$seq_type.input_mate2" 39 -fastq2 '$seq_type.input_mate2'
44 #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext: 40 #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext:
45 #import sys 41 #import sys
46 #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' ) 42 #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' )
47 #end if 43 #end if
48 #if $seq_type.input_mate1.is_of_type('fastqillumina'): 44 #if $seq_type.input_mate1.is_of_type('fastqillumina'):
67 #end if 63 #end if
68 #end if 64 #end if
69 65
70 #set quality_filter_treatments=$filter_treatments.quality_filter_treatments 66 #set quality_filter_treatments=$filter_treatments.quality_filter_treatments
71 #if $quality_filter_treatments.apply_quality_filter_treatments == "true": 67 #if $quality_filter_treatments.apply_quality_filter_treatments == "true":
72 #set min_quality_filter_treatments=$quality_filter_treatments.min_quality_filter_treatments 68 #set min_quality_filter_treatments=$quality_filter_treatments.min_quality_filter_treatments
73 #if $min_quality_filter_treatments.apply_min_quality_filter_treatments == "true": 69 #if $min_quality_filter_treatments.apply_min_quality_filter_treatments == "true":
74 -min_qual_score $min_quality_filter_treatments.min_quality_filter_treatment_value 70 -min_qual_score $min_quality_filter_treatments.min_quality_filter_treatment_value
75 #end if 71 #end if
76 72
77 #set max_quality_filter_treatments=$quality_filter_treatments.max_quality_filter_treatments 73 #set max_quality_filter_treatments=$quality_filter_treatments.max_quality_filter_treatments
209 -trim_qual_rule $quality_trimming_treatments.rule_quality_trimming_treatments 205 -trim_qual_rule $quality_trimming_treatments.rule_quality_trimming_treatments
210 -trim_qual_window $quality_trimming_treatments.window_quality_trimming_treatments 206 -trim_qual_window $quality_trimming_treatments.window_quality_trimming_treatments
211 -trim_qual_step $quality_trimming_treatments.step_quality_trimming_treatments 207 -trim_qual_step $quality_trimming_treatments.step_quality_trimming_treatments
212 #end if 208 #end if
213 209
214 #end if 210 #end if
215 211
216 #* 212 #*
217 -graph_stats "$graph_stats" 213 -graph_stats "$graph_stats"
218 -graph_data tmp/stats.gd 214 -graph_data tmp/stats.gd
219 215
220 216 &&
221 &&
222 217
223 prinseq-graphs-noPCA.pl -i "tmp/stats.gd" -html_all -o stats_html 218 prinseq-graphs-noPCA.pl -i "tmp/stats.gd" -html_all -o stats_html
224 *# 219 *#
225 ]]> 220 ]]>
226 </command> 221 </command>
227 222
228 <inputs> 223 <inputs>
229 <conditional name="seq_type"> 224 <conditional name="seq_type">
230 <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?"> 225 <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?">
259 </param> 254 </param>
260 <when value="true"> 255 <when value="true">
261 <param name="min_length_filter_treatment_value" type="integer" min="0" max="3000" value="60" label="Minimum length threshold to conserve sequences" help="(-min_len)"/> 256 <param name="min_length_filter_treatment_value" type="integer" min="0" max="3000" value="60" label="Minimum length threshold to conserve sequences" help="(-min_len)"/>
262 </when> 257 </when>
263 <when value="false" /> 258 <when value="false" />
264 </conditional> 259 </conditional>
265 <conditional name="max_length_filter_treatments"> 260 <conditional name="max_length_filter_treatments">
266 <param name="apply_max_length_filter_treatments" type="select" label="Filter too big sequences?" help="By default, no treatment based on a maximal length is made."> 261 <param name="apply_max_length_filter_treatments" type="select" label="Filter too big sequences?" help="By default, no treatment based on a maximal length is made.">
267 <option value="true">Yes</option> 262 <option value="true">Yes</option>
268 <option value="false" selected="true">No</option> 263 <option value="false" selected="true">No</option>
269 </param> 264 </param>
270 <when value="true"> 265 <when value="true">
271 <param name="max_length_filter_treatment_value" type="integer" min="0" max="3000" value="1000" label="Maximal length threshold to conserve sequences" help="(-max_len)"/> 266 <param name="max_length_filter_treatment_value" type="integer" min="0" max="3000" value="1000" label="Maximal length threshold to conserve sequences" help="(-max_len)"/>
272 </when> 267 </when>
273 <when value="false" /> 268 <when value="false" />
274 </conditional> 269 </conditional>
275 </when> 270 </when>
276 <when value="false" /> 271 <when value="false" />
277 </conditional> 272 </conditional>
278 <conditional name="quality_filter_treatments"> 273 <conditional name="quality_filter_treatments">
279 <param name="apply_quality_filter_treatments" type="select" label="Filter sequences based on quality score?" help="By default, sequences with a mean score below 15 are removed."> 274 <param name="apply_quality_filter_treatments" type="select" label="Filter sequences based on quality score?" help="By default, sequences with a mean score below 15 are removed.">
280 <option value="true" selected="true">Yes</option> 275 <option value="true" selected="true">Yes</option>
281 <option value="false">No</option> 276 <option value="false">No</option>
282 </param> 277 </param>
310 <conditional name="min_mean_quality_filter_treatments"> 305 <conditional name="min_mean_quality_filter_treatments">
311 <param name="apply_min_mean_quality_filter_treatments" type="select" label="Filter sequences based with too small mean score?" help="By default, sequences with a mean score below 15 are removed."> 306 <param name="apply_min_mean_quality_filter_treatments" type="select" label="Filter sequences based with too small mean score?" help="By default, sequences with a mean score below 15 are removed.">
312 <option value="true" selected="true">Yes</option> 307 <option value="true" selected="true">Yes</option>
313 <option value="false">No</option> 308 <option value="false">No</option>
314 </param> 309 </param>
315 <when value="true"> 310 <when value="true">
316 <param name="min_mean_quality_filter_treatment_value" type="integer" min="0" max="40" value="15" label="Minimum mean score threshold to conserve sequences" help="(-min_qual_mean)"/> 311 <param name="min_mean_quality_filter_treatment_value" type="integer" min="0" max="40" value="15" label="Minimum mean score threshold to conserve sequences" help="(-min_qual_mean)"/>
317 </when> 312 </when>
318 <when value="false" /> 313 <when value="false" />
319 </conditional> 314 </conditional>
320 <conditional name="max_mean_quality_filter_treatments"> 315 <conditional name="max_mean_quality_filter_treatments">
321 <param name="apply_max_mean_quality_filter_treatments" type="select" label="Filter sequences based with too high mean score?" help="By default, no treatment based on a maximum mean score is made."> 316 <param name="apply_max_mean_quality_filter_treatments" type="select" label="Filter sequences based with too high mean score?" help="By default, no treatment based on a maximum mean score is made.">
322 <option value="true">Yes</option> 317 <option value="true">Yes</option>
323 <option value="false" selected="true">No</option> 318 <option value="false" selected="true">No</option>
324 </param> 319 </param>
325 <when value="true"> 320 <when value="true">
326 <param name="max_mean_quality_filter_treatment_value" type="integer" min="0" max="40" value="40" label="Maximum mean score threshold to conserve sequences" help="(-max_qual_mean)"/> 321 <param name="max_mean_quality_filter_treatment_value" type="integer" min="0" max="40" value="40" label="Maximum mean score threshold to conserve sequences" help="(-max_qual_mean)"/>
327 </when> 322 </when>
328 <when value="false" /> 323 <when value="false" />
329 </conditional> 324 </conditional>
330 </when> 325 </when>
399 </param> 394 </param>
400 <when value="true"> 395 <when value="true">
401 <param name="method_complexity_filter_treatments" type="select" display="radio" label="Method to filter low complexity sequences" help="(-lc_method)"> 396 <param name="method_complexity_filter_treatments" type="select" display="radio" label="Method to filter low complexity sequences" help="(-lc_method)">
402 <option value="dust">Dust</option> 397 <option value="dust">Dust</option>
403 <option value="entropy" >Entropy</option> 398 <option value="entropy" >Entropy</option>
404 </param> 399 </param>
405 <param name="threshold_complexity_filter_treatments" type="integer" min="0" max="100" value="2" label="Threshold value used to filter sequences by sequence complexity" help="The dust method uses the threshold as maximum allowed score and the entropy method as minimum allowed value.(-lc_threshold)"/> 400 <param name="threshold_complexity_filter_treatments" type="integer" min="0" max="100" value="2" label="Threshold value used to filter sequences by sequence complexity" help="The dust method uses the threshold as maximum allowed score and the entropy method as minimum allowed value.(-lc_threshold)"/>
406 </when> 401 </when>
407 <when value="false" /> 402 <when value="false" />
408 </conditional> 403 </conditional>
409 </when> 404 </when>
410 <when value="false" /> 405 <when value="false" />
411 </conditional> 406 </conditional>
412 407
413 <conditional name="trimming_treatments"> 408 <conditional name="trimming_treatments">
414 <param name="apply_trimming_treatments" type="select" label="Apply trimming treatments?" help=""> 409 <param name="apply_trimming_treatments" type="select" label="Apply trimming treatments?" help="">
415 <option value="true" selected="true">Yes</option> 410 <option value="true" selected="true">Yes</option>
416 <option value="false">No</option> 411 <option value="false">No</option>
620 <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option> 615 <option value="dn" selected="True">Dinucleotide odds ratios, includes the PCA plots</option>
621 </param>--> 616 </param>-->
622 </inputs> 617 </inputs>
623 618
624 <outputs> 619 <outputs>
625 <data format="fastq" name="good_sequence_file" 620 <data format="fastq" name="good_sequence_file" from_work_dir="tmp/good_sequences.fastq"
626 from_work_dir="tmp/good_sequences.fastq"
627 label="${tool.name} on ${on_string}: Good sequences" > 621 label="${tool.name} on ${on_string}: Good sequences" >
628 <filter>seq_type['seq_type_opt'] == "single"</filter> 622 <filter>seq_type['seq_type_opt'] == "single"</filter>
629 </data> 623 </data>
630 <data format="fastq" name="rejected_sequence_file" 624 <data format="fastq" name="rejected_sequence_file" from_work_dir="tmp/rejected_sequences.fastq"
631 from_work_dir="tmp/rejected_sequences.fastq"
632 label="${tool.name} on ${on_string}: Rejected sequences" > 625 label="${tool.name} on ${on_string}: Rejected sequences" >
633 <filter>seq_type['seq_type_opt'] == "single"</filter> 626 <filter>seq_type['seq_type_opt'] == "single"</filter>
634 </data> 627 </data>
635 628
636 <data format="fastq" name="good_sequences_1_file" 629 <data format="fastq" name="good_sequences_1_file" from_work_dir="tmp/good_sequences_1.fastq"
637 from_work_dir="tmp/good_sequences_1.fastq"
638 label="${tool.name} on ${on_string}: Good sequences for R1" > 630 label="${tool.name} on ${on_string}: Good sequences for R1" >
639 <filter>seq_type['seq_type_opt'] == "paired"</filter> 631 <filter>seq_type['seq_type_opt'] == "paired"</filter>
640 </data> 632 </data>
641 <data format="fastq" name="good_sequences_1_singletons_file" 633 <data format="fastq" name="good_sequences_1_singletons_file" from_work_dir="tmp/good_sequences_1_singletons.fastq"
642 from_work_dir="tmp/good_sequences_1_singletons.fastq"
643 label="${tool.name} on ${on_string}: Good singleton sequences for R1" > 634 label="${tool.name} on ${on_string}: Good singleton sequences for R1" >
644 <filter>seq_type['seq_type_opt'] == "paired"</filter> 635 <filter>seq_type['seq_type_opt'] == "paired"</filter>
645 </data> 636 </data>
646 <data format="fastq" name="rejected_sequence_1_file" 637 <data format="fastq" name="rejected_sequence_1_file" from_work_dir="tmp/rejected_sequences_1.fastq"
647 from_work_dir="tmp/rejected_sequences_1.fastq"
648 label="${tool.name} on ${on_string}: Rejected sequences for R1" > 638 label="${tool.name} on ${on_string}: Rejected sequences for R1" >
649 <filter>seq_type['seq_type_opt'] == "paired"</filter> 639 <filter>seq_type['seq_type_opt'] == "paired"</filter>
650 </data> 640 </data>
651 <data format="fastq" name="good_sequences_2_file" 641 <data format="fastq" name="good_sequences_2_file" from_work_dir="tmp/good_sequences_2.fastq"
652 from_work_dir="tmp/good_sequences_2.fastq"
653 label="${tool.name} on ${on_string}: Good sequences for R2" > 642 label="${tool.name} on ${on_string}: Good sequences for R2" >
654 <filter>seq_type['seq_type_opt'] == "paired"</filter> 643 <filter>seq_type['seq_type_opt'] == "paired"</filter>
655 </data> 644 </data>
656 <data format="fastq" name="good_sequences_2_singletons_file" 645 <data format="fastq" name="good_sequences_2_singletons_file" from_work_dir="tmp/good_sequences_2_singletons.fastq"
657 from_work_dir="tmp/good_sequences_2_singletons.fastq"
658 label="${tool.name} on ${on_string}: Good singleton sequences for R2" > 646 label="${tool.name} on ${on_string}: Good singleton sequences for R2" >
659 <filter>seq_type['seq_type_opt'] == "paired"</filter> 647 <filter>seq_type['seq_type_opt'] == "paired"</filter>
660 </data> 648 </data>
661 <data format="fastq" name="rejected_sequence_2_file" 649 <data format="fastq" name="rejected_sequence_2_file" from_work_dir="tmp/rejected_sequences_2.fastq"
662 from_work_dir="tmp/rejected_sequences_2.fastq"
663 label="${tool.name} on ${on_string}: Rejected sequences for R2" > 650 label="${tool.name} on ${on_string}: Rejected sequences for R2" >
664 <filter>seq_type['seq_type_opt'] == "paired"</filter> 651 <filter>seq_type['seq_type_opt'] == "paired"</filter>
665 </data> 652 </data>
666 653
667 <!--<data format="html" name="html_file" 654 <!--<data format="html" name="html_file" from_work_dir="stats_html.html"
668 from_work_dir="stats_html.html"
669 label="${tool.name} on ${on_string}: Summary" />--> 655 label="${tool.name} on ${on_string}: Summary" />-->
670 </outputs> 656 </outputs>
671 657
672 <tests> 658 <tests>
673 <test> 659 <test>
674 <param name='seq_type_opt' value="single"/> 660 <param name='seq_type_opt' value="single"/>
675 <param name="input_singles" value="prinseq_input_sequences.fastq"/> 661 <param name="input_singles" value="prinseq_input_sequences.fastq"/>
676 <param name='apply_filter_treatments' value="true"/> 662 <param name='apply_filter_treatments' value="true"/>
677 <param name='apply_length_filter_treatments' value="true"/> 663 <param name='apply_length_filter_treatments' value="true"/>
678 <param name='apply_min_length_filter_treatments' value="true"/> 664 <param name='apply_min_length_filter_treatments' value="true"/>
679 <param name="min_length_filter_treatment_value" value="60"/> 665 <param name="min_length_filter_treatment_value" value="60"/>
680 <param name='apply_max_length_filter_treatments' value="false" /> 666 <param name='apply_max_length_filter_treatments' value="false" />
681 <param name='apply_quality_filter_treatments' value="true"/> 667 <param name='apply_quality_filter_treatments' value="true"/>
682 <param name='apply_min_quality_filter_treatments' value="false" /> 668 <param name='apply_min_quality_filter_treatments' value="false" />
683 <param name='apply_max_quality_filter_treatments' value="false"/> 669 <param name='apply_max_quality_filter_treatments' value="false"/>
686 <param name="min_mean_quality_filter_treatment_value" value="15"/> 672 <param name="min_mean_quality_filter_treatment_value" value="15"/>
687 <param name='apply_max_mean_quality_filter_treatments' value="false"/> 673 <param name='apply_max_mean_quality_filter_treatments' value="false"/>
688 <param name='apply_base_content_filter_treatments' value="true"/> 674 <param name='apply_base_content_filter_treatments' value="true"/>
689 <param name='apply_GC_perc_content_filter_treatments' value="false"/> 675 <param name='apply_GC_perc_content_filter_treatments' value="false"/>
690 <param name='apply_N_number_content_filter_treatments' value="false"/> 676 <param name='apply_N_number_content_filter_treatments' value="false"/>
691 <param name='apply_N_percentage_content_filter_treatments' value="true"/> 677 <param name='apply_N_percentage_content_filter_treatments' value="true"/>
692 <param name="N_percentage_content_filter_treatment_value" value="2"/> 678 <param name="N_percentage_content_filter_treatment_value" value="2"/>
693 <param name='apply_other_base_content_filter_treatments' value="false"/> 679 <param name='apply_other_base_content_filter_treatments' value="false"/>
694 <param name='apply_complexity_filter_treatments' value="false"/> 680 <param name='apply_complexity_filter_treatments' value="false"/>
695 <param name='apply_trimming_treatments' value="true" /> 681 <param name='apply_trimming_treatments' value="true" />
696 <param name='apply_length_trimming_treatments' value="false"/> 682 <param name='apply_length_trimming_treatments' value="false"/>
708 <output name="good_sequence_file" file="prinseq_good_sequences.fastq"/> 694 <output name="good_sequence_file" file="prinseq_good_sequences.fastq"/>
709 </test> 695 </test>
710 </tests> 696 </tests>
711 697
712 <help><![CDATA[ 698 <help><![CDATA[
713
714 **What it does** 699 **What it does**
715 700
716 PRINSEQ is a tool for easy and rapid quality control and data processing of metagenomic and metatranscriptomic datasets. 701 PRINSEQ is a tool for easy and rapid quality control and data processing of metagenomic and metatranscriptomic datasets.
717 This tool allows sequences to be processed with filtering and trimming. 702 This tool allows sequences to be processed with filtering and trimming.
718 More information is available in the `PRINSEQ manual <http://prinseq.sourceforge.net/manual.html>`_. 703 More information is available in the `PRINSEQ manual <http://prinseq.sourceforge.net/manual.html>`_.
719 704
720 ----- 705 -----
721 706
722 **Input** 707 **Input**
734 719
735 PRINSEQ offers numerous parameters, depending on the desired treatments. 720 PRINSEQ offers numerous parameters, depending on the desired treatments.
736 721
737 Several filter treatments are proposed: 722 Several filter treatments are proposed:
738 723
739 - Filters based on sequence length 724 - Filters based on sequence length
740 - Filters based on quality score 725 - Filters based on quality score
741 - Filters based on base content 726 - Filters based on base content
742 727
743 And several trimming treatments eliminate sequence parts: 728 And several trimming treatments eliminate sequence parts:
744 729
752 737
753 **Output** 738 **Output**
754 739
755 The output file is a sequence file with sequences and quality scores from the input file 740 The output file is a sequence file with sequences and quality scores from the input file
756 which have undergone filtering and trimming. 741 which have undergone filtering and trimming.
757
758 ]]> 742 ]]>
759 </help> 743 </help>
760 744
761 <citations> 745 <citations>
762 <citation type="doi">10.1093/bioinformatics/btr026</citation> 746 <citation type="doi">10.1093/bioinformatics/btr026</citation>