Mercurial > repos > iuc > prinseq
diff prinseq.xml @ 5:1ee282794de3 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/prinseq/ commit 34e8262534e22f0d391a81b06374744c4af8da24"
author | iuc |
---|---|
date | Sun, 20 Mar 2022 10:51:09 +0000 |
parents | 654b3a274ed5 |
children |
line wrap: on
line diff
--- a/prinseq.xml Wed Jul 07 09:22:27 2021 +0000 +++ b/prinseq.xml Sun Mar 20 10:51:09 2022 +0000 @@ -1,5 +1,8 @@ -<tool id="prinseq" name="PRINSEQ" version="@TOOL_VERSION+galaxy1"> +<tool id="prinseq" name="PRINSEQ" version="@TOOL_VERSION+galaxy2" profile="20.05"> <description>to process quality of sequences</description> + <xrefs> + <xref type="bio.tools">prinseq</xref> + </xrefs> <macros> <token name="@TOOL_VERSION">0.20.4</token> </macros> @@ -8,7 +11,6 @@ </requirements> <stdio> - <exit_code range="1:" level="fatal" description="" /> <regex match="ERROR" source="stderr" level="fatal" @@ -25,36 +27,57 @@ ]]> </version_command> - <command> + <command detect_errors="exit_code"> <![CDATA[ - mkdir tmp/ + mkdir tmp/ && + + #if $seq_type.seq_type_opt == "single" + #set fwd = $seq_type.input_singles + #set rev = None + #else if $seq_type.seq_type_opt == "paired" + #set fwd = $seq_type.input_mate1 + #set rev = $seq_type.input_mate2 + #else + #set fwd = $seq_type.input_collection.forward + #set rev = $seq_type.input_collection.reverse + #end if - && + #if $rev and $fwd.ext != $rev.ext: + >&2 echo 'Both pairs from your paired-end library need to be from the same filetype.' && + exit 1; + #end if + + #if $fwd.ext.endswith(".gz") + gunzip -c '$fwd' > fwd.fastq && + #else + ln -s '$fwd' fwd.fastq && + #end if + + #if $rev + #if $rev.ext.endswith(".gz") + gunzip -c '$rev' > rev.fastq && + #else + ln -s '$rev' rev.fastq && + #end if + #end if + + ## create empty output files + #if $seq_type.seq_type_opt == "single" + touch tmp/good_sequences.fastq tmp/rejected_sequences.fastq && + #else + touch tmp/good_sequences_1.fastq tmp/good_sequences_1_singletons.fastq tmp/rejected_sequences_1.fastq && + touch tmp/good_sequences_2.fastq tmp/good_sequences_2_singletons.fastq tmp/rejected_sequences_2.fastq && + #end if + prinseq-lite.pl - #if $seq_type.seq_type_opt == "single": - -fastq '$seq_type.input_singles' - #if $seq_type.input_singles.is_of_type('fastqillumina'): - -phred64 - #end if - #elif $seq_type.seq_type_opt == "paired": - -fastq '$seq_type.input_mate1' - -fastq2 '$seq_type.input_mate2' - #if $seq_type.input_mate1.ext != $seq_type.input_mate2.ext: - #import sys - #silent sys.stderr.write( 'Both pairs from your paired-end library need to be from the same filetype.' ) - #end if - #if $seq_type.input_mate1.is_of_type('fastqillumina'): - -phred64 - #end if - #else - -fastq '$seq_type.input_collection.forward' - -fastq2 '$seq_type.input_collection.reverse' - #if $seq_type.input_collection.forward.is_of_type('fastqillumina'): - -phred64 - #end if + -fastq fwd.fastq + #if $rev + -fastq2 rev.fastq #end if - + #if $fwd.ext.startswith('fastqillumina'): + -phred64 + #end if -out_good "tmp/good_sequences" -out_bad "tmp/rejected_sequences" @@ -226,9 +249,16 @@ prinseq-graphs-noPCA.pl -i "tmp/stats.gd" -html_all -o stats_html *# + + #if $fwd.ext.endswith('.gz') + && for f in tmp/*.fastq; + do + gzip -c \$f > tmp_file && + mv tmp_file \$f; + done + #end if ]]> </command> - <inputs> <conditional name="seq_type"> <param name="seq_type_opt" type="select" label="Is this library paired- or single-end?"> @@ -237,11 +267,11 @@ <option value="paired_collection">Paired Collection</option> </param> <when value="single"> - <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> + <param name="input_singles" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="FASTQ file" help="FASTQ files." /> </when> <when value="paired"> - <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> - <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq" label="FASTQ file" help="FASTQ files." /> + <param name="input_mate1" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="FASTQ file" help="FASTQ files." /> + <param name="input_mate2" type="data" format="fastqsanger,fastqsanger.gz,fastqillumina,fastqillumina.gz" label="FASTQ file" help="FASTQ files." /> </when> <when value="paired_collection"> <param name="input_collection" type="data_collection" collection_type="paired" label="FASTQ collection" help="FASTQ data in a paired collection" /> @@ -630,49 +660,55 @@ </inputs> <outputs> - <data format_source="input_singles" name="good_sequence_file" from_work_dir="tmp/good_sequences.fastq" + <data name="good_sequence_file" format_source="input_singles" from_work_dir="tmp/good_sequences.fastq" label="${tool.name} on ${on_string}: Good sequences" > <filter>seq_type['seq_type_opt'] == "single"</filter> </data> - <data format_source="input_singles" name="rejected_sequence_file" from_work_dir="tmp/rejected_sequences.fastq" + <data name="rejected_sequence_file" format_source="input_singles" from_work_dir="tmp/rejected_sequences.fastq" label="${tool.name} on ${on_string}: Rejected sequences" > <filter>seq_type['seq_type_opt'] == "single"</filter> </data> - <data format_source="input_mate1" name="good_sequences_1_file" from_work_dir="tmp/good_sequences_1.fastq" + <data name="good_sequences_1_file" format_source="input_mate1" from_work_dir="tmp/good_sequences_1.fastq" label="${tool.name} on ${on_string}: Good sequences for R1" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <data format_source="input_mate1" name="good_sequences_1_singletons_file" from_work_dir="tmp/good_sequences_1_singletons.fastq" + <data name="good_sequences_1_singletons_file" format_source="input_mate1" from_work_dir="tmp/good_sequences_1_singletons.fastq" label="${tool.name} on ${on_string}: Good singleton sequences for R1" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <data format_source="input_mate1" name="rejected_sequence_1_file" from_work_dir="tmp/rejected_sequences_1.fastq" + <data name="rejected_sequence_1_file" format_source="input_mate1" from_work_dir="tmp/rejected_sequences_1.fastq" label="${tool.name} on ${on_string}: Rejected sequences for R1" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <data format_source="input_mate2" name="good_sequences_2_file" from_work_dir="tmp/good_sequences_2.fastq" + <data name="good_sequences_2_file" format_source="input_mate2" from_work_dir="tmp/good_sequences_2.fastq" label="${tool.name} on ${on_string}: Good sequences for R2" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <data format_source="input_mate2" name="good_sequences_2_singletons_file" from_work_dir="tmp/good_sequences_2_singletons.fastq" + <data name="good_sequences_2_singletons_file" format_source="input_mate2" from_work_dir="tmp/good_sequences_2_singletons.fastq" label="${tool.name} on ${on_string}: Good singleton sequences for R2" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <data format_source="input_mate2" name="rejected_sequence_2_file" from_work_dir="tmp/rejected_sequences_2.fastq" + <data name="rejected_sequence_2_file" format_source="input_mate2" from_work_dir="tmp/rejected_sequences_2.fastq" label="${tool.name} on ${on_string}: Rejected sequences for R2" > <filter>seq_type['seq_type_opt'] == "paired"</filter> </data> - <collection name="good_sequences_collection" type="paired"> + <collection name="good_sequences_collection" format_source="input_collection" type="paired"> + <data name="forward" from_work_dir="tmp/good_sequences_1.fastq"/> + <data name="reverse" from_work_dir="tmp/good_sequences_2.fastq"/> <filter>seq_type['seq_type_opt'] == "paired_collection"</filter> </collection> - <collection name="singletons_collection" type="paired"> + <collection name="singletons_collection" format_source="input_collection" type="paired"> + <data name="forward" from_work_dir="tmp/good_sequences_1_singletons.fastq"/> + <data name="reverse" from_work_dir="tmp/good_sequences_2_singletons.fastq"/> <filter>seq_type['seq_type_opt'] == "paired_collection"</filter> </collection> - <collection name="rejected_sequences_collection" type="paired"> + <collection name="rejected_sequences_collection" format_source="input_collection" type="paired"> + <data name="forward" from_work_dir="tmp/rejected_sequences_1.fastq"/> + <data name="reverse" from_work_dir="tmp/rejected_sequences_2.fastq"/> <filter>seq_type['seq_type_opt'] == "paired_collection"</filter> </collection> @@ -681,9 +717,9 @@ </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name='seq_type_opt' value="single"/> - <param name="input_singles" value="prinseq_input_sequences.fastq" ftype="fastqsanger"/> + <param name="input_singles" value="prinseq_input_sequences.fastq.gz" ftype="fastqsanger.gz"/> <param name='apply_filter_treatments' value="true"/> <param name='apply_length_filter_treatments' value="true"/> <param name='apply_min_length_filter_treatments' value="true"/> @@ -715,8 +751,158 @@ <param name="rule_quality_trimming_treatments" value="lt" /> <param name="window_quality_trimming_treatments" value="1"/> <param name="step_quality_trimming_treatments" value="1"/> - - <output name="good_sequence_file" file="prinseq_good_sequences.fastq" ftype="fastqsanger"/> + <output name="good_sequence_file" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="11219" delta="1000"/> + </assert_contents> + </output> + <output name="rejected_sequence_file" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="14208" delta="1000"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="6"> + <param name='seq_type_opt' value="paired"/> + <param name="input_mate1" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq" ftype="fastqsanger"/> + <param name="input_mate2" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq" ftype="fastqsanger"/> + <param name='apply_filter_treatments' value="true"/> + <param name='apply_length_filter_treatments' value="true"/> + <param name='apply_min_length_filter_treatments' value="true"/> + <param name="min_length_filter_treatment_value" value="50"/> + <param name='apply_max_length_filter_treatments' value="false" /> + <param name='apply_quality_filter_treatments' value="true"/> + <param name='apply_min_quality_filter_treatments' value="false" /> + <param name='apply_max_quality_filter_treatments' value="false"/> + <param name='apply_mean_quality_filter_treatments' value="true"/> + <param name='apply_min_mean_quality_filter_treatments' value="true"/> + <param name="min_mean_quality_filter_treatment_value" value="15"/> + <param name='apply_max_mean_quality_filter_treatments' value="false"/> + <param name='apply_base_content_filter_treatments' value="true"/> + <param name='apply_GC_perc_content_filter_treatments' value="false"/> + <param name='apply_N_number_content_filter_treatments' value="false"/> + <param name='apply_N_percentage_content_filter_treatments' value="true"/> + <param name="N_percentage_content_filter_treatment_value" value="2"/> + <param name='apply_other_base_content_filter_treatments' value="false"/> + <param name='apply_complexity_filter_treatments' value="false"/> + <param name='apply_trimming_treatments' value="true" /> + <param name='apply_length_trimming_treatments' value="false"/> + <param name='apply_position_trimming_treatments' value="false"/> + <param name='apply_tail_trimming_treatments' value="false"/> + <param name='apply_quality_trimming_treatments' value="true"/> + <param name='apply_left_quality_trimming_treatments' value="false"/> + <param name='apply_right_quality_trimming_treatments' value="true" /> + <param name="right_quality_trimming_treatment_value" value="20"/> + <param name="type_quality_trimming_treatments" value="min"/> + <param name="rule_quality_trimming_treatments" value="lt" /> + <param name="window_quality_trimming_treatments" value="1"/> + <param name="step_quality_trimming_treatments" value="1"/> + <output name="good_sequences_1_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="36"/> + </assert_contents> + </output> + <output name="good_sequences_1_singletons_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="44"/> + </assert_contents> + </output> + <output name="rejected_sequence_1_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="good_sequences_2_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="36"/> + </assert_contents> + </output> + <output name="good_sequences_2_singletons_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="8"/> + </assert_contents> + </output> + <output name="rejected_sequence_2_file" ftype="fastqsanger"> + <assert_contents> + <has_n_lines n="36"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="9"> + <param name='seq_type_opt' value="paired_collection"/> + <param name="input_collection"> + <collection type="paired"> + <element name="forward" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq.gz" ftype="fastqsanger.gz"/> + <element name="reverse" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq.gz" ftype="fastqsanger.gz"/> + </collection> + </param> + <param name='apply_filter_treatments' value="true"/> + <param name='apply_length_filter_treatments' value="true"/> + <param name='apply_min_length_filter_treatments' value="true"/> + <param name="min_length_filter_treatment_value" value="50"/> + <param name='apply_max_length_filter_treatments' value="false" /> + <param name='apply_quality_filter_treatments' value="true"/> + <param name='apply_min_quality_filter_treatments' value="false" /> + <param name='apply_max_quality_filter_treatments' value="false"/> + <param name='apply_mean_quality_filter_treatments' value="true"/> + <param name='apply_min_mean_quality_filter_treatments' value="true"/> + <param name="min_mean_quality_filter_treatment_value" value="15"/> + <param name='apply_max_mean_quality_filter_treatments' value="false"/> + <param name='apply_base_content_filter_treatments' value="true"/> + <param name='apply_GC_perc_content_filter_treatments' value="false"/> + <param name='apply_N_number_content_filter_treatments' value="false"/> + <param name='apply_N_percentage_content_filter_treatments' value="true"/> + <param name="N_percentage_content_filter_treatment_value" value="2"/> + <param name='apply_other_base_content_filter_treatments' value="false"/> + <param name='apply_complexity_filter_treatments' value="false"/> + <param name='apply_trimming_treatments' value="true" /> + <param name='apply_length_trimming_treatments' value="false"/> + <param name='apply_position_trimming_treatments' value="false"/> + <param name='apply_tail_trimming_treatments' value="false"/> + <param name='apply_quality_trimming_treatments' value="true"/> + <param name='apply_left_quality_trimming_treatments' value="false"/> + <param name='apply_right_quality_trimming_treatments' value="true" /> + <param name="right_quality_trimming_treatment_value" value="20"/> + <param name="type_quality_trimming_treatments" value="min"/> + <param name="rule_quality_trimming_treatments" value="lt" /> + <param name="window_quality_trimming_treatments" value="1"/> + <param name="step_quality_trimming_treatments" value="1"/> + <output_collection name="good_sequences_collection" type="paired"> + <element name="forward" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="605" delta="100"/> + </assert_contents> + </element> + <element name="reverse" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="667" delta="100"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="singletons_collection" type="paired"> + <element name="forward" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="720" delta="100"/> + </assert_contents> + </element> + <element name="reverse" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="219" delta="100"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="rejected_sequences_collection" type="paired"> + <element name="forward" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="0" delta="0"/> + </assert_contents> + </element> + <element name="reverse" ftype="fastqsanger.gz"> + <assert_contents> + <has_size value="718" delta="100"/> + </assert_contents> + </element> + </output_collection> </test> </tests>