diff split_libraries_fastq.xml @ 4:1327fee2bf93 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit 3b54163c4f7daff76fcc589c4a9057bb03904380
author iuc
date Sat, 05 Aug 2017 07:23:17 -0400
parents 20194da2549d
children c2ffcfff57f6
line wrap: on
line diff
--- a/split_libraries_fastq.xml	Mon Jul 10 16:45:26 2017 -0400
+++ b/split_libraries_fastq.xml	Sat Aug 05 07:23:17 2017 -0400
@@ -1,76 +1,80 @@
 <tool id="qiime_split_libraries_fastq" name="Split fastq libraries" version="@WRAPPER_VERSION@.0">
-    <description>to performs demultiplexing of Fastq sequence data</description>
+    <description> to perform demultiplexing of Fastq sequence data (split_libraries_fastq)</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
     <version_command>split_libraries_fastq.py --version</version_command>
     <command detect_errors="aggressive"><![CDATA[
-        split_libraries_fastq.py
-            #set $seq_files = ''
-            #set $sep = ''
-            #for $file in $sequence_read_fps
-                #set $seq_files += $sep + str($file)
-                #set $sep = ','
-            #end for
-            --sequence_read_fps '$seq_files'
+split_libraries_fastq.py
+    #set $seq_files = ''
+    #set $sep = ''
+    #for $file in $sequence_read_fps
+        #set $seq_files += $sep + str($file)
+        #set $sep = ','
+    #end for
+    --sequence_read_fps '$seq_files'
 
-            -o split_libraries
+    -o split_libraries
 
-            #set $mapping_files = ''
-            #set $sep = ''
-            #for $file in $mapping_fps
-                #set $mapping_files += $sep + str($file)
-                #set $sep = ','
-            #end for
-            --mapping_fps '$mapping_files'
+    #set $mapping_files = ''
+    #set $sep = ''
+    #for $file in $mapping_fps
+        #set $mapping_files += $sep + str($file)
+        #set $sep = ','
+    #end for
+    #if $mapping_files != 'None'
+        --mapping_fps '$mapping_files'
+    #end if
 
-            #set $barcode_files = ''
-            #set $sep = ''
-            #for $file in $barcode_read_fps
-                #set $barcode_files += $sep + str($file)
-                #set $sep = ','
-            #end for
-            --barcode_read_fps '$barcode_files'
-
-            $store_qual_scores
-            #if str($sample_ids):
-                --sample_ids '$sample_ids'
-            #end if
-            $store_demultiplexed_fastq
-            $retain_unassigned_reads
+    #set $barcode_files = ''
+    #set $sep = ''
+    #for $file in $barcode_read_fps
+        #set $barcode_files += $sep + str($file)
+        #set $sep = ','
+    #end for
+    #if $barcode_files != 'None'
+        --barcode_read_fps '$barcode_files'
+    #end if
 
-            --max_bad_run_length '$max_bad_run_length'
-            --min_per_read_length_fraction '$min_per_read_length_fraction'
-            --sequence_max_n '$sequence_max_n'
-            --start_seq_id '$start_seq_id'
-            $rev_comp_barcode
-            $rev_comp_mapping_barcodes
-            $rev_comp
-            --phred_quality_threshold '$phred_quality_threshold'
-            #if str( $barcode.barcode_type ) != "custom_length"
-                --barcode_type '$barcode.barcode_type'
-            #else
-                --barcode_type '$barcode.barcode_length'
-            #end if
-            --max_barcode_errors '$max_barcode_errors'
-            $phred_offset
+    $store_qual_scores
+    #if str($sample_ids) != ''
+        --sample_ids '$sample_ids'
+    #end if
+    $store_demultiplexed_fastq
+    $retain_unassigned_reads
+
+    --max_bad_run_length '$max_bad_run_length'
+    --min_per_read_length_fraction '$min_per_read_length_fraction'
+    --sequence_max_n '$sequence_max_n'
+    --start_seq_id '$start_seq_id'
+    $rev_comp_barcode
+    $rev_comp_mapping_barcodes
+    $rev_comp
+    --phred_quality_threshold '$phred_quality_threshold'
+    #if str( $barcode.barcode_type ) != "custom_length"
+        --barcode_type '$barcode.barcode_type'
+    #else
+        --barcode_type '$barcode.barcode_length'
+    #end if
+    --max_barcode_errors '$max_barcode_errors'
+    $phred_offset
     ]]></command>
     <inputs>
-        <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Input fastq files" multiple="True"/>
-        <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" label="Metadata mapping files (optional)" multiple="True" optional="True"/>
-        <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Barcode read files (optional)" multiple="True" optional="True"/>
-        <param argument="--store_qual_scores" type="boolean" label="Store quality strings in files?" truevalue="--store_qual_scores" falsevalue="" checked="False"/>
-        <param argument="--sample_ids" type="text" label="Comma-separated list of samples ids to be applied to all sequences (optional)" optional="True" help="It must be one per input file path (used when data is not multiplexed)"/>
-        <param argument="--store_demultiplexed_fastq" type="boolean" label="Write demultiplexed fastq files?" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="False"/>
-        <param argument="--retain_unassigned_reads" type="boolean" label="Retain sequences which don’t map to a barcode in the mapping file?" truevalue="--retain_unassigned_reads" falsevalue="" checked="False" help="Sample ID will be 'Unassigned'"/>
+        <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" label="Input fastq files"/>
+        <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" multiple="true" optional="true" label="Metadata mapping files"/>
+        <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" optional="true" label="Barcode read files"/>
+        <param argument="--store_qual_scores" type="boolean" truevalue="--store_qual_scores" falsevalue="" checked="false" label="Store quality strings in files?"/>
+        <param argument="--sample_ids" type="text" optional="true" label="Comma-separated list of sample ids to be applied to all sequences" help="It must be one per input file path (used when data is not multiplexed)"/>
+        <param argument="--store_demultiplexed_fastq" type="boolean" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="false" label="Write demultiplexed fastq files?"/>
+        <param argument="--retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="Retain sequences which don’t map to a barcode in the mapping file?" help="Sample ID will be 'Unassigned'"/>
         <param argument="--max_bad_run_length" type="integer" value="3" label="Maximum number of consecutive low quality base calls allowed before truncating a read"/>
         <param argument="--min_per_read_length_fraction" type="float" value="0.75" label="Minimum number of consecutive high quality base calls to include a read (per single end read) as a fraction of the input read length"/>
         <param argument="--sequence_max_n" type="integer" value="0" label="Maximum number of N characters allowed in a sequence to retain it" help="This is applied after quality trimming, and is total over combined paired end reads if applicable"/>
         <param argument="--start_seq_id" type="integer" value="0" label="Start seq_ids as ascending integers beginning with start_seq_id"/>
-        <param argument="--rev_comp_barcode" type="boolean" label="Reverse complement barcode reads before lookup?" truevalue="--rev_comp_barcode" falsevalue="" checked="False"/>
-        <param argument="--rev_comp_mapping_barcodes" type="boolean" label="Reverse complement barcode in mapping before lookup?" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="False" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/>
-        <param argument="--rev_comp" type="boolean" label="Reverse omplement sequence before writing to output file?" truevalue="--rev_comp" falsevalue="" checked="False"/>
+        <param argument="--rev_comp_barcode" type="boolean" truevalue="--rev_comp_barcode" falsevalue="" checked="false" label="Reverse complement barcode reads before lookup?"/>
+        <param argument="--rev_comp_mapping_barcodes" type="boolean" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="false" label="Reverse complement barcode in mapping before lookup?" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/>
+        <param argument="--rev_comp" type="boolean" truevalue="--rev_comp" falsevalue="" checked="false" label="Reverse complement sequence before writing to output file?"/>
         <param argument="--phred_quality_threshold" type="integer" value="3" label="Maximum unacceptable Phred quality score" help="E.g., for Q20 and better, 19 must be specified"/>
         <conditional name="barcode">
             <param argument="--barcode_type" type="select" label="Type of barcode">
@@ -108,9 +112,50 @@
     </outputs>
     <tests>
         <test>
-            <param name="sequence_read_fps" value="split_libraries_fastq/forward_reads.fastq"/>
-            <param name="mapping_fps" value="split_libraries_fastq/map.tsv"/>
-            <param name="barcode_read_fps" value="split_libraries_fastq/barcodes.fastq"/>
+            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
+            <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
+            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>
+            <param name="store_qual_scores" value=""/>
+            <param name="store_demultiplexed_fastq" value=""/>
+            <param name="retain_unassigned_reads" value=""/>
+            <param name="max_bad_run_length" value="3"/>
+            <param name="min_per_read_length_fraction" value="0.75"/>
+            <param name="sequence_max_n" value="0"/>
+            <param name="start_seq_id" value="0"/>
+            <param name="rev_comp_barcode" value=""/>
+            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
+            <param name="rev_comp" value=""/>
+            <param name="phred_quality_threshold" value="19"/>
+            <conditional name="barcode">
+                <param name="barcode_type" value="golay_12"/>
+            </conditional>
+            <param name="max_barcode_errors" value="1.5"/>
+            <param name="phred_offset" value=""/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="Median sequence length: 151.00"></has_text>
+                    <has_text text="s41"></has_text>
+                    <has_text text="s122"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
+                    <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="histograms">
+                <assert_contents>
+                    <has_text text="Length"></has_text>
+                    <has_text text="127.0"></has_text>
+                    <has_text text="157.0"></has_text>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
+            <param name="mapping_fps" value="split_libraries_fastq/map.txt"/>
+            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/>
             <param name="store_qual_scores" value="--store_qual_scores"/>
             <param name="store_demultiplexed_fastq" value="--store_demultiplexed_fastq"/>
             <param name="retain_unassigned_reads" value=""/>
@@ -119,23 +164,165 @@
             <param name="sequence_max_n" value="0"/>
             <param name="start_seq_id" value="0"/>
             <param name="rev_comp_barcode" value=""/>
-            <param name="rev_comp_mapping_barcodes" value=""/>
+            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
             <param name="rev_comp" value=""/>
-            <param name="barcode_selector" value="golay_12"/>
+            <param name="phred_quality_threshold" value="19"/>
+            <conditional name="barcode">
+                <param name="barcode_type" value="golay_12"/>
+            </conditional>
+            <param name="max_barcode_errors" value="1.5"/>
+            <param name="phred_offset" value=""/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="Median sequence length: 151.00"></has_text>
+                    <has_text text="s41"></has_text>
+                    <has_text text="s122"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
+                    <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs_fastq" ftype="fastq">
+                <assert_contents>
+                    <has_text text="@s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
+                    <has_text text="@s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs_qual" ftype="qual">
+                <assert_contents>
+                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
+                    <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="histograms">
+                <assert_contents>
+                    <has_text text="Length"></has_text>
+                    <has_text text="127.0"></has_text>
+                    <has_text text="157.0"></has_text>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
+            <param name="mapping_fps" value="split_libraries_fastq/map.txt,split_libraries_fastq/map.txt"/>
+            <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz,split_libraries_fastq/lane2_barcode.fastq.gz"/>
+            <param name="store_qual_scores" value=""/>
+            <param name="store_demultiplexed_fastq" value=""/>
+            <param name="retain_unassigned_reads" value=""/>
+            <param name="max_bad_run_length" value="3"/>
+            <param name="min_per_read_length_fraction" value="0.75"/>
+            <param name="sequence_max_n" value="0"/>
+            <param name="start_seq_id" value="0"/>
+            <param name="rev_comp_barcode" value=""/>
+            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
+            <param name="rev_comp" value=""/>
+            <param name="phred_quality_threshold" value="19"/>
+            <conditional name="barcode">
+                <param name="barcode_type" value="golay_12"/>
+            </conditional>
             <param name="max_barcode_errors" value="1.5"/>
             <param name="phred_offset" value=""/>
             <output name="log">
                 <assert_contents>
-                    <has_line line="Median sequence length: 132.50"></has_line>
-                    <has_text text="L1S76"></has_text>
-                    <has_text text="L1S281"></has_text>
-                    <has_text text="L1S8"></has_text>
+                    <has_text text="Median sequence length: 151.00"></has_text>
+                    <has_text text="s41"></has_text>
+                    <has_text text="s122"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text>
+                    <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="histograms">
+                <assert_contents>
+                    <has_text text="Length"></has_text>
+                    <has_text text="127.0"></has_text>
+                    <has_text text="157.0"></has_text>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/>
+            <param name="store_qual_scores" value=""/>
+            <param name="sample_ids" value="my.sample.1"/>
+            <param name="store_demultiplexed_fastq" value=""/>
+            <param name="retain_unassigned_reads" value=""/>
+            <param name="max_bad_run_length" value="3"/>
+            <param name="min_per_read_length_fraction" value="0.75"/>
+            <param name="sequence_max_n" value="0"/>
+            <param name="start_seq_id" value="0"/>
+            <param name="rev_comp_barcode" value=""/>
+            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
+            <param name="rev_comp" value=""/>
+            <param name="phred_quality_threshold" value="19"/>
+            <conditional name="barcode">
+                <param name="barcode_type" value="not-barcoded"/>
+            </conditional>
+            <param name="phred_offset" value=""/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="Median sequence length: 151.00"></has_text>
+                    <has_text text="my.sample.1"></has_text>
+                    <has_text text="Total number seqs written"></has_text>
                 </assert_contents>
             </output>
-            <output name="seqs" file="split_libraries_fastq/sequences.fasta"/>
-            <output name="histograms" file="split_libraries_fastq/histograms.tabular"/>
-            <output name="seqs_qual" file="split_libraries_fastq/sequence_qualities.qual"/>
-            <output name="seqs_fastq" file="split_libraries_fastq/demultiplexed_sequences.fastq"/>
+            <output name="seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
+                    <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="histograms">
+                <assert_contents>
+                    <has_text text="Length"></has_text>
+                    <has_text text="124.0"></has_text>
+                    <has_text text="154.0"></has_text>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/>
+            <param name="store_qual_scores" value=""/>
+            <param name="sample_ids" value="my.sample.1,my.sample.2"/>
+            <param name="store_demultiplexed_fastq" value=""/>
+            <param name="retain_unassigned_reads" value=""/>
+            <param name="max_bad_run_length" value="3"/>
+            <param name="min_per_read_length_fraction" value="0.75"/>
+            <param name="sequence_max_n" value="0"/>
+            <param name="start_seq_id" value="0"/>
+            <param name="rev_comp_barcode" value=""/>
+            <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/>
+            <param name="rev_comp" value=""/>
+            <param name="phred_quality_threshold" value="19"/>
+            <conditional name="barcode">
+                <param name="barcode_type" value="not-barcoded"/>
+            </conditional>
+            <param name="phred_offset" value=""/>
+            <output name="log">
+                <assert_contents>
+                    <has_text text="Median sequence length: 151.00"></has_text>
+                    <has_text text="my.sample.1"></has_text>
+                    <has_text text="Total number seqs written"></has_text>
+                </assert_contents>
+            </output>
+            <output name="seqs" ftype="fasta">
+                <assert_contents>
+                    <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
+                    <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text>
+                </assert_contents>
+            </output>
+            <output name="histograms">
+                <assert_contents>
+                    <has_text text="Length"></has_text>
+                    <has_text text="124.0"></has_text>
+                    <has_text text="154.0"></has_text>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[