Mercurial > repos > iuc > qiime_split_libraries_fastq
diff split_libraries_fastq.xml @ 4:1327fee2bf93 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime/ commit 3b54163c4f7daff76fcc589c4a9057bb03904380
author | iuc |
---|---|
date | Sat, 05 Aug 2017 07:23:17 -0400 |
parents | 20194da2549d |
children | c2ffcfff57f6 |
line wrap: on
line diff
--- a/split_libraries_fastq.xml Mon Jul 10 16:45:26 2017 -0400 +++ b/split_libraries_fastq.xml Sat Aug 05 07:23:17 2017 -0400 @@ -1,76 +1,80 @@ <tool id="qiime_split_libraries_fastq" name="Split fastq libraries" version="@WRAPPER_VERSION@.0"> - <description>to performs demultiplexing of Fastq sequence data</description> + <description> to performs demultiplexing of Fastq sequence data (split_libraries_fastq)</description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> <version_command>split_libraries_fastq.py --version</version_command> <command detect_errors="aggressive"><![CDATA[ - split_libraries_fastq.py - #set $seq_files = '' - #set $sep = '' - #for $file in $sequence_read_fps - #set $seq_files += $sep + str($file) - #set $sep = ',' - #end for - --sequence_read_fps '$seq_files' +split_libraries_fastq.py + #set $seq_files = '' + #set $sep = '' + #for $file in $sequence_read_fps + #set $seq_files += $sep + str($file) + #set $sep = ',' + #end for + --sequence_read_fps '$seq_files' - -o split_libraries + -o split_libraries - #set $mapping_files = '' - #set $sep = '' - #for $file in $mapping_fps - #set $mapping_files += $sep + str($file) - #set $sep = ',' - #end for - --mapping_fps '$mapping_files' + #set $mapping_files = '' + #set $sep = '' + #for $file in $mapping_fps + #set $mapping_files += $sep + str($file) + #set $sep = ',' + #end for + #if $mapping_files != 'None' + --mapping_fps '$mapping_files' + #end if - #set $barcode_files = '' - #set $sep = '' - #for $file in $barcode_read_fps - #set $barcode_files += $sep + str($file) - #set $sep = ',' - #end for - --barcode_read_fps '$barcode_files' - - $store_qual_scores - #if str($sample_ids): - --sample_ids '$sample_ids' - #end if - $store_demultiplexed_fastq - $retain_unassigned_reads + #set $barcode_files = '' + #set $sep = '' + #for $file in $barcode_read_fps + #set $barcode_files += $sep + str($file) + #set $sep = ',' + #end for + #if $barcode_files != 'None' + --barcode_read_fps '$barcode_files' + #end if - --max_bad_run_length '$max_bad_run_length' - --min_per_read_length_fraction '$min_per_read_length_fraction' - --sequence_max_n '$sequence_max_n' - --start_seq_id '$start_seq_id' - $rev_comp_barcode - $rev_comp_mapping_barcodes - $rev_comp - --phred_quality_threshold '$phred_quality_threshold' - #if str( $barcode.barcode_type ) != "custom_length" - --barcode_type '$barcode.barcode_type' - #else - --barcode_type '$barcode.barcode_length' - #end if - --max_barcode_errors '$max_barcode_errors' - $phred_offset + $store_qual_scores + #if str($sample_ids) != '' + --sample_ids '$sample_ids' + #end if + $store_demultiplexed_fastq + $retain_unassigned_reads + + --max_bad_run_length '$max_bad_run_length' + --min_per_read_length_fraction '$min_per_read_length_fraction' + --sequence_max_n '$sequence_max_n' + --start_seq_id '$start_seq_id' + $rev_comp_barcode + $rev_comp_mapping_barcodes + $rev_comp + --phred_quality_threshold '$phred_quality_threshold' + #if str( $barcode.barcode_type ) != "custom_length" + --barcode_type '$barcode.barcode_type' + #else + --barcode_type '$barcode.barcode_length' + #end if + --max_barcode_errors '$max_barcode_errors' + $phred_offset ]]></command> <inputs> - <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Input fastq files" multiple="True"/> - <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" label="Metadata mapping files (optional)" multiple="True" optional="True"/> - <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" label="Barcode read files (optional)" multiple="True" optional="True"/> - <param argument="--store_qual_scores" type="boolean" label="Store quality strings in files?" truevalue="--store_qual_scores" falsevalue="" checked="False"/> - <param argument="--sample_ids" type="text" label="Comma-separated list of samples ids to be applied to all sequences (optional)" optional="True" help="It must be one per input file path (used when data is not multiplexed)"/> - <param argument="--store_demultiplexed_fastq" type="boolean" label="Write demultiplexed fastq files?" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="False"/> - <param argument="--retain_unassigned_reads" type="boolean" label="Retain sequences which don’t map to a barcode in the mapping file?" truevalue="--retain_unassigned_reads" falsevalue="" checked="False" help="Sample ID will be 'Unassigned'"/> + <param argument="--sequence_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" label="Input fastq files"/> + <param argument="--mapping_fps" type="data" format="txt,tabular,tsv,csv" multiple="true" optional="true" label="Metadata mapping files"/> + <param argument="--barcode_read_fps" type="data" format="fastq,fastqsanger,fastqsolexa" multiple="true" optional="true" label="Barcode read files"/> + <param argument="--store_qual_scores" type="boolean" truevalue="--store_qual_scores" falsevalue="" checked="false" label="Store quality strings in files?"/> + <param argument="--sample_ids" type="text" optional="true" label="Comma-separated list of samples ids to be applied to all sequences" help="It must be one per input file path (used when data is not multiplexed)"/> + <param argument="--store_demultiplexed_fastq" type="boolean" truevalue="--store_demultiplexed_fastq" falsevalue="" checked="false" label="Write demultiplexed fastq files?"/> + <param argument="--retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="Retain sequences which don’t map to a barcode in the mapping file?" help="Sample ID will be 'Unassigned'"/> <param argument="--max_bad_run_length" type="integer" value="3" label="Maximum number of consecutive low quality base calls allowed before truncating a read"/> <param argument="--min_per_read_length_fraction" type="float" value="0.75" label="Minimum number of consecutive high quality base calls to include a read (per single end read) as a fraction of the input read length"/> <param argument="--sequence_max_n" type="integer" value="0" label="Maximum number of N characters allowed in a sequence to retain it" help="This is applied after quality trimming, and is total over combined paired end reads if applicable"/> <param argument="--start_seq_id" type="integer" value="0" label="Start seq_ids as ascending integers beginning with start_seq_id"/> - <param argument="--rev_comp_barcode" type="boolean" label="Reverse complement barcode reads before lookup?" truevalue="--rev_comp_barcode" falsevalue="" checked="False"/> - <param argument="--rev_comp_mapping_barcodes" type="boolean" label="Reverse complement barcode in mapping before lookup?" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="False" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/> - <param argument="--rev_comp" type="boolean" label="Reverse omplement sequence before writing to output file?" truevalue="--rev_comp" falsevalue="" checked="False"/> + <param argument="--rev_comp_barcode" type="boolean" truevalue="--rev_comp_barcode" falsevalue="" checked="false" label="Reverse complement barcode reads before lookup?"/> + <param argument="--rev_comp_mapping_barcodes" type="boolean" truevalue="--rev_comp_mapping_barcodes" falsevalue="" checked="false" label="Reverse complement barcode in mapping before lookup?" help="It is useful if barcodes in mapping file are reverse complements of golay codes"/> + <param argument="--rev_comp" type="boolean" truevalue="--rev_comp" falsevalue="" checked="false" label="Reverse complement sequence before writing to output file?"/> <param argument="--phred_quality_threshold" type="integer" value="3" label="Maximum unacceptable Phred quality score" help="E.g., for Q20 and better, 19 must be specified"/> <conditional name="barcode"> <param argument="--barcode_type" type="select" label="Type of barcode"> @@ -108,9 +112,50 @@ </outputs> <tests> <test> - <param name="sequence_read_fps" value="split_libraries_fastq/forward_reads.fastq"/> - <param name="mapping_fps" value="split_libraries_fastq/map.tsv"/> - <param name="barcode_read_fps" value="split_libraries_fastq/barcodes.fastq"/> + <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/> + <param name="mapping_fps" value="split_libraries_fastq/map.txt"/> + <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/> + <param name="store_qual_scores" value=""/> + <param name="store_demultiplexed_fastq" value=""/> + <param name="retain_unassigned_reads" value=""/> + <param name="max_bad_run_length" value="3"/> + <param name="min_per_read_length_fraction" value="0.75"/> + <param name="sequence_max_n" value="0"/> + <param name="start_seq_id" value="0"/> + <param name="rev_comp_barcode" value=""/> + <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/> + <param name="rev_comp" value=""/> + <param name="phred_quality_threshold" value="19"/> + <conditional name="barcode"> + <param name="barcode_type" value="golay_12"/> + </conditional> + <param name="max_barcode_errors" value="1.5"/> + <param name="phred_offset" value=""/> + <output name="log"> + <assert_contents> + <has_text text="Median sequence length: 151.00"></has_text> + <has_text text="s41"></has_text> + <has_text text="s122"></has_text> + </assert_contents> + </output> + <output name="seqs" ftype="fasta"> + <assert_contents> + <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text> + <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="histograms"> + <assert_contents> + <has_text text="Length"></has_text> + <has_text text="127.0"></has_text> + <has_text text="157.0"></has_text> + </assert_contents> + </output> + </test> + <test> + <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/> + <param name="mapping_fps" value="split_libraries_fastq/map.txt"/> + <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz"/> <param name="store_qual_scores" value="--store_qual_scores"/> <param name="store_demultiplexed_fastq" value="--store_demultiplexed_fastq"/> <param name="retain_unassigned_reads" value=""/> @@ -119,23 +164,165 @@ <param name="sequence_max_n" value="0"/> <param name="start_seq_id" value="0"/> <param name="rev_comp_barcode" value=""/> - <param name="rev_comp_mapping_barcodes" value=""/> + <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/> <param name="rev_comp" value=""/> - <param name="barcode_selector" value="golay_12"/> + <param name="phred_quality_threshold" value="19"/> + <conditional name="barcode"> + <param name="barcode_type" value="golay_12"/> + </conditional> + <param name="max_barcode_errors" value="1.5"/> + <param name="phred_offset" value=""/> + <output name="log"> + <assert_contents> + <has_text text="Median sequence length: 151.00"></has_text> + <has_text text="s41"></has_text> + <has_text text="s122"></has_text> + </assert_contents> + </output> + <output name="seqs" ftype="fasta"> + <assert_contents> + <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text> + <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="seqs_fastq" ftype="fastq"> + <assert_contents> + <has_text text="@s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text> + <has_text text="@s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="seqs_qual" ftype="qual"> + <assert_contents> + <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text> + <has_text text="s107_6 M00176:17:000000000-A0CNA:1:1:15276:1779 1:N:0:0 orig_bc=ATCTCCTCTCCA new_bc=ATCTCCTCTCCA bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="histograms"> + <assert_contents> + <has_text text="Length"></has_text> + <has_text text="127.0"></has_text> + <has_text text="157.0"></has_text> + </assert_contents> + </output> + </test> + <test> + <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/> + <param name="mapping_fps" value="split_libraries_fastq/map.txt,split_libraries_fastq/map.txt"/> + <param name="barcode_read_fps" value="split_libraries_fastq/lane1_barcode.fastq.gz,split_libraries_fastq/lane2_barcode.fastq.gz"/> + <param name="store_qual_scores" value=""/> + <param name="store_demultiplexed_fastq" value=""/> + <param name="retain_unassigned_reads" value=""/> + <param name="max_bad_run_length" value="3"/> + <param name="min_per_read_length_fraction" value="0.75"/> + <param name="sequence_max_n" value="0"/> + <param name="start_seq_id" value="0"/> + <param name="rev_comp_barcode" value=""/> + <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/> + <param name="rev_comp" value=""/> + <param name="phred_quality_threshold" value="19"/> + <conditional name="barcode"> + <param name="barcode_type" value="golay_12"/> + </conditional> <param name="max_barcode_errors" value="1.5"/> <param name="phred_offset" value=""/> <output name="log"> <assert_contents> - <has_line line="Median sequence length: 132.50"></has_line> - <has_text text="L1S76"></has_text> - <has_text text="L1S281"></has_text> - <has_text text="L1S8"></has_text> + <has_text text="Median sequence length: 151.00"></has_text> + <has_text text="s41"></has_text> + <has_text text="s122"></has_text> + </assert_contents> + </output> + <output name="seqs" ftype="fasta"> + <assert_contents> + <has_text text="s161_0 M00176:17:000000000-A0CNA:1:1:16738:1773 1:N:0:0 orig_bc=CTCGCTTCACTT new_bc=CTCGCTTCACTT bc_diffs=0"></has_text> + <has_text text="s26_84 M00176:17:000000000-A0CNA:1:1:18075:1844 1:N:0:0 orig_bc=AGGGTTCCAGTT new_bc=AGGGTTCCAGTT bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="histograms"> + <assert_contents> + <has_text text="Length"></has_text> + <has_text text="127.0"></has_text> + <has_text text="157.0"></has_text> + </assert_contents> + </output> + </test> + <test> + <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz"/> + <param name="store_qual_scores" value=""/> + <param name="sample_ids" value="my.sample.1"/> + <param name="store_demultiplexed_fastq" value=""/> + <param name="retain_unassigned_reads" value=""/> + <param name="max_bad_run_length" value="3"/> + <param name="min_per_read_length_fraction" value="0.75"/> + <param name="sequence_max_n" value="0"/> + <param name="start_seq_id" value="0"/> + <param name="rev_comp_barcode" value=""/> + <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/> + <param name="rev_comp" value=""/> + <param name="phred_quality_threshold" value="19"/> + <conditional name="barcode"> + <param name="barcode_type" value="not-barcoded"/> + </conditional> + <param name="phred_offset" value=""/> + <output name="log"> + <assert_contents> + <has_text text="Median sequence length: 151.00"></has_text> + <has_text text="my.sample.1"></has_text> + <has_text text="Total number seqs written"></has_text> </assert_contents> </output> - <output name="seqs" file="split_libraries_fastq/sequences.fasta"/> - <output name="histograms" file="split_libraries_fastq/histograms.tabular"/> - <output name="seqs_qual" file="split_libraries_fastq/sequence_qualities.qual"/> - <output name="seqs_fastq" file="split_libraries_fastq/demultiplexed_sequences.fastq"/> + <output name="seqs" ftype="fasta"> + <assert_contents> + <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text> + <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="histograms"> + <assert_contents> + <has_text text="Length"></has_text> + <has_text text="124.0"></has_text> + <has_text text="154.0"></has_text> + </assert_contents> + </output> + </test> + <test> + <param name="sequence_read_fps" value="split_libraries_fastq/lane1_read1.fastq.gz,split_libraries_fastq/lane2_read1.fastq.gz"/> + <param name="store_qual_scores" value=""/> + <param name="sample_ids" value="my.sample.1,my.sample.2"/> + <param name="store_demultiplexed_fastq" value=""/> + <param name="retain_unassigned_reads" value=""/> + <param name="max_bad_run_length" value="3"/> + <param name="min_per_read_length_fraction" value="0.75"/> + <param name="sequence_max_n" value="0"/> + <param name="start_seq_id" value="0"/> + <param name="rev_comp_barcode" value=""/> + <param name="rev_comp_mapping_barcodes" value="--rev_comp_mapping_barcodes"/> + <param name="rev_comp" value=""/> + <param name="phred_quality_threshold" value="19"/> + <conditional name="barcode"> + <param name="barcode_type" value="not-barcoded"/> + </conditional> + <param name="phred_offset" value=""/> + <output name="log"> + <assert_contents> + <has_text text="Median sequence length: 151.00"></has_text> + <has_text text="my.sample.1"></has_text> + <has_text text="Total number seqs written"></has_text> + </assert_contents> + </output> + <output name="seqs" ftype="fasta"> + <assert_contents> + <has_text text="my.sample.1_0 M00176:17:000000000-A0CNA:1:1:15487:1773 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text> + <has_text text="my.sample.1_12 M00176:17:000000000-A0CNA:1:1:14889:1778 1:N:0:0 orig_bc=AAAAAAAAAAAA new_bc=AAAAAAAAAAAA bc_diffs=0"></has_text> + </assert_contents> + </output> + <output name="histograms"> + <assert_contents> + <has_text text="Length"></has_text> + <has_text text="124.0"></has_text> + <has_text text="154.0"></has_text> + </assert_contents> + </output> </test> </tests> <help><![CDATA[