Mercurial > repos > devteam > fastq_groomer
changeset 2:71e5fa25b8a2 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_groomer commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:58:07 -0400 |
parents | e4d28c94242d |
children | 8611b80a14d6 |
files | fastq_groomer.py fastq_groomer.xml test-data/sanger_full_range_as_cssanger.fastqcssanger.bz2 test-data/sanger_full_range_original_sanger.fastqsanger.gz tool_dependencies.xml |
diffstat | 5 files changed, 371 insertions(+), 387 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_groomer.py Wed Nov 11 12:40:56 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -#Dan Blankenberg -import sys -from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter - -def main(): - input_filename = sys.argv[1] - input_type = sys.argv[2] - output_filename = sys.argv[3] - output_type = sys.argv[4] - force_quality_encoding = sys.argv[5] - summarize_input = sys.argv[6] == 'summarize_input' - if force_quality_encoding == 'None': - force_quality_encoding = None - - aggregator = fastqAggregator() - out = fastqWriter( open( output_filename, 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding ) - read_count = None - if summarize_input: - reader = fastqVerboseErrorReader - else: - reader = fastqReader - for read_count, fastq_read in enumerate( reader( open( input_filename ), format = input_type, apply_galaxy_conventions = True ) ): - if summarize_input: - aggregator.consume_read( fastq_read ) - out.write( fastq_read ) - out.close() - - if read_count is not None: - print "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type ) - if input_type != output_type and 'solexa' in [ input_type, output_type ]: - print "Converted between Solexa and PHRED scores." - if summarize_input: - print "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() ) or "None" ) - ascii_range = aggregator.get_ascii_range() - decimal_range = aggregator.get_decimal_range() - print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed - print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] ) - else: - print "No valid FASTQ reads were provided." - - -if __name__ == "__main__": main()
--- a/fastq_groomer.xml Wed Nov 11 12:40:56 2015 -0500 +++ b/fastq_groomer.xml Sat Sep 30 14:58:07 2017 -0400 @@ -1,342 +1,377 @@ -<tool id="fastq_groomer" name="FASTQ Groomer" version="1.0.4"> - <description>convert between various FASTQ quality formats</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_groomer.py '$input_file' '$input_type' '$output_file' -#if str( $options_type['options_type_selector'] ) == 'basic': -#if str( $input_type ) == 'cssanger': -'cssanger' +<tool id="fastq_groomer" name="FASTQ Groomer" version="1.1.1"> + <description>convert between various FASTQ quality formats</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-groomer '$input_file' +#if $input_file.extension.endswith(".gz"): + #set $suffix = ".gz" +#elif $input_file.extension.endswith(".bz2"): + #set $suffix = ".bz2" #else: -'sanger' +#set $suffix = "" #end if -'ascii' 'summarize_input' +$input_type$suffix '$output_file' +#if $options_type['options_type_selector'] == 'basic': + #if str($input_type) == 'cssanger': + cssanger + #else: + sanger + #end if + ascii summarize_input #else: -'${options_type.output_type}' '${options_type.force_quality_encoding}' '${options_type.summarize_input}' + ${options_type.output_type} ${options_type.force_quality_encoding} ${options_type.summarize_input} #end if -</command> - <inputs> - <param name="input_file" type="data" format="fastq" label="File to groom" /> - <param name="input_type" type="select" label="Input FASTQ quality scores type"> - <option value="solexa">Solexa</option> - <option value="illumina">Illumina 1.3-1.7</option> - <option value="sanger" selected="True">Sanger & Illumina 1.8+</option> - <option value="cssanger">Color Space Sanger</option> - </param> - <conditional name="options_type"> - <param 
name="options_type_selector" type="select" label="Advanced Options"> - <option value="basic" selected="True">Hide Advanced Options</option> - <option value="advanced">Show Advanced Options</option> - </param> - <when value="basic"> - <!-- no options --> - </when> - <when value="advanced"> - <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format."> - <option value="solexa">Solexa</option> - <option value="illumina">Illumina 1.3-1.7</option> - <option value="sanger" selected="True">Sanger (recommended)</option> - <option value="cssanger">Color Space Sanger</option> - </param> - <param name="force_quality_encoding" type="select" label="Force Quality Score encoding"> - <option value="None">Use Source Encoding</option> - <option value="ascii" selected="True">ASCII</option> - <option value="decimal">Decimal</option> - </param> - <param name="summarize_input" type="select" label="Summarize input data"> - <option value="summarize_input" selected="True">Summarize Input</option> - <option value="dont_summarize_input">Do not Summarize Input (faster)</option> - </param> - </when> - </conditional> - </inputs> - <outputs> - <data name="output_file" format="fastqsanger"> - <change_format> - <when input="input_type" value="cssanger" format="fastqcssanger" /> - <when input="options_type.output_type" value="solexa" format="fastqsolexa" /> - <when input="options_type.output_type" value="illumina" format="fastqillumina" /> - <when input="options_type.output_type" value="sanger" format="fastqsanger" /> - <when input="options_type.output_type" value="cssanger" format="fastqcssanger" /> - </change_format> - </data> - </outputs> - <tests> - <!-- These tests include test files adapted from supplemental material in Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 
2009 Dec 16. --> - <!-- Unfortunately, cannot test for expected failures --> - <!-- Test basic options --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="basic" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> - <param name="input_type" value="cssanger" /> - <param name="options_type_selector" value="basic" /> - <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> - </test> - <test> - <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="illumina" /> - <param name="options_type_selector" value="basic" /> - <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="basic" /> - <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="basic" /> - <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> - </test> - <!-- Test grooming from illumina --> - <test> - <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="illumina" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="illumina" /> - <param name="force_quality_encoding" value="None" /> - <param 
name="summarize_input" value="summarize_input" /> - <output name="output_file" file="illumina_full_range_original_illumina.fastqillumina" /> - </test> - <test> - <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="illumina" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="illumina" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="illumina_full_range_as_solexa.fastqsolexa" /> - </test> - <test> - <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> - <param name="input_type" value="illumina" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="cssanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="illumina_full_range_as_cssanger.fastqcssanger" /> - </test> - <!-- Test grooming from sanger --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" 
file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="illumina" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="cssanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> - </test> - <!-- Test grooming from solexa --> - <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" /> - </test> - <test> - <param name="input_file" 
value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="illumina" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_as_illumina.fastqillumina" /> - </test> - <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="cssanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_as_cssanger.fastqcssanger" /> - </test> - <!-- Test grooming from cssanger --> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> - <param name="input_type" value="cssanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="cssanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> - <param name="input_type" 
value="cssanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> - <param name="input_type" value="cssanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="illumina" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> - <param name="input_type" value="cssanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger_adapter_base_with_quality_score.fastqcssanger_fake_score" ftype="fastq" /> - <param name="input_type" value="cssanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="cssanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> - </test> - <!-- Test fastq with line wrapping --> - <test> - <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" 
value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="wrapping_as_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="illumina" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="wrapping_as_illumina.fastqillumina" /> - </test> - <test> - <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="None" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="wrapping_as_solexa.fastqsolexa" /> - </test> - <!-- Test forcing quality score encoding --> - <!-- Sanger, range 0 - 93 --> - <test> - <param name="input_file" value="sanger_full_range_as_decimal_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="ascii" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" 
value="decimal" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_as_decimal_sanger.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_as_tab_decimal_sanger.fastqsanger" ftype="fastq" /> - <param name="input_type" value="sanger" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="sanger" /> - <param name="force_quality_encoding" value="ascii" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- Solexa, range -5 - 62 --> - <test> - <param name="input_file" value="solexa_full_range_as_decimal_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="ascii" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" /> - </test> - <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> - <param name="input_type" value="solexa" /> - <param name="options_type_selector" value="advanced" /> - <param name="output_type" value="solexa" /> - <param name="force_quality_encoding" value="decimal" /> - <param name="summarize_input" value="summarize_input" /> - <output name="output_file" file="solexa_full_range_as_decimal_solexa.fastqsolexa" /> - </test> - </tests> - <help> + ]]></command> + <inputs> + <param name="input_file" type="data" format="fastq,fastq.gz,fastq.bz2" label="File to groom" /> + <param name="input_type" type="select" label="Input FASTQ quality scores type"> + <option value="solexa">Solexa</option> + <option value="illumina">Illumina 1.3-1.7</option> + <option value="sanger" selected="true">Sanger & Illumina 
1.8+</option> + <option value="cssanger">Color Space Sanger</option> + </param> + <conditional name="options_type"> + <param name="options_type_selector" type="select" label="Advanced Options"> + <option value="basic" selected="true">Hide Advanced Options</option> + <option value="advanced">Show Advanced Options</option> + </param> + <when value="basic" /> + <when value="advanced"> + <param name="output_type" type="select" label="Output FASTQ quality scores type" help="Galaxy tools are designed to work with the Sanger Quality score format"> + <option value="solexa">Solexa</option> + <option value="illumina">Illumina 1.3-1.7</option> + <option value="sanger" selected="true">Sanger</option> + <option value="cssanger">Color Space Sanger</option> + <option value="solexa.gz">Solexa (gz compressed)</option> + <option value="illumina.gz">Illumina 1.3-1.7 (gz compressed)</option> + <option value="sanger.gz">Sanger (gz compressed - recommended)</option> + <option value="cssanger.gz">Color Space Sanger (gz compressed)</option> + <option value="solexa.bz2">Solexa (bz2 compressed)</option> + <option value="illumina.bz2">Illumina 1.3-1.7 (bz2 compressed)</option> + <option value="sanger.bz2">Sanger (bz2 compressed)</option> + <option value="cssanger.bz2">Color Space Sanger (bz2 compressed)</option> + </param> + <param name="force_quality_encoding" type="select" label="Force Quality Score encoding"> + <option value="None">Use Source Encoding</option> + <option value="ascii" selected="true">ASCII</option> + <option value="decimal">Decimal</option> + </param> + <param name="summarize_input" type="select" label="Summarize input data"> + <option value="summarize_input" selected="true">Summarize Input</option> + <option value="dont_summarize_input">Do not Summarize Input (faster)</option> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="output_file" format="fastqsanger"> + <change_format> + <when input="input_type" value="cssanger" format="fastqcssanger" /> 
+ <when input="options_type.output_type" value="solexa" format="fastqsolexa" /> + <when input="options_type.output_type" value="illumina" format="fastqillumina" /> + <when input="options_type.output_type" value="sanger" format="fastqsanger" /> + <when input="options_type.output_type" value="cssanger" format="fastqcssanger" /> + <when input="options_type.output_type" value="solexa.gz" format="fastqsolexa.gz" /> + <when input="options_type.output_type" value="illumina.gz" format="fastqillumina.gz" /> + <when input="options_type.output_type" value="sanger.gz" format="fastqsanger.gz" /> + <when input="options_type.output_type" value="cssanger.gz" format="fastqcssanger.gz" /> + <when input="options_type.output_type" value="solexa.bz2" format="fastqsolexa.bz2" /> + <when input="options_type.output_type" value="illumina.bz2" format="fastqillumina.bz2" /> + <when input="options_type.output_type" value="sanger.bz2" format="fastqsanger.bz2" /> + <when input="options_type.output_type" value="cssanger.bz2" format="fastqcssanger.bz2" /> + </change_format> + </data> + </outputs> + <tests> + <!-- These tests include test files adapted from supplemental material in Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16. 
--> + <!-- Test basic options --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> + </test> + <test> + <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="illumina" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> + </test> + <!-- Test grooming from illumina --> + <test> + <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="illumina" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="illumina" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" 
file="illumina_full_range_original_illumina.fastqillumina" /> + </test> + <test> + <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="illumina" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="illumina_full_range_as_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="illumina" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="illumina_full_range_as_solexa.fastqsolexa" /> + </test> + <test> + <param name="input_file" value="illumina_full_range_original_illumina.fastqillumina" ftype="fastq" /> + <param name="input_type" value="illumina" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="cssanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="illumina_full_range_as_cssanger.fastqcssanger" /> + </test> + <!-- Test grooming from sanger --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> + <test> + 
<param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="illumina" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="cssanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> + </test> + <!-- Test grooming from solexa --> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" /> + </test> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param 
name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="illumina" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_as_illumina.fastqillumina" /> + </test> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_as_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="cssanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_as_cssanger.fastqcssanger" /> + </test> + <!-- Test grooming from cssanger --> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="cssanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="advanced" /> + <param 
name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="illumina" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_illumina.fastqillumina" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_solexa.fastqsolexa" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger_adapter_base_with_quality_score.fastqcssanger_fake_score" ftype="fastq" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="cssanger" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" /> + </test> + <!-- Test fastq with line wrapping --> + <test> + <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param 
name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="wrapping_as_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="illumina" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="wrapping_as_illumina.fastqillumina" /> + </test> + <test> + <param name="input_file" value="wrapping_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="None" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="wrapping_as_solexa.fastqsolexa" /> + </test> + <!-- Test forcing quality score encoding --> + <!-- Sanger, range 0 - 93 --> + <test> + <param name="input_file" value="sanger_full_range_as_decimal_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="ascii" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="decimal" /> + <param name="summarize_input" value="summarize_input" /> + 
<output name="output_file" file="sanger_full_range_as_decimal_sanger.fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_tab_decimal_sanger.fastqsanger" ftype="fastq" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="sanger" /> + <param name="force_quality_encoding" value="ascii" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> + </test> + <!-- Solexa, range -5 - 62 --> + <test> + <param name="input_file" value="solexa_full_range_as_decimal_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="ascii" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" /> + </test> + <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastq" /> + <param name="input_type" value="solexa" /> + <param name="options_type_selector" value="advanced" /> + <param name="output_type" value="solexa" /> + <param name="force_quality_encoding" value="decimal" /> + <param name="summarize_input" value="summarize_input" /> + <output name="output_file" file="solexa_full_range_as_decimal_solexa.fastqsolexa" /> + </test> + <!-- compressed formats --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastq.gz" /> + <param name="input_type" value="sanger" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" decompress="true" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger.bz2" 
ftype="fastq.bz2" /> + <param name="input_type" value="cssanger" /> + <param name="options_type_selector" value="basic" /> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" decompress="true" /> + </test> + </tests> + <help><![CDATA[ **What it does** This tool offers several conversion options relating to the FASTQ format. When using *Basic* options, the output will be *sanger* formatted or *cssanger* formatted (when the input is Color Space Sanger). -When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). +When converting, if a quality score falls outside of the target score range, it will be coerced to the closest available value (i.e. the minimum or maximum). When converting between Solexa and the other formats, quality scores are mapped between Solexa and PHRED scales using the equations found in `Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.`_ @@ -351,10 +386,10 @@ SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS ...............................IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII ..........................XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX - !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ | | | | | | 33 59 64 73 104 126 - + S - Sanger Phred+33, 93 values (0, 93) (0 to 60 expected in raw reads) I - Illumina 1.3 Phred+64, 62 values (0, 62) (0 to 40 expected in raw reads) X - Solexa Solexa+64, 67 values (-5, 62) (-5 to 40 expected in raw reads) @@ -367,12 +402,9 @@ ------ - -.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. 
The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: http://www.ncbi.nlm.nih.gov/pubmed/20015970 - - </help> - <citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> - </citations> - +.. _Cock PJ, Fields CJ, Goto N, Heuer ML, Rice PM. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucleic Acids Res. 2009 Dec 16.: https://doi.org/10.1093/nar/gkp1137 + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Wed Nov 11 12:40:56 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>