Mercurial > repos > devteam > fastq_trimmer
changeset 2:430b9da91435 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_trimmer commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 13:55:56 -0400 |
parents | e0cfb5a703ce |
children | 2d0d13b0b0f1 |
files | fastq_trimmer.py fastq_trimmer.xml tool_dependencies.xml |
diffstat | 3 files changed, 90 insertions(+), 150 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_trimmer.py Wed Nov 11 12:42:58 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -#Dan Blankenberg -import sys -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - -def main(): - input_filename = sys.argv[1] - output_filename = sys.argv[2] - left_offset = sys.argv[3] - right_offset = sys.argv[4] - percent_offsets = sys.argv[5] == 'offsets_percent' - input_type = sys.argv[6] or 'sanger' - keep_zero_length = sys.argv[7] == 'keep_zero_length' - - out = fastqWriter( open( output_filename, 'wb' ), format = input_type ) - num_reads_excluded = 0 - num_reads = None - for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - if percent_offsets: - left_column_offset = int( round( float( left_offset ) / 100.0 * float( len( fastq_read ) ) ) ) - right_column_offset = int( round( float( right_offset ) / 100.0 * float( len( fastq_read ) ) ) ) - else: - left_column_offset = int( left_offset ) - right_column_offset = int( right_offset ) - if right_column_offset > 0: - right_column_offset = -right_column_offset - else: - right_column_offset = None - fastq_read = fastq_read.slice( left_column_offset, right_column_offset ) - if keep_zero_length or len( fastq_read ): - out.write( fastq_read ) - else: - num_reads_excluded += 1 - out.close() - if num_reads is None: - print "No valid fastq reads could be processed." - else: - print "%i fastq reads were processed." % ( num_reads + 1 ) - if num_reads_excluded: - print "%i reads of zero length were excluded from the output." % num_reads_excluded - -if __name__ == "__main__": main()
--- a/fastq_trimmer.xml Wed Nov 11 12:42:58 2015 -0500 +++ b/fastq_trimmer.xml Sat Sep 30 13:55:56 2017 -0400 @@ -1,125 +1,112 @@ -<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0"> - <description>by column</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command> - <inputs> - <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/> - <conditional name="offset_type"> - <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> - <option value="offsets_absolute" selected="true">Absolute Values</option> - <option value="offsets_percent">Percentage of Read Length</option> - </param> - <when value="offsets_absolute"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left"> - <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right"> - <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - </when> - <when value="offsets_percent"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="float"> - <validator 
type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="float"> - <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - </when> - </conditional> - <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/> - </inputs> - <outputs> - <data name="output_file" format="input" /> - </outputs> - <tests> - <test> - <!-- Do nothing trim --> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="keep_zero_length" value="keep_zero_length" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- Trim to empty File --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="30"/> - <param name="right_column_offset" value="64"/> - <param name="keep_zero_length" value="exclude_zero_length" /> - <output name="output_file" file="empty_file.dat" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="base_offset_type" value="offsets_percent"/> - <param name="left_column_offset" value="50"/> - <param name="right_column_offset" value="50"/> - <param name="keep_zero_length" value="exclude_zero_length" /> - <output name="output_file" file="empty_file.dat" /> - </test> - <!-- Trim to 4 inner-most bases --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" 
/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="45"/> - <param name="right_column_offset" value="45"/> - <param name="keep_zero_length" value="exclude_zero_length" /> - <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="base_offset_type" value="offsets_percent"/> - <param name="left_column_offset" value="47.87"/> - <param name="right_column_offset" value="47.87"/> - <param name="keep_zero_length" value="exclude_zero_length" /> - <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> - </test> - </tests> - <help> +<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.1.1"> + <description>by column</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-trimmer '$input_file' '$output_file' ${offset_type['left_column_offset']} ${offset_type['right_column_offset']} ${offset_type['base_offset_type']} '${input_file.extension[len('fastq'):]}' $keep_zero_length + ]]></command> + <inputs> + <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz" label="FASTQ file"/> + <conditional name="offset_type"> + <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> + <option value="offsets_absolute" selected="true">Absolute Values</option> + <option value="offsets_percent">Percentage of Read Length</option> + </param> + <when value="offsets_absolute"> + <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" /> + <param name="right_column_offset" 
type="integer" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position" /> + </when> + <when value="offsets_percent"> + <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" /> + <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" /> + </when> + </conditional> + <param name="keep_zero_length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false" label="Keep reads with zero length" /> + </inputs> + <outputs> + <data name="output_file" format_source="input_file" /> + </outputs> + <tests> + <test> + <!-- Do nothing trim --> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="keep_zero_length" value="keep_zero_length" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- Trim to empty File --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="30"/> + <param name="right_column_offset" value="64"/> + <param name="keep_zero_length" value="exclude_zero_length" /> + <output name="output_file" file="empty_file.dat" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="base_offset_type" value="offsets_percent"/> + <param name="left_column_offset" value="50"/> + <param name="right_column_offset" value="50"/> + <param name="keep_zero_length" 
value="exclude_zero_length" /> + <output name="output_file" file="empty_file.dat" ftype="fastqsanger" /> + </test> + <!-- Trim to 4 inner-most bases --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="45"/> + <param name="right_column_offset" value="45"/> + <param name="keep_zero_length" value="exclude_zero_length" /> + <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="base_offset_type" value="offsets_percent"/> + <param name="left_column_offset" value="47.87"/> + <param name="right_column_offset" value="47.87"/> + <param name="keep_zero_length" value="exclude_zero_length" /> + <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ **What it does** - + This tool allows you to trim the ends of reads. -You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. +You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. 
For example, if you have a read of length 36:: - + @Some FASTQ Sanger Read CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA + - =@@.@;B-%?8>CBA@>7@7BBCA4-48%<;;%<B@ - + =@@.@;B-%?8>CBA@>7@7BBCA4-48%<;;%<B@ + And you set absolute offsets of 2 and 9:: - + @Some FASTQ Sanger Read ATATGTNCTCACTGATAAGTGGATA + - @.@;B-%?8>CBA@>7@7BBCA4-4 - + @.@;B-%?8>CBA@>7@7BBCA4-4 + Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36):: - + @Some FASTQ Sanger Read ATATGTNCTCACTGATAAGTGGATATN + - @.@;B-%?8>CBA@>7@7BBCA4-48% - + @.@;B-%?8>CBA@>7@7BBCA4-48% + ----- .. class:: warningmark Trimming a color space read will cause any adapter base to be lost. - ------- - - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> - </citations> - + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Wed Nov 11 12:42:58 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>