changeset 3:c64d534a763c draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastq_trimmer_by_quality commit f2582539542b33240234e8ea6093e25d0aee9b6a
author devteam
date Sat, 30 Sep 2017 13:56:36 -0400
parents 25c24379693a
children 8050e091e99b
files fastq_trimmer_by_quality.py fastq_trimmer_by_quality.xml
diffstat 2 files changed, 132 insertions(+), 257 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_trimmer_by_quality.py	Thu Feb 02 12:12:55 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#Dan Blankenberg
-from optparse import OptionParser
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def mean( score_list ):
-    return float( sum( score_list ) ) / float( len( score_list ) )
-
-ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean }
-
-def compare( aggregated_value, operator, threshold_value ):
-    if operator == '>':
-        return aggregated_value > threshold_value
-    elif operator == '>=':
-        return aggregated_value >= threshold_value
-    elif operator == '==':
-        return aggregated_value == threshold_value
-    elif operator == '<':
-        return aggregated_value < threshold_value
-    elif operator == '<=':
-        return aggregated_value <= threshold_value
-    elif operator == '!=':
-        return aggregated_value != threshold_value
-
-def exclude( value_list, exclude_indexes ):
-    rval = []
-    for i, val in enumerate( value_list ):
-        if i not in exclude_indexes:
-            rval.append( val )
-    return rval
-
-def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ):
-    if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ):
-        return True
-    if exclude_indexes:
-        for exclude_index in exclude_indexes:
-            excluded_list = exclude( aggregate_list, exclude_index )
-            if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ):
-                return True
-    return False
-
-def main():
-    usage = "usage: %prog [options] input_file output_file"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
-    parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' )
-    parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' )
-    parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' )
-    parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' )
-    parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' )
-    parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' )
-    parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
-    parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length")
-    ( options, args ) = parser.parse_args()
-    
-    if len ( args ) != 2:
-        parser.error( "Need to specify an input file and an output file" )
-    
-    if options.window_size < 1:
-        parser.error( 'You must specify a strictly positive window size' )
-    
-    if options.window_step < 1:
-        parser.error( 'You must specify a strictly positive step size' )
-    
-    #determine an exhaustive list of window indexes that can be excluded from aggregation
-    exclude_window_indexes = []
-    last_exclude_indexes = []
-    for exclude_count in range( min( options.exclude_count, options.window_size ) ):
-        if last_exclude_indexes:
-            new_exclude_indexes = []
-            for exclude_list in last_exclude_indexes:
-                for window_index in range( options.window_size ):
-                    if window_index not in exclude_list:
-                        new_exclude = sorted( exclude_list + [ window_index ] )
-                        if new_exclude not in exclude_window_indexes + new_exclude_indexes:
-                            new_exclude_indexes.append( new_exclude )
-            exclude_window_indexes += new_exclude_indexes
-            last_exclude_indexes = new_exclude_indexes
-        else:
-            for window_index in range( options.window_size ):
-                last_exclude_indexes.append( [ window_index ] )
-            exclude_window_indexes = list( last_exclude_indexes )
-    
-    out = fastqWriter( open( args[1], 'wb' ), format = options.format )
-    action = ACTION_METHODS[ options.aggregation_action ]
-    
-    num_reads = None
-    num_reads_excluded = 0
-    for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
-        for trim_end in options.trim_ends:
-            quality_list = fastq_read.get_decimal_quality_scores()
-            if trim_end == '5':
-                lwindow_position = 0 #left position of window
-                while True:
-                    if lwindow_position >= len( quality_list ):
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( lwindow_position, None )
-                        break
-                    lwindow_position += options.window_step
-            else:
-                rwindow_position = len( quality_list ) #right position of window
-                while True:
-                    lwindow_position = rwindow_position - options.window_size #left position of window
-                    if rwindow_position <= 0 or lwindow_position < 0:
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( None, rwindow_position )
-                        break
-                    rwindow_position -= options.window_step
-        if options.keep_zero_length or len( fastq_read ):
-            out.write( fastq_read )
-        else:
-            num_reads_excluded += 1
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were processed." % ( num_reads + 1 )
-    if num_reads_excluded:
-        print "%i reads of zero length were excluded from the output." % num_reads_excluded
-
-if __name__ == "__main__": main()
--- a/fastq_trimmer_by_quality.xml	Thu Feb 02 12:12:55 2017 -0500
+++ b/fastq_trimmer_by_quality.xml	Sat Sep 30 13:56:36 2017 -0400
@@ -1,130 +1,131 @@
-<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.0.1">
-  <description>by sliding window</description>
-  <requirements>
-    <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement>
-  </requirements>
-  <command>python '$__tool_directory__/fastq_trimmer_by_quality.py' '$input_file' '$output_file' -f '${input_file.extension[len( 'fastq' ):]}' -s $window_size
-    -t $step_size -e $trim_ends -a $aggregation_action -x $exclude_count -c '$score_comparison' -q $quality_score
-    #if $keep_zero_length:
-        -k
-    #end if
-  </command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
-    <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" checked="false"/>
-    <param name="trim_ends" type="select" label="Trim ends">
-      <option value="53" selected="True">5' and 3'</option>
-      <option value="5">5' only</option>
-      <option value="3">3' only</option>
-    </param>
-    <param name="window_size" type="integer" value="1" label="Window size"/>
-    <param name="step_size" type="integer" value="1" label="Step Size" />
-    <param name="exclude_count" label="Maximum number of bases to exclude from the window during aggregation" value="0" type="integer" />
-    <param name="aggregation_action" type="select" label="Aggregate action for window">
-      <option value="min" selected="True">min score</option>
-      <option value="max">max score</option>
-      <option value="sum">sum of scores</option>
-      <option value="mean">mean of scores</option>
-    </param>
-    <param name="score_comparison" type="select" label="Trim until aggregate score is">
-      <sanitizer>
-        <valid initial="none">
-            <add value="&lt;&gt;=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->
-        </valid>
-      </sanitizer>
-      <option value="&gt;">&gt;</option>
-      <option value="&gt;=" selected="true">&gt;=</option>
-      <option value="==">==</option>
-      <option value="!=">!=</option>
-      <option value="&lt;">&lt;</option>
-      <option value="&lt;=">&lt;=</option>
-    </param>
-    <param name="quality_score" label="Quality Score" value="0" type="float" />
-  </inputs>
-  <outputs>
-    <data name="output_file" format_source="input_file" />
-  </outputs>
-  <tests>
-    <test>
-      <!-- Trim until window size 1 >= 20;both ends -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="false" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 1 >= 20; 5' end only -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="false" />
-      <param name="trim_ends" value="5"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 1 >= 20; 3' end only -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="false" />
-      <param name="trim_ends" value="3"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="20"/>
-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim until window size 2 >= 1;both ends, 1 deviant score -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="false" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="2"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="1"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="1"/>
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim entire sequences; keep empty reads -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="true" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="999"/>
-      <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" />
-    </test>
-    <test>
-      <!-- Trim entire sequences; discard empty reads -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="keep_zero_length" value="false" />
-      <param name="trim_ends" value="53"/>
-      <param name="window_size" value="1"/>
-      <param name="step_size" value="1"/>
-      <param name="exclude_count" value="0"/>
-      <param name="aggregation_action" value="min"/>
-      <param name="score_comparison" value="&gt;="/>
-      <param name="quality_score" value="999"/>
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-  </tests>
-  <help>
+<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.1.1">
+    <description>by sliding window</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-trimmer-by-quality '$input_file' '$output_file' -f '${input_file.extension[len('fastq'):]}' -s $window_size
+-t $step_size -e $trim_ends -a $aggregation_action -x $exclude_count -c '$score_comparison' -q $quality_score
+#if $keep_zero_length:
+    -k
+#end if
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File"/>
+        <param name="keep_zero_length" type="boolean" checked="false" label="Keep reads with zero length"/>
+        <param name="trim_ends" type="select" label="Trim ends">
+            <option value="53" selected="true">5' and 3'</option>
+            <option value="5">5' only</option>
+            <option value="3">3' only</option>
+        </param>
+        <param name="window_size" type="integer" value="1" label="Window size"/>
+        <param name="step_size" type="integer" value="1" label="Step size" />
+        <param name="exclude_count" type="integer" value="0" label="Maximum number of bases to exclude from the window during aggregation" />
+        <param name="aggregation_action" type="select" label="Aggregate action for window">
+            <option value="min" selected="true">min score</option>
+            <option value="max">max score</option>
+            <option value="sum">sum of scores</option>
+            <option value="mean">mean of scores</option>
+        </param>
+        <param name="score_comparison" type="select" label="Trim until aggregate score is">
+            <sanitizer>
+                <valid initial="none">
+                    <add value="&lt;&gt;=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->
+                </valid>
+            </sanitizer>
+            <option value="&gt;">&gt;</option>
+            <option value="&gt;=" selected="true">&gt;=</option>
+            <option value="==">==</option>
+            <option value="!=">!=</option>
+            <option value="&lt;">&lt;</option>
+            <option value="&lt;=">&lt;=</option>
+        </param>
+        <param name="quality_score" type="float" value="0" label="Quality score" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format_source="input_file" />
+    </outputs>
+    <tests>
+        <!-- Trim until window size 1 >= 20; both ends -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="false" />
+            <param name="trim_ends" value="53"/>
+            <param name="window_size" value="1"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="0"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="20"/>
+            <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim until window size 1 >= 20; 5' end only -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="false" />
+            <param name="trim_ends" value="5"/>
+            <param name="window_size" value="1"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="0"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="20"/>
+            <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim until window size 1 >= 20; 3' end only -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="false" />
+            <param name="trim_ends" value="3"/>
+            <param name="window_size" value="1"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="0"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="20"/>
+            <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim until window size 2 >= 1; both ends, 1 deviant score -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="false" />
+            <param name="trim_ends" value="53"/>
+            <param name="window_size" value="2"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="1"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="1"/>
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim entire sequences; keep empty reads -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="true" />
+            <param name="trim_ends" value="53"/>
+            <param name="window_size" value="1"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="0"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="999"/>
+            <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim entire sequences; discard empty reads -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="keep_zero_length" value="false" />
+            <param name="trim_ends" value="53"/>
+            <param name="window_size" value="1"/>
+            <param name="step_size" value="1"/>
+            <param name="exclude_count" value="0"/>
+            <param name="aggregation_action" value="min"/>
+            <param name="score_comparison" value="&gt;="/>
+            <param name="quality_score" value="999"/>
+            <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**
 
 This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to 'simple' trimming of the ends.
@@ -138,8 +139,8 @@
 .. class:: warningmark
 
 Trimming a color space read will cause any adapter base to be lost.
-  </help>
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btq281</citation>
-  </citations>
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>