Repository 'fastq_trimmer_by_quality'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/fastq_trimmer_by_quality

Changeset 3:c64d534a763c (2017-09-30)
Previous changeset 2:25c24379693a (2017-02-02) Next changeset 4:8050e091e99b (2020-03-01)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastq_trimmer_by_quality commit f2582539542b33240234e8ea6093e25d0aee9b6a
modified:
fastq_trimmer_by_quality.xml
removed:
fastq_trimmer_by_quality.py
b
diff -r 25c24379693a -r c64d534a763c fastq_trimmer_by_quality.py
--- a/fastq_trimmer_by_quality.py Thu Feb 02 12:12:55 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,126 +0,0 @@
-#Dan Blankenberg
-from optparse import OptionParser
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def mean( score_list ):
-    return float( sum( score_list ) ) / float( len( score_list ) )
-
-ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean }
-
-def compare( aggregated_value, operator, threshold_value ):
-    if operator == '>':
-        return aggregated_value > threshold_value
-    elif operator == '>=':
-        return aggregated_value >= threshold_value
-    elif operator == '==':
-        return aggregated_value == threshold_value
-    elif operator == '<':
-        return aggregated_value < threshold_value
-    elif operator == '<=':
-        return aggregated_value <= threshold_value
-    elif operator == '!=':
-        return aggregated_value != threshold_value
-
-def exclude( value_list, exclude_indexes ):
-    rval = []
-    for i, val in enumerate( value_list ):
-        if i not in exclude_indexes:
-            rval.append( val )
-    return rval
-
-def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ):
-    if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ):
-        return True
-    if exclude_indexes:
-        for exclude_index in exclude_indexes:
-            excluded_list = exclude( aggregate_list, exclude_index )
-            if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ):
-                return True
-    return False
-
-def main():
-    usage = "usage: %prog [options] input_file output_file"
-    parser = OptionParser( usage=usage )
-    parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' )
-    parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' )
-    parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' )
-    parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' )
-    parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' )
-    parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' )
-    parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' )
-    parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' )
-    parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length")
-    ( options, args ) = parser.parse_args()
-    
-    if len ( args ) != 2:
-        parser.error( "Need to specify an input file and an output file" )
-    
-    if options.window_size < 1:
-        parser.error( 'You must specify a strictly positive window size' )
-    
-    if options.window_step < 1:
-        parser.error( 'You must specify a strictly positive step size' )
-    
-    #determine an exhaustive list of window indexes that can be excluded from aggregation
-    exclude_window_indexes = []
-    last_exclude_indexes = []
-    for exclude_count in range( min( options.exclude_count, options.window_size ) ):
-        if last_exclude_indexes:
-            new_exclude_indexes = []
-            for exclude_list in last_exclude_indexes:
-                for window_index in range( options.window_size ):
-                    if window_index not in exclude_list:
-                        new_exclude = sorted( exclude_list + [ window_index ] )
-                        if new_exclude not in exclude_window_indexes + new_exclude_indexes:
-                            new_exclude_indexes.append( new_exclude )
-            exclude_window_indexes += new_exclude_indexes
-            last_exclude_indexes = new_exclude_indexes
-        else:
-            for window_index in range( options.window_size ):
-                last_exclude_indexes.append( [ window_index ] )
-            exclude_window_indexes = list( last_exclude_indexes )
-    
-    out = fastqWriter( open( args[1], 'wb' ), format = options.format )
-    action = ACTION_METHODS[ options.aggregation_action ]
-    
-    num_reads = None
-    num_reads_excluded = 0
-    for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ):
-        for trim_end in options.trim_ends:
-            quality_list = fastq_read.get_decimal_quality_scores()
-            if trim_end == '5':
-                lwindow_position = 0 #left position of window
-                while True:
-                    if lwindow_position >= len( quality_list ):
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( lwindow_position, None )
-                        break
-                    lwindow_position += options.window_step
-            else:
-                rwindow_position = len( quality_list ) #right position of window
-                while True:
-                    lwindow_position = rwindow_position - options.window_size #left position of window
-                    if rwindow_position <= 0 or lwindow_position < 0:
-                        fastq_read.sequence = ''
-                        fastq_read.quality = ''
-                        break
-                    if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ):
-                        fastq_read = fastq_read.slice( None, rwindow_position )
-                        break
-                    rwindow_position -= options.window_step
-        if options.keep_zero_length or len( fastq_read ):
-            out.write( fastq_read )
-        else:
-            num_reads_excluded += 1
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were processed." % ( num_reads + 1 )
-    if num_reads_excluded:
-        print "%i reads of zero length were excluded from the output." % num_reads_excluded
-
-if __name__ == "__main__": main()
b
diff -r 25c24379693a -r c64d534a763c fastq_trimmer_by_quality.xml
--- a/fastq_trimmer_by_quality.xml Thu Feb 02 12:12:55 2017 -0500
+++ b/fastq_trimmer_by_quality.xml Sat Sep 30 13:56:36 2017 -0400
[
b'@@ -1,130 +1,131 @@\n-<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.0.1">\n-  <description>by sliding window</description>\n-  <requirements>\n-    <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement>\n-  </requirements>\n-  <command>python \'$__tool_directory__/fastq_trimmer_by_quality.py\' \'$input_file\' \'$output_file\' -f \'${input_file.extension[len( \'fastq\' ):]}\' -s $window_size\n-    -t $step_size -e $trim_ends -a $aggregation_action -x $exclude_count -c \'$score_comparison\' -q $quality_score\n-    #if $keep_zero_length:\n-        -k\n-    #end if\n-  </command>\n-  <inputs>\n-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>\n-    <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" checked="false"/>\n-    <param name="trim_ends" type="select" label="Trim ends">\n-      <option value="53" selected="True">5\' and 3\'</option>\n-      <option value="5">5\' only</option>\n-      <option value="3">3\' only</option>\n-    </param>\n-    <param name="window_size" type="integer" value="1" label="Window size"/>\n-    <param name="step_size" type="integer" value="1" label="Step Size" />\n-    <param name="exclude_count" label="Maximum number of bases to exclude from the window during aggregation" value="0" type="integer" />\n-    <param name="aggregation_action" type="select" label="Aggregate action for window">\n-      <option value="min" selected="True">min score</option>\n-      <option value="max">max score</option>\n-      <option value="sum">sum of scores</option>\n-      <option value="mean">mean of scores</option>\n-    </param>\n-    <param name="score_comparison" type="select" label="Trim until aggregate score is">\n-      <sanitizer>\n-        <valid initial="none">\n-            <add value="&lt;&gt;=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->\n-        </valid>\n-      </sanitizer>\n-      <option value="&gt;">&gt;</option>\n-      <option value="&gt;=" selected="true">&gt;=</option>\n-      <option value="==">==</option>\n-      <option value="!=">!=</option>\n-      <option value="&lt;">&lt;</option>\n-      <option value="&lt;=">&lt;=</option>\n-    </param>\n-    <param name="quality_score" label="Quality Score" value="0" type="float" />\n-  </inputs>\n-  <outputs>\n-    <data name="output_file" format_source="input_file" />\n-  </outputs>\n-  <tests>\n-    <test>\n-      <!-- Trim until window size 1 >= 20;both ends -->\n-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n-      <param name="keep_zero_length" value="false" />\n-      <param name="trim_ends" value="53"/>\n-      <param name="window_size" value="1"/>\n-      <param name="step_size" value="1"/>\n-      <param name="exclude_count" value="0"/>\n-      <param name="aggregation_action" value="min"/>\n-      <param name="score_comparison" value="&gt;="/>\n-      <param name="quality_score" value="20"/>\n-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" />\n-    </test>\n-    <test>\n-      <!-- Trim until window size 1 >= 20; 5\' end only -->\n-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n-      <param name="keep_zero_length" value="false" />\n-      <param name="trim_ends" value="5"/>\n-      <param name="window_size" value="1"/>\n-      <param name="step_size" value="1"/>\n-      <param name="exclude_count" value="0"/>\n-      <param name="aggregation_action" value="min"/>\n-      <param name="score_comparison" value="&gt;="/>\n-      <param name="quality_score" value="20"/>\n-      <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" />\n-    </test>\n-    <test>\n-      <!-- Trim until window size 1 >= 20; 3\' end only -->\n-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n-      <param na'..b'param name="exclude_count" value="0"/>\n+            <param name="aggregation_action" value="min"/>\n+            <param name="score_comparison" value="&gt;="/>\n+            <param name="quality_score" value="20"/>\n+            <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" ftype="fastqsanger" />\n+        </test>\n+        <!-- Trim until window size 1 >= 20; 3\' end only -->\n+        <test>\n+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+            <param name="keep_zero_length" value="false" />\n+            <param name="trim_ends" value="3"/>\n+            <param name="window_size" value="1"/>\n+            <param name="step_size" value="1"/>\n+            <param name="exclude_count" value="0"/>\n+            <param name="aggregation_action" value="min"/>\n+            <param name="score_comparison" value="&gt;="/>\n+            <param name="quality_score" value="20"/>\n+            <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" ftype="fastqsanger" />\n+        </test>\n+        <!-- Trim until window size 2 >= 1;both ends, 1 deviant score -->\n+        <test>\n+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+            <param name="keep_zero_length" value="false" />\n+            <param name="trim_ends" value="53"/>\n+            <param name="window_size" value="2"/>\n+            <param name="step_size" value="1"/>\n+            <param name="exclude_count" value="1"/>\n+            <param name="aggregation_action" value="min"/>\n+            <param name="score_comparison" value="&gt;="/>\n+            <param name="quality_score" value="1"/>\n+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+        </test>\n+        <!-- Trim entire sequences; keep empty reads -->\n+        <test>\n+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+            <param name="keep_zero_length" value="true" />\n+            <param name="trim_ends" value="53"/>\n+            <param name="window_size" value="1"/>\n+            <param name="step_size" value="1"/>\n+            <param name="exclude_count" value="0"/>\n+            <param name="aggregation_action" value="min"/>\n+            <param name="score_comparison" value="&gt;="/>\n+            <param name="quality_score" value="999"/>\n+            <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" ftype="fastqsanger" />\n+        </test>\n+        <!-- Trim entire sequences; discard empty reads -->\n+        <test>\n+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+            <param name="keep_zero_length" value="false" />\n+            <param name="trim_ends" value="53"/>\n+            <param name="window_size" value="1"/>\n+            <param name="step_size" value="1"/>\n+            <param name="exclude_count" value="0"/>\n+            <param name="aggregation_action" value="min"/>\n+            <param name="score_comparison" value="&gt;="/>\n+            <param name="quality_score" value="999"/>\n+            <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n **What it does**\n \n This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to \'simple\' trimming of the ends.\n@@ -138,8 +139,8 @@\n .. class:: warningmark\n \n Trimming a color space read will cause any adapter base to be lost.\n-  </help>\n-  <citations>\n-    <citation type="doi">10.1093/bioinformatics/btq281</citation>\n-  </citations>\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1093/bioinformatics/btq281</citation>\n+    </citations>\n </tool>\n'