Previous changeset 2:25c24379693a (2017-02-02) Next changeset 4:8050e091e99b (2020-03-01) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastq_trimmer_by_quality commit f2582539542b33240234e8ea6093e25d0aee9b6a |
modified:
fastq_trimmer_by_quality.xml |
removed:
fastq_trimmer_by_quality.py |
b |
diff -r 25c24379693a -r c64d534a763c fastq_trimmer_by_quality.py --- a/fastq_trimmer_by_quality.py Thu Feb 02 12:12:55 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,126 +0,0 @@ -#Dan Blankenberg -from optparse import OptionParser -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - -def mean( score_list ): - return float( sum( score_list ) ) / float( len( score_list ) ) - -ACTION_METHODS = { 'min':min, 'max':max, 'sum':sum, 'mean':mean } - -def compare( aggregated_value, operator, threshold_value ): - if operator == '>': - return aggregated_value > threshold_value - elif operator == '>=': - return aggregated_value >= threshold_value - elif operator == '==': - return aggregated_value == threshold_value - elif operator == '<': - return aggregated_value < threshold_value - elif operator == '<=': - return aggregated_value <= threshold_value - elif operator == '!=': - return aggregated_value != threshold_value - -def exclude( value_list, exclude_indexes ): - rval = [] - for i, val in enumerate( value_list ): - if i not in exclude_indexes: - rval.append( val ) - return rval - -def exclude_and_compare( aggregate_action, aggregate_list, operator, threshold_value, exclude_indexes = None ): - if not aggregate_list or compare( aggregate_action( aggregate_list ), operator, threshold_value ): - return True - if exclude_indexes: - for exclude_index in exclude_indexes: - excluded_list = exclude( aggregate_list, exclude_index ) - if not excluded_list or compare( aggregate_action( excluded_list ), operator, threshold_value ): - return True - return False - -def main(): - usage = "usage: %prog [options] input_file output_file" - parser = OptionParser( usage=usage ) - parser.add_option( '-f', '--format', dest='format', type='choice', default='sanger', choices=( 'sanger', 'cssanger', 'solexa', 'illumina' ), help='FASTQ variant type' ) - parser.add_option( '-s', '--window_size', type="int", dest='window_size', default='1', help='Window size' ) - parser.add_option( '-t', '--window_step', type="int", dest='window_step', default='1', help='Window step' ) - parser.add_option( '-e', '--trim_ends', type="choice", dest='trim_ends', default='53', choices=('5','3','53','35' ), help='Ends to Trim' ) - parser.add_option( '-a', '--aggregation_action', type="choice", dest='aggregation_action', default='min', choices=('min','max','sum','mean' ), help='Aggregate action for window' ) - parser.add_option( '-x', '--exclude_count', type="int", dest='exclude_count', default='0', help='Maximum number of bases to exclude from the window during aggregation' ) - parser.add_option( '-c', '--score_comparison', type="choice", dest='score_comparison', default='>=', choices=('>','>=','==','<', '<=', '!=' ), help='Keep read when aggregate score is' ) - parser.add_option( '-q', '--quality_score', type="float", dest='quality_score', default='0', help='Quality Score' ) - parser.add_option( "-k", "--keep_zero_length", action="store_true", dest="keep_zero_length", default=False, help="Keep reads with zero length") - ( options, args ) = parser.parse_args() - - if len ( args ) != 2: - parser.error( "Need to specify an input file and an output file" ) - - if options.window_size < 1: - parser.error( 'You must specify a strictly positive window size' ) - - if options.window_step < 1: - parser.error( 'You must specify a strictly positive step size' ) - - #determine an exhaustive list of window indexes that can be excluded from aggregation - exclude_window_indexes = [] - last_exclude_indexes = [] - for exclude_count in range( min( options.exclude_count, options.window_size ) ): - if last_exclude_indexes: - new_exclude_indexes = [] - for exclude_list in last_exclude_indexes: - for window_index in range( options.window_size ): - if window_index not in exclude_list: - new_exclude = sorted( exclude_list + [ window_index ] ) - if new_exclude not in exclude_window_indexes + new_exclude_indexes: - new_exclude_indexes.append( new_exclude ) - exclude_window_indexes += new_exclude_indexes - last_exclude_indexes = new_exclude_indexes - else: - for window_index in range( options.window_size ): - last_exclude_indexes.append( [ window_index ] ) - exclude_window_indexes = list( last_exclude_indexes ) - - out = fastqWriter( open( args[1], 'wb' ), format = options.format ) - action = ACTION_METHODS[ options.aggregation_action ] - - num_reads = None - num_reads_excluded = 0 - for num_reads, fastq_read in enumerate( fastqReader( open( args[0] ), format = options.format ) ): - for trim_end in options.trim_ends: - quality_list = fastq_read.get_decimal_quality_scores() - if trim_end == '5': - lwindow_position = 0 #left position of window - while True: - if lwindow_position >= len( quality_list ): - fastq_read.sequence = '' - fastq_read.quality = '' - break - if exclude_and_compare( action, quality_list[ lwindow_position:lwindow_position + options.window_size ], options.score_comparison, options.quality_score, exclude_window_indexes ): - fastq_read = fastq_read.slice( lwindow_position, None ) - break - lwindow_position += options.window_step - else: - rwindow_position = len( quality_list ) #right position of window - while True: - lwindow_position = rwindow_position - options.window_size #left position of window - if rwindow_position <= 0 or lwindow_position < 0: - fastq_read.sequence = '' - fastq_read.quality = '' - break - if exclude_and_compare( action, quality_list[ lwindow_position:rwindow_position ], options.score_comparison, options.quality_score, exclude_window_indexes ): - fastq_read = fastq_read.slice( None, rwindow_position ) - break - rwindow_position -= options.window_step - if options.keep_zero_length or len( fastq_read ): - out.write( fastq_read ) - else: - num_reads_excluded += 1 - out.close() - if num_reads is None: - print "No valid FASTQ reads could be processed." - else: - print "%i FASTQ reads were processed." % ( num_reads + 1 ) - if num_reads_excluded: - print "%i reads of zero length were excluded from the output." % num_reads_excluded - -if __name__ == "__main__": main() |
b |
diff -r 25c24379693a -r c64d534a763c fastq_trimmer_by_quality.xml --- a/fastq_trimmer_by_quality.xml Thu Feb 02 12:12:55 2017 -0500 +++ b/fastq_trimmer_by_quality.xml Sat Sep 30 13:56:36 2017 -0400 |
[ |
b'@@ -1,130 +1,131 @@\n-<tool id="fastq_quality_trimmer" name="FASTQ Quality Trimmer" version="1.0.1">\n- <description>by sliding window</description>\n- <requirements>\n- <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement>\n- </requirements>\n- <command>python \'$__tool_directory__/fastq_trimmer_by_quality.py\' \'$input_file\' \'$output_file\' -f \'${input_file.extension[len( \'fastq\' ):]}\' -s $window_size\n- -t $step_size -e $trim_ends -a $aggregation_action -x $exclude_count -c \'$score_comparison\' -q $quality_score\n- #if $keep_zero_length:\n- -k\n- #end if\n- </command>\n- <inputs>\n- <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>\n- <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" checked="false"/>\n- <param name="trim_ends" type="select" label="Trim ends">\n- <option value="53" selected="True">5\' and 3\'</option>\n- <option value="5">5\' only</option>\n- <option value="3">3\' only</option>\n- </param>\n- <param name="window_size" type="integer" value="1" label="Window size"/>\n- <param name="step_size" type="integer" value="1" label="Step Size" />\n- <param name="exclude_count" label="Maximum number of bases to exclude from the window during aggregation" value="0" type="integer" />\n- <param name="aggregation_action" type="select" label="Aggregate action for window">\n- <option value="min" selected="True">min score</option>\n- <option value="max">max score</option>\n- <option value="sum">sum of scores</option>\n- <option value="mean">mean of scores</option>\n- </param>\n- <param name="score_comparison" type="select" label="Trim until aggregate score is">\n- <sanitizer>\n- <valid initial="none">\n- <add value="<>=!"/> <!-- only allow lt, gt, e, le, ge, ne for this parameter; will be single-quote escaped on commandline -->\n- </valid>\n- </sanitizer>\n- <option value=">">></option>\n- <option value=">=" selected="true">>=</option>\n- <option value="==">==</option>\n- <option value="!=">!=</option>\n- <option value="<"><</option>\n- <option value="<="><=</option>\n- </param>\n- <param name="quality_score" label="Quality Score" value="0" type="float" />\n- </inputs>\n- <outputs>\n- <data name="output_file" format_source="input_file" />\n- </outputs>\n- <tests>\n- <test>\n- <!-- Trim until window size 1 >= 20;both ends -->\n- <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n- <param name="keep_zero_length" value="false" />\n- <param name="trim_ends" value="53"/>\n- <param name="window_size" value="1"/>\n- <param name="step_size" value="1"/>\n- <param name="exclude_count" value="0"/>\n- <param name="aggregation_action" value="min"/>\n- <param name="score_comparison" value=">="/>\n- <param name="quality_score" value="20"/>\n- <output name="output_file" file="sanger_full_range_quality_trimmed_out_1.fastqsanger" />\n- </test>\n- <test>\n- <!-- Trim until window size 1 >= 20; 5\' end only -->\n- <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n- <param name="keep_zero_length" value="false" />\n- <param name="trim_ends" value="5"/>\n- <param name="window_size" value="1"/>\n- <param name="step_size" value="1"/>\n- <param name="exclude_count" value="0"/>\n- <param name="aggregation_action" value="min"/>\n- <param name="score_comparison" value=">="/>\n- <param name="quality_score" value="20"/>\n- <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" />\n- </test>\n- <test>\n- <!-- Trim until window size 1 >= 20; 3\' end only -->\n- <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n- <param na'..b'param name="exclude_count" value="0"/>\n+ <param name="aggregation_action" value="min"/>\n+ <param name="score_comparison" value=">="/>\n+ <param name="quality_score" value="20"/>\n+ <output name="output_file" file="sanger_full_range_quality_trimmed_out_2.fastqsanger" ftype="fastqsanger" />\n+ </test>\n+ <!-- Trim until window size 1 >= 20; 3\' end only -->\n+ <test>\n+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+ <param name="keep_zero_length" value="false" />\n+ <param name="trim_ends" value="3"/>\n+ <param name="window_size" value="1"/>\n+ <param name="step_size" value="1"/>\n+ <param name="exclude_count" value="0"/>\n+ <param name="aggregation_action" value="min"/>\n+ <param name="score_comparison" value=">="/>\n+ <param name="quality_score" value="20"/>\n+ <output name="output_file" file="sanger_full_range_quality_trimmed_out_3.fastqsanger" ftype="fastqsanger" />\n+ </test>\n+ <!-- Trim until window size 2 >= 1;both ends, 1 deviant score -->\n+ <test>\n+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+ <param name="keep_zero_length" value="false" />\n+ <param name="trim_ends" value="53"/>\n+ <param name="window_size" value="2"/>\n+ <param name="step_size" value="1"/>\n+ <param name="exclude_count" value="1"/>\n+ <param name="aggregation_action" value="min"/>\n+ <param name="score_comparison" value=">="/>\n+ <param name="quality_score" value="1"/>\n+ <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+ </test>\n+ <!-- Trim entire sequences; keep empty reads -->\n+ <test>\n+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+ <param name="keep_zero_length" value="true" />\n+ <param name="trim_ends" value="53"/>\n+ <param name="window_size" value="1"/>\n+ <param name="step_size" value="1"/>\n+ <param name="exclude_count" value="0"/>\n+ <param name="aggregation_action" value="min"/>\n+ <param name="score_comparison" value=">="/>\n+ <param name="quality_score" value="999"/>\n+ <output name="output_file" file="sanger_full_range_empty_reads.fastqsanger" ftype="fastqsanger" />\n+ </test>\n+ <!-- Trim entire sequences; discard empty reads -->\n+ <test>\n+ <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />\n+ <param name="keep_zero_length" value="false" />\n+ <param name="trim_ends" value="53"/>\n+ <param name="window_size" value="1"/>\n+ <param name="step_size" value="1"/>\n+ <param name="exclude_count" value="0"/>\n+ <param name="aggregation_action" value="min"/>\n+ <param name="score_comparison" value=">="/>\n+ <param name="quality_score" value="999"/>\n+ <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n **What it does**\n \n This tool allows you to trim the ends of reads based upon the aggregate value of quality scores found within a sliding window; a sliding window of size 1 is equivalent to \'simple\' trimming of the ends.\n@@ -138,8 +139,8 @@\n .. class:: warningmark\n \n Trimming a color space read will cause any adapter base to be lost.\n- </help>\n- <citations>\n- <citation type="doi">10.1093/bioinformatics/btq281</citation>\n- </citations>\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1093/bioinformatics/btq281</citation>\n+ </citations>\n </tool>\n' |