# HG changeset patch
# User devteam
# Date 1506797874 14400
# Node ID 06934412f56d21ac4658527c87c1f114f7fac0c5
# Parent b957f55f3955a54e93ad6806e57554ff07e1ab68
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_filter commit f2582539542b33240234e8ea6093e25d0aee9b6a
diff -r b957f55f3955 -r 06934412f56d fastq_filter.py
--- a/fastq_filter.py Wed Nov 11 12:40:42 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
- #Read command line arguments
- input_filename = sys.argv[1]
- script_filename = sys.argv[2]
- output_filename = sys.argv[3]
- additional_files_path = sys.argv[4]
- input_type = sys.argv[5] or 'sanger'
-
- #Save script file for debuging/verification info later
- os.mkdir( additional_files_path )
- shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) )
-
- ## Dan, Others: Can we simply drop the "format=input_type" here since it is specified in reader.
- ## This optimization would cut runtime roughly in half (for my test case anyway). -John
- out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-
- i = None
- reads_kept = 0
- execfile(script_filename, globals())
- for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
- ret_val = fastq_read_pass_filter( fastq_read ) ## fastq_read_pass_filter defined in script_filename
- if ret_val:
- out.write( fastq_read )
- reads_kept += 1
- out.close()
- if i is None:
- print "Your file contains no valid fastq reads."
- else:
- print 'Kept %s of %s reads (%.2f%%).' % ( reads_kept, i + 1, float( reads_kept ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
- main()
diff -r b957f55f3955 -r 06934412f56d fastq_filter.xml
--- a/fastq_filter.xml Wed Nov 11 12:40:42 2015 -0500
+++ b/fastq_filter.xml Sat Sep 30 14:57:54 2017 -0400
@@ -1,294 +1,299 @@
-
- reads by quality score and length
-
- galaxy_sequence_utils
-
- fastq_filter.py $input_file $fastq_filter_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- int( float( value ) ) == float( value )
-
-
-
- int( float( value ) ) == float( value )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def fastq_read_pass_filter( fastq_read ):
- def mean( score_list ):
- return float( sum( score_list ) ) / float( len( score_list ) )
- if len( fastq_read ) < $min_size:
+
+ reads by quality score and length
+
+ galaxy_sequence_utils
+
+
+
+ 0 and len(fastq_read) > $max_size:
return False
num_deviates = $max_num_deviants
qual_scores = fastq_read.get_decimal_quality_scores()
for qual_score in qual_scores:
- if qual_score < $min_quality or ( $max_quality > 0 and qual_score > $max_quality ):
+ if qual_score < $min_quality or ($max_quality > 0 and qual_score > $max_quality):
if num_deviates == 0:
return False
else:
num_deviates -= 1
#if not $paired_end:
- qual_scores_split = [ qual_scores ]
+ qual_scores_split = [qual_scores]
#else:
- qual_scores_split = [ qual_scores[ 0:int( len( qual_scores ) / 2 ) ], qual_scores[ int( len( qual_scores ) / 2 ): ] ]
+ qual_scores_split = [qual_scores[0:int(len(qual_scores) / 2)], qual_scores[int(len(qual_scores) / 2): ]]
#end if
#for $fastq_filter in $fastq_filters:
for split_scores in qual_scores_split:
- left_column_offset = $fastq_filter[ 'offset_type' ][ 'left_column_offset' ]
- right_column_offset = $fastq_filter[ 'offset_type' ][ 'right_column_offset' ]
-#if $fastq_filter[ 'offset_type' ]['base_offset_type'] == 'offsets_percent':
- left_column_offset = int( round( float( left_column_offset ) / 100.0 * float( len( split_scores ) ) ) )
- right_column_offset = int( round( float( right_column_offset ) / 100.0 * float( len( split_scores ) ) ) )
+ left_column_offset = $fastq_filter['offset_type']['left_column_offset']
+ right_column_offset = $fastq_filter['offset_type']['right_column_offset']
+#if $fastq_filter['offset_type']['base_offset_type'] == 'offsets_percent':
+ left_column_offset = int(round(float(left_column_offset) / 100.0 * float(len(split_scores))))
+ right_column_offset = int(round(float(right_column_offset) / 100.0 * float(len(split_scores))))
#end if
if right_column_offset > 0:
- split_scores = split_scores[ left_column_offset:-right_column_offset]
+ split_scores = split_scores[left_column_offset:-right_column_offset]
else:
- split_scores = split_scores[ left_column_offset:]
- if split_scores: ##if a read doesn't have enough columns, it passes by default
- if not ( ${fastq_filter[ 'score_operation' ]}( split_scores ) $fastq_filter[ 'score_comparison' ] $fastq_filter[ 'score' ] ):
+ split_scores = split_scores[left_column_offset:]
+ if split_scores: ##if a read doesn't have enough columns, it passes by default
+ if not (${fastq_filter['score_operation']}(split_scores) $fastq_filter['score_comparison'] $fastq_filter['score']):
return False
#end for
return True
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
- 10.1093/bioinformatics/btq281
-
-
+ ]]>
+
+ 10.1093/bioinformatics/btq281
+
diff -r b957f55f3955 -r 06934412f56d test-data/sanger_full_range_original_sanger.fastqsanger.gz
Binary file test-data/sanger_full_range_original_sanger.fastqsanger.gz has changed
diff -r b957f55f3955 -r 06934412f56d tool_dependencies.xml
--- a/tool_dependencies.xml Wed Nov 11 12:40:42 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-
-
-
-
-
-