changeset 2:4ac14b275aca draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_manipulation commit f2582539542b33240234e8ea6093e25d0aee9b6a
author devteam
date Sat, 30 Sep 2017 14:58:21 -0400
parents bb07615a5b6a
children 7861f3b10c68
files fastq_manipulation.py fastq_manipulation.xml test-data/sanger_full_range_as_rna.fastqsanger.bz2 test-data/sanger_full_range_as_rna.fastqsanger.gz tool_dependencies.xml
diffstat 5 files changed, 350 insertions(+), 386 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_manipulation.py	Wed Nov 11 12:41:10 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#Dan Blankenberg
-import sys, os, shutil
-import imp
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
-    #Read command line arguments
-    input_filename = sys.argv[1]
-    script_filename = sys.argv[2]
-    output_filename = sys.argv[3]
-    additional_files_path = sys.argv[4]
-    input_type = sys.argv[5] or 'sanger'
-    
-    #Save script file for debuging/verification info later
-    os.mkdir( additional_files_path )
-    shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) )
-    
-    fastq_manipulator = imp.load_module( 'fastq_manipulator', open( script_filename ), script_filename, ( '', 'r', imp.PY_SOURCE ) )
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-    
-    i = None
-    reads_manipulated = 0
-    for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        new_read = fastq_manipulator.match_and_manipulate_read( fastq_read )
-        if new_read:
-            out.write( new_read )
-        if new_read != fastq_read:
-            reads_manipulated += 1
-    out.close()
-    if i is None:
-        print "Your file contains no valid FASTQ reads."
-    else:
-        print 'Manipulated %s of %s reads (%.2f%%).' % ( reads_manipulated, i + 1, float( reads_manipulated ) / float( i + 1 ) * 100.0 )
-
-if __name__ == "__main__":
-    main()
--- a/fastq_manipulation.xml	Wed Nov 11 12:41:10 2015 -0500
+++ b/fastq_manipulation.xml	Sat Sep 30 14:58:21 2017 -0400
@@ -1,199 +1,54 @@
-<tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.1">
-  <options sanitize="False" /> <!-- This tool uses a file to rely all parameter information (actually a dynamically generated python module), we can safely not sanitize any parameters -->
-  <requirements>
-    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
-  </requirements>
-  <description>reads on various attributes</description>
-  <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
-    <page>
-      <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/>
-      <!-- Match Reads -->
-      <repeat name="match_blocks" title="Match Reads">
-        <conditional name="match_type">
-          <param name="match_type_selector" type="select" label="Match Reads by">
-            <option value="identifier">Name/Identifier</option>
-            <option value="sequence">Sequence Content</option>
-            <option value="quality">Quality Score Content</option>
-          </param>
-          <when value="identifier">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Identifier Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-          <when value="sequence">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Sequence Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-          <when value="quality">
-            <conditional name="match">
-              <param name="match_selector" type="select" label="Quality Match Type">
-                <option value="regex">Regular Expression</option>
-              </param>
-              <when value="regex">
-                <param type="text" name="match_by" label="Match by" value=".*" />
-              </when>
-            </conditional>
-          </when>
-        </conditional>
-      </repeat>
-      <!-- Manipulate Matched Reads -->
-      <repeat name="manipulate_blocks" title="Manipulate Reads">
-        <conditional name="manipulation_type">
-          <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
-            <option value="identifier">Name/Identifier</option>
-            <option value="sequence">Sequence Content</option>
-            <option value="quality">Quality Score Content</option>
-            <option value="miscellaneous">Miscellaneous Actions</option>
-          </param>
-          <when value="identifier">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
-                <option value="translate">String Translate</option>
-              </param>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-            </conditional>
-          </when>
-          <when value="sequence">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
-                <option value="rev_comp">Reverse Complement</option>
-                <option value="rev_no_comp">Reverse, No Complement</option>
-                <option value="no_rev_comp">Complement, No Reverse</option>
-                <option value="trim">Trim</option>
-                <option value="dna_to_rna">DNA to RNA</option>
-                <option value="rna_to_dna">RNA to DNA</option>
-                <option value="translate">String Translate</option>
-                <option value="change_adapter">Change Adapter Base</option>
-              </param>
-              <when value="rev_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="rev_no_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="no_rev_comp">
-                <!-- no extra settings -->
-              </when>
-              <when value="trim">
-                <conditional name="offset_type">
-                  <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
-                    <option value="offsets_absolute" selected="true">Absolute Values</option>
-                    <option value="offsets_percent">Percentage of Read Length</option>
-                  </param>
-                  <when value="offsets_absolute">
-                    <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
-                      <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-                      <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-                    </param>
-                    <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
-                      <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-                      <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-                    </param>
-                  </when>
-                  <when value="offsets_percent">
-                    <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
-                      <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-                    </param>
-                    <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
-                      <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-                    </param>
-                  </when>
-                </conditional>
-                <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
-              </when>
-              <when value="dna_to_rna">
-                <!-- no extra settings -->
-              </when>
-              <when value="rna_to_dna">
-                <!-- no extra settings -->
-              </when>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-              <when value="change_adapter">
-                <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" />
-              </when>
-            </conditional>
-          </when>
-          <when value="quality">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
-                <option value="translate">String Translate</option>
-                <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
-              </param>
-              <when value="translate">
-                <param name="from" type="text" label="From" value="" />
-                <param name="to" type="text" label="To" value="" />
-              </when>
-              <when value="modify_each_score">
-                <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
-              </when>
-            </conditional>
-          </when>
-          <when value="miscellaneous">
-            <conditional name="manipulation">
-              <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
-                <option value="remove">Remove Read</option>
-              </param>
-              <when value="remove">
-                <!-- no extra settings -->
-              </when>
-            </conditional>
-          </when>
-        </conditional>
-      </repeat>
-    </page>
-  </inputs>
-  <configfiles>
-    <configfile name="fastq_manipulation_file">##create an importable module
+<tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.1.1">
+    <options sanitize="false" /> <!-- This tool uses a file to relay all parameter information (actually a dynamically generated Python module), so we can safely skip sanitizing the parameters -->
+    <description>reads on various attributes</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-manipulation '$input_file' '$fastq_manipulation_file' '$output_file' '$output_file.files_path' '${input_file.extension[len('fastq'):]}'
+    ]]></command>
+    <configfiles>
+        <configfile name="fastq_manipulation_file"><![CDATA[##create an importable module
 #import binascii
+import binascii
 import re
-import binascii
-from string import maketrans
+import six
+
+if six.PY2:
+    from string import maketrans
+else:
+    maketrans = str.maketrans
+
+
 ##does read match
-def match_read( fastq_read ):
+def match_read(fastq_read):
     #for $match_block in $match_blocks:
         #if $match_block['match_type']['match_type_selector'] == 'identifier':
-    search_target = fastq_read.identifier[1:] ##don't include @
+    search_target = fastq_read.identifier[1:]  ##don't include @
         #elif $match_block['match_type']['match_type_selector'] == 'sequence':
     search_target = fastq_read.sequence
         #elif $match_block['match_type']['match_type_selector'] == 'quality':
     search_target = fastq_read.quality
         #else:
-        #continue
+            #continue
         #end if
-    if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target  ):
+    if not re.search(binascii.unhexlify("${ binascii.hexlify(str(match_block['match_type']['match']['match_by'])) }").decode(), search_target):
         return False
     #end for
     return True
+
+
 ##modify matched reads
-def manipulate_read( fastq_read ):
+def manipulate_read(fastq_read):
     new_read = fastq_read.clone()
     #for $manipulate_block in $manipulate_blocks:
         #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier':
             #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
+    new_read.identifier = "@%s" % new_read.identifier[1:].translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
             #end if
         #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence':
             #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
+    new_read.sequence = new_read.sequence.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp':
     new_read = new_read.reverse_complement()
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp':
@@ -202,18 +57,18 @@
     new_read = new_read.complement()
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim':
                 #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent':
-    left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
-    right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) )
+    left_column_offset = int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }) / 100.0 * float(len(new_read))))
+    right_column_offset = int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }) / 100.0 * float(len(new_read))))
                 #else
     left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }
     right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }
                 #end if
-    if right_column_offset > 0:
+    if right_column_offset != 0:
         right_column_offset = -right_column_offset
     else:
         right_column_offset = None
-    new_read = new_read.slice( left_column_offset, right_column_offset )
-    if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ):
+    new_read = new_read.slice(left_column_offset, right_column_offset)
+    if not (${str(manipulate_block['manipulation_type']['manipulation']['keep_zero_length']) == 'keep_zero_length'} or len(new_read)):
         return None
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna':
     new_read = new_read.sequence_as_DNA()
@@ -221,181 +76,335 @@
     new_read = new_read.sequence_as_RNA()
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter':
     if new_read.sequence_space == 'color':
-        new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) )
+        new_read = new_read.change_adapter(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['new_adapter'])) }").decode())
             #end if
         #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality':
             #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate':
-    new_read.quality = new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) )
+    new_read.quality = new_read.quality.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode()))
             #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score':
-    def score_method( score ):
-        raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
-        new_read.quality_map( score_method )
+    def score_method(score):
+        raise Exception("Unimplemented")  ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions
+        new_read.quality_map(score_method)
             #end if
         #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous':
             #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove':
     return None
             #end if
         #else:
-        #continue
+            #continue
         #end if
     #end for
     if new_read.description != "+":
-        new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid
+        new_read.description = "+%s" % new_read.identifier[1:]  ##ensure description is still valid
     return new_read
-def match_and_manipulate_read( fastq_read ):
+
+
+def match_and_manipulate_read(fastq_read):
     new_read = fastq_read
-    if match_read( fastq_read ):
-        new_read = manipulate_read( fastq_read )
+    if match_read(fastq_read):
+        new_read = manipulate_read(fastq_read)
     return new_read
-</configfile>
-  </configfiles>
-  <outputs>
-    <data format="input" name="output_file" />
-  </outputs>
-  <tests>
-    <!-- match all and do nothing -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="identifier" />
-      <param name="manipulation_selector" value="translate" />
-      <param name="from" value="" />
-      <param name="to" value="" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match None and do nothing -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="STRINGDOESNOTEXIST" />
-      <param name="manipulation_type_selector" value="identifier" />
-      <param name="manipulation_selector" value="translate" />
-      <param name="from" value="" />
-      <param name="to" value="" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and remove -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="miscellaneous" />
-      <param name="manipulation_selector" value="remove" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <!-- match None and remove -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="STRINGDOESNOTEXIST" />
-      <param name="manipulation_type_selector" value="miscellaneous" />
-      <param name="manipulation_selector" value="remove" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and trim to 4 inner-most bases -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="trim" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="45"/>
-      <param name="right_column_offset" value="45"/>
-      <param name="keep_zero_length" value="true" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="trim" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="47.87"/>
-      <param name="right_column_offset" value="47.87"/>
-      <param name="keep_zero_length" value="true" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <!-- match all and rev comp -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" />
-    </test>
-    <!-- match all and rev comp, with ambiguous DNA -->
-    <test>
-      <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" />
-    </test>
-    <!-- match all and rev comp, with ambiguous RNA -->
-    <test>
-      <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" />
-    </test>
-    <!-- match first seq and rev comp -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="FAKE0001" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" />
-    </test>
-    <!-- match first seq and rev comp: i.e. undo above -->
-    <test>
-      <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value="FAKE0001" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rev_comp" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- match all and DNA to RNA -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="dna_to_rna" />
-      <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" />
-    </test>
-    <!-- match all and RNA to DNA -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
-      <param name="match_type_selector" value="identifier" />
-      <param name="match_selector" value="regex" />
-      <param name="match_by" value=".*" />
-      <param name="manipulation_type_selector" value="sequence" />
-      <param name="manipulation_selector" value="rna_to_dna" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-  </tests>
-<help>
+        ]]></configfile>
+    </configfiles>
+    <inputs>
+        <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility -->
+        <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer"/>
+        <!-- Match Reads -->
+        <repeat name="match_blocks" title="Match Reads">
+            <conditional name="match_type">
+                <param name="match_type_selector" type="select" label="Match Reads by">
+                    <option value="identifier">Name/Identifier</option>
+                    <option value="sequence">Sequence Content</option>
+                    <option value="quality">Quality Score Content</option>
+                </param>
+                <when value="identifier">
+                    <conditional name="match">
+                        <param name="match_selector" type="select" label="Identifier Match Type">
+                            <option value="regex">Regular Expression</option>
+                        </param>
+                        <when value="regex">
+                            <param name="match_by" type="text" value=".*" label="Match by" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="sequence">
+                    <conditional name="match">
+                        <param name="match_selector" type="select" label="Sequence Match Type">
+                            <option value="regex">Regular Expression</option>
+                        </param>
+                        <when value="regex">
+                            <param name="match_by" type="text" value=".*" label="Match by" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="quality">
+                    <conditional name="match">
+                        <param name="match_selector" type="select" label="Quality Match Type">
+                            <option value="regex">Regular Expression</option>
+                        </param>
+                        <when value="regex">
+                            <param name="match_by" type="text" value=".*" label="Match by" />
+                        </when>
+                    </conditional>
+                </when>
+            </conditional>
+        </repeat>
+        <!-- Manipulate Matched Reads -->
+        <repeat name="manipulate_blocks" title="Manipulate Reads">
+            <conditional name="manipulation_type">
+                <param name="manipulation_type_selector" type="select" label="Manipulate Reads on">
+                    <option value="identifier">Name/Identifier</option>
+                    <option value="sequence">Sequence Content</option>
+                    <option value="quality">Quality Score Content</option>
+                    <option value="miscellaneous">Miscellaneous Actions</option>
+                </param>
+                <when value="identifier">
+                    <conditional name="manipulation">
+                        <param name="manipulation_selector" type="select" label="Identifier Manipulation Type">
+                            <option value="translate">String Translate</option>
+                        </param>
+                        <when value="translate">
+                            <param name="from" type="text" value="" label="From" />
+                            <param name="to" type="text" value="" label="To" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="sequence">
+                    <conditional name="manipulation">
+                        <param name="manipulation_selector" type="select" label="Sequence Manipulation Type">
+                            <option value="rev_comp">Reverse Complement</option>
+                            <option value="rev_no_comp">Reverse, No Complement</option>
+                            <option value="no_rev_comp">Complement, No Reverse</option>
+                            <option value="trim">Trim</option>
+                            <option value="dna_to_rna">DNA to RNA</option>
+                            <option value="rna_to_dna">RNA to DNA</option>
+                            <option value="translate">String Translate</option>
+                            <option value="change_adapter">Change Adapter Base</option>
+                        </param>
+                        <when value="rev_comp" />
+                        <when value="rev_no_comp" />
+                        <when value="no_rev_comp" />
+                        <when value="trim">
+                            <conditional name="offset_type">
+                                <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
+                                    <option value="offsets_absolute" selected="true">Absolute Values</option>
+                                    <option value="offsets_percent">Percentage of Read Length</option>
+                                </param>
+                                <when value="offsets_absolute">
+                                    <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" />
+                                    <param name="right_column_offset" type="integer" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position" />
+                                </when>
+                                <when value="offsets_percent">
+                                    <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" />
+                                    <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" />
+                                </when>
+                            </conditional>
+                            <param name="keep_zero_length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false" label="Keep reads with zero length" />
+                        </when>
+                        <when value="dna_to_rna" />
+                        <when value="rna_to_dna" />
+                        <when value="translate">
+                            <param name="from" type="text" value="" label="From" />
+                            <param name="to" type="text" value="" label="To" />
+                        </when>
+                        <when value="change_adapter">
+                            <param name="new_adapter" type="text" value="G" label="New adapter" help="An empty string will remove the adapter base" />
+                        </when>
+                    </conditional>
+                </when>
+                <when value="quality">
+                    <conditional name="manipulation">
+                        <param name="manipulation_selector" type="select" label="Quality Manipulation Type">
+                            <option value="translate">String Translate</option>
+                            <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet-->
+                        </param>
+                        <when value="translate">
+                            <param name="from" type="text" value="" label="From" />
+                            <param name="to" type="text" value="" label="To" />
+                        </when>
+                        <!-- <when value="modify_each_score">
+                            <param name="map_score" type="text" label="Modify Score by" value="$score + 1" />
+                        </when> -->
+                    </conditional>
+                </when>
+                <when value="miscellaneous">
+                    <conditional name="manipulation">
+                        <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type">
+                            <option value="remove">Remove Read</option>
+                        </param>
+                        <when value="remove" />
+                    </conditional>
+                </when>
+            </conditional>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="output_file" format_source="input_file" />
+    </outputs>
+    <tests>
+        <!-- match all and do nothing -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="identifier" />
+            <param name="manipulation_selector" value="translate" />
+            <param name="from" value="" />
+            <param name="to" value="" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match None and do nothing -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value="STRINGDOESNOTEXIST" />
+            <param name="manipulation_type_selector" value="identifier" />
+            <param name="manipulation_selector" value="translate" />
+            <param name="from" value="" />
+            <param name="to" value="" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and remove -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="miscellaneous" />
+            <param name="manipulation_selector" value="remove" />
+            <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
+        </test>
+        <!-- match None and remove -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value="STRINGDOESNOTEXIST" />
+            <param name="manipulation_type_selector" value="miscellaneous" />
+            <param name="manipulation_selector" value="remove" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and trim to 4 inner-most bases -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="trim" />
+            <param name="base_offset_type" value="offsets_absolute"/>
+            <param name="left_column_offset" value="45"/>
+            <param name="right_column_offset" value="45"/>
+            <param name="keep_zero_length" value="true" />
+            <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="trim" />
+            <param name="base_offset_type" value="offsets_percent"/>
+            <param name="left_column_offset" value="47.87"/>
+            <param name="right_column_offset" value="47.87"/>
+            <param name="keep_zero_length" value="true" />
+            <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and rev comp -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rev_comp" />
+            <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and rev comp, with ambiguous DNA -->
+        <test>
+            <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rev_comp" />
+            <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and rev comp, with ambiguous RNA -->
+        <test>
+            <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rev_comp" />
+            <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match first seq and rev comp -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value="FAKE0001" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rev_comp" />
+            <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match first seq and rev comp: i.e. undo above -->
+        <test>
+            <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value="FAKE0001" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rev_comp" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and DNA to RNA -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="dna_to_rna" />
+            <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and RNA to DNA-->
+        <test>
+            <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rna_to_dna" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- match all and RNA to DNA (gz compressed) -->
+        <test>
+            <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.gz" ftype="fastqsanger.gz" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rna_to_dna" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.gz" decompress="true" />
+        </test>
+        <!-- match all and RNA to DNA (bz2 compressed) -->
+        <test>
+            <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.bz2" ftype="fastqsanger.bz2" />
+            <param name="match_type_selector" value="identifier" />
+            <param name="match_selector" value="regex" />
+            <param name="match_by" value=".*" />
+            <param name="manipulation_type_selector" value="sequence" />
+            <param name="manipulation_selector" value="rna_to_dna" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.bz2" decompress="true" />
+        </test>
+    </tests>
+    <help><![CDATA[
 This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified.
 
 Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information.
@@ -407,7 +416,6 @@
 
 Only color space reads can have adapter bases substituted.
 
-
 -----
 
 **Example**
@@ -416,12 +424,11 @@
 
 Steps:
 
-1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression "\*."; thereby matching all reads). 
-2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field). 
+1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*"; thereby matching all reads).
+2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field).
 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN".
 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads.
-
-</help>
+    ]]></help>
     <citations>
         <citation type="doi">10.1093/bioinformatics/btq281</citation>
     </citations>
Binary file test-data/sanger_full_range_as_rna.fastqsanger.bz2 has changed
Binary file test-data/sanger_full_range_as_rna.fastqsanger.gz has changed
--- a/tool_dependencies.xml	Wed Nov 11 12:41:10 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="galaxy_sequence_utils" version="1.0.0">
-      <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>