changeset 2:430b9da91435 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_trimmer commit f2582539542b33240234e8ea6093e25d0aee9b6a
author devteam
date Sat, 30 Sep 2017 13:55:56 -0400
parents e0cfb5a703ce
children 2d0d13b0b0f1
files fastq_trimmer.py fastq_trimmer.xml tool_dependencies.xml
diffstat 3 files changed, 90 insertions(+), 150 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_trimmer.py	Wed Nov 11 12:42:58 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    left_offset = sys.argv[3]
-    right_offset = sys.argv[4]
-    percent_offsets = sys.argv[5] == 'offsets_percent'
-    input_type = sys.argv[6] or 'sanger'
-    keep_zero_length = sys.argv[7] == 'keep_zero_length'
-    
-    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
-    num_reads_excluded = 0
-    num_reads = None
-    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        if percent_offsets:
-            left_column_offset = int( round( float( left_offset ) / 100.0 * float( len( fastq_read ) ) ) )
-            right_column_offset = int( round( float( right_offset ) / 100.0 * float( len( fastq_read ) ) ) )
-        else:
-            left_column_offset = int( left_offset )
-            right_column_offset = int( right_offset )
-        if right_column_offset > 0:
-            right_column_offset = -right_column_offset
-        else:
-            right_column_offset = None
-        fastq_read = fastq_read.slice( left_column_offset, right_column_offset )
-        if keep_zero_length or len( fastq_read ):
-            out.write( fastq_read )
-        else:
-            num_reads_excluded += 1
-    out.close()
-    if num_reads is None:
-        print "No valid fastq reads could be processed."
-    else:
-        print "%i fastq reads were processed." % ( num_reads + 1 )
-    if num_reads_excluded:
-        print "%i reads of zero length were excluded from the output." % num_reads_excluded
-
-if __name__ == "__main__": main()
--- a/fastq_trimmer.xml	Wed Nov 11 12:42:58 2015 -0500
+++ b/fastq_trimmer.xml	Sat Sep 30 13:55:56 2017 -0400
@@ -1,125 +1,112 @@
-<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
-  <description>by column</description>
-  <requirements>
-    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
-  </requirements>
-  <command interpreter="python">fastq_trimmer.py '$input_file' '$output_file' '${offset_type['left_column_offset']}' '${offset_type['right_column_offset']}' '${offset_type['base_offset_type']}' '${input_file.extension[len( 'fastq' ):]}' '$keep_zero_length'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File"/>
-    <conditional name="offset_type">
-      <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
-        <option value="offsets_absolute" selected="true">Absolute Values</option>
-        <option value="offsets_percent">Percentage of Read Length</option>
-      </param>
-      <when value="offsets_absolute">
-        <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
-          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-        </param>
-        <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
-          <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/>
-          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
-        </param>
-      </when>
-      <when value="offsets_percent">
-        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
-          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-        </param>
-        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
-          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
-        </param>
-      </when>
-    </conditional>
-  <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="input" />
-  </outputs>
-  <tests>
-    <test>
-      <!-- Do nothing trim -->
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="0"/>
-      <param name="right_column_offset" value="0"/>
-      <param name="keep_zero_length" value="keep_zero_length" />
-      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" />
-    </test>
-    <!-- Trim to empty File -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="30"/>
-      <param name="right_column_offset" value="64"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="50"/>
-      <param name="right_column_offset" value="50"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="empty_file.dat" />
-    </test>
-    <!-- Trim to 4 inner-most bases -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_absolute"/>
-      <param name="left_column_offset" value="45"/>
-      <param name="right_column_offset" value="45"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="base_offset_type" value="offsets_percent"/>
-      <param name="left_column_offset" value="47.87"/>
-      <param name="right_column_offset" value="47.87"/>
-      <param name="keep_zero_length" value="exclude_zero_length" />
-      <output name="output_file" file="fastq_trimmer_out1.fastqsanger" />
-    </test>
-  </tests>
-  <help>
+<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.1.1">
+    <description>by column</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-trimmer '$input_file' '$output_file' ${offset_type['left_column_offset']} ${offset_type['right_column_offset']} ${offset_type['base_offset_type']} '${input_file.extension[len('fastq'):]}' $keep_zero_length
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ file"/>
+        <conditional name="offset_type">
+            <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
+                <option value="offsets_absolute" selected="true">Absolute Values</option>
+                <option value="offsets_percent">Percentage of Read Length</option>
+            </param>
+            <when value="offsets_absolute">
+                <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" />
+                <param name="right_column_offset" type="integer" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position" />
+            </when>
+            <when value="offsets_percent">
+                <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" />
+                <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" />
+            </when>
+        </conditional>
+        <param name="keep_zero_length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false" label="Keep reads with zero length" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format_source="input_file" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- Do nothing trim -->
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="base_offset_type" value="offsets_absolute"/>
+            <param name="left_column_offset" value="0"/>
+            <param name="right_column_offset" value="0"/>
+            <param name="keep_zero_length" value="keep_zero_length" />
+            <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <!-- Trim to empty File -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="base_offset_type" value="offsets_absolute"/>
+            <param name="left_column_offset" value="30"/>
+            <param name="right_column_offset" value="64"/>
+            <param name="keep_zero_length" value="exclude_zero_length" />
+            <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="base_offset_type" value="offsets_percent"/>
+            <param name="left_column_offset" value="50"/>
+            <param name="right_column_offset" value="50"/>
+            <param name="keep_zero_length" value="exclude_zero_length" />
+            <output name="output_file" file="empty_file.dat" ftype="fastqsanger" />
+        </test>
+        <!-- Trim to 4 inner-most bases -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="base_offset_type" value="offsets_absolute"/>
+            <param name="left_column_offset" value="45"/>
+            <param name="right_column_offset" value="45"/>
+            <param name="keep_zero_length" value="exclude_zero_length" />
+            <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
+        </test>
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="base_offset_type" value="offsets_percent"/>
+            <param name="left_column_offset" value="47.87"/>
+            <param name="right_column_offset" value="47.87"/>
+            <param name="keep_zero_length" value="exclude_zero_length" />
+            <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**
-    
+
 This tool allows you to trim the ends of reads.
 
-You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer. 
+You can specify either absolute or percent-based offsets. Offsets are calculated, starting at 0, from the respective end to be trimmed. When using the percent-based method, offsets are rounded to the nearest integer.
 
 For example, if you have a read of length 36::
-  
+
   @Some FASTQ Sanger Read
   CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
   +
-  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@
-  
+  =@@.@;B-%?8>CBA@>7@7BBCA4-48%<;;%<B@
+
 And you set absolute offsets of 2 and 9::
-  
+
   @Some FASTQ Sanger Read
   ATATGTNCTCACTGATAAGTGGATA
   +
-  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4
-  
+  @.@;B-%?8>CBA@>7@7BBCA4-4
+
 Or you set percent offsets of 6% and 20% (corresponds to absolute offsets of 2,7 for a read length of 36)::
-  
+
   @Some FASTQ Sanger Read
   ATATGTNCTCACTGATAAGTGGATATN
   +
-  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%
-  
+  @.@;B-%?8>CBA@>7@7BBCA4-48%
+
 -----
 
 .. class:: warningmark
 
 Trimming a color space read will cause any adapter base to be lost.
-
-------
-
-  </help>
-  
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btq281</citation>
-  </citations>
-  
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>
--- a/tool_dependencies.xml	Wed Nov 11 12:42:58 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="galaxy_sequence_utils" version="1.0.0">
-      <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>