Mercurial > repos > devteam > fastq_to_tabular

--- a/fastq_to_tabular.py	Wed Nov 11 12:42:45 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader
-
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
-
-def main():
-    if len(sys.argv) != 5:
-        stop_err("Wrong number of arguments. Expect: fasta tabular desrc_split [type]")
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    descr_split = int( sys.argv[3] ) - 1
-    if descr_split < 0:
-        stop_err("Bad description split value (should be 1 or more)")
-    input_type = sys.argv[4] or 'sanger' #input type should ordinarily be unnecessary
-
-    num_reads = None
-    fastq_read = None
-    out = open( output_filename, 'wb' )
-    if descr_split == 0:
-        #Don't divide the description into multiple columns
-        for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-            out.write( "%s\t%s\t%s\n" % ( fastq_read.identifier[1:].replace( '\t', ' ' ), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) )
-    else:
-        for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-            words = fastq_read.identifier[1:].replace( '\t', ' ' ).split(None, descr_split)
-            #pad with empty columns if required
-            words += [""]*(descr_split-len(words))
-            out.write( "%s\t%s\t%s\n" % ("\t".join(words), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) )
-    out.close()
-    if num_reads is None:
-        print "No valid FASTQ reads could be processed."
-    else:
-        print "%i FASTQ reads were converted to Tabular." % ( num_reads + 1 )
-
-if __name__ == "__main__": main()
--- a/fastq_to_tabular.xml	Wed Nov 11 12:42:45 2015 -0500
+++ b/fastq_to_tabular.xml	Sat Sep 30 13:55:43 2017 -0400
@@ -1,40 +1,45 @@
-<tool id="fastq_to_tabular" name="FASTQ to Tabular" version="1.1.0">
-  <description>converter</description>
-  <requirements>
-    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
-  </requirements>
-  <command interpreter="python">fastq_to_tabular.py '$input_file' '$output_file' $descr_columns '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqillumina,fastqsolexa" label="FASTQ file to convert" />
-    <param name="descr_columns" type="integer" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column">
-      <validator type="in_range" min="1" />
-    </param>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="tabular" />
-  </outputs>
-  <tests>
-    <!-- basic test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="descr_columns" value="1"/>
-      <output name="output_file" file="fastq_to_tabular_out_1.tabular" />
-    </test>
-    <!-- color space test -->
-    <test>
-      <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" />
-      <param name="descr_columns" value="1"/>
-      <output name="output_file" file="fastq_to_tabular_out_2.tabular" />
-    </test>
-    <!-- split title into columns -->
-    <test>
-      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
-      <param name="descr_columns" value="2"/>
-      <output name="output_file" file="fastq_to_tabular_out_3.tabular" />
-    </test>
-  </tests>
-  <help>
-
+<tool id="fastq_to_tabular" name="FASTQ to Tabular" version="1.1.1">
+    <description>converter</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-to-tabular '$input_file' '$output_file' $descr_columns '${input_file.extension[len('fastq'):]}'
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqillumina,fastqsolexa,fastqsanger.gz,fastqcssanger.gz,fastqillumina.gz,fastqsolexa.gz,fastqsanger.bz2,fastqcssanger.bz2,fastqillumina.bz2,fastqsolexa.bz2" label="FASTQ file to convert" />
+        <param name="descr_columns" type="integer" min="1" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and description (rest) as two columns, or 1 to give a single column" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format="tabular" />
+    </outputs>
+    <tests>
+        <!-- basic test -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="descr_columns" value="1"/>
+            <output name="output_file" file="fastq_to_tabular_out_1.tabular" />
+        </test>
+        <!-- compression test -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz" />
+            <param name="descr_columns" value="1"/>
+            <output name="output_file" file="fastq_to_tabular_out_1.tabular" />
+        </test>
+        <!-- color space test -->
+        <test>
+            <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" />
+            <param name="descr_columns" value="1"/>
+            <output name="output_file" file="fastq_to_tabular_out_2.tabular" />
+        </test>
+        <!-- split title into columns -->
+        <test>
+            <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" />
+            <param name="descr_columns" value="2"/>
+            <output name="output_file" file="fastq_to_tabular_out_3.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**

 This tool converts FASTQ sequencing reads to a Tabular file.
@@ -45,7 +50,7 @@

 Tab characters, if present in the source FASTQ title, will be converted to spaces.

------
+-----

 **Example**

@@ -59,16 +64,16 @@
  aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn
  +FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95]
  FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF
- D???:3104/76=:5...4.3,,,366////4&lt;ABBAAA=CCFDDDDDDDD:666CDFFFF=&lt;ABA=;:333111&lt;===9
+ D???:3104/76=:5...4.3,,,366////4<ABBAAA=CCFDDDDDDDD:666CDFFFF=<ABA=;:333111<===9
  9;B889FFFFFFDDBDBDDD=8844231..,,,-,,,,,,,,1133..---17111,,,,,22555131121.--.,333
  11,.,,3--,,.,,--,3511123..--!,,,,--,----9,,,,8=,,-,,,-,,,,---26:9:5-..1,,,,11//,
- ,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.030000,,,044400036;96662.//;7&gt;&lt;;!!!
+ ,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.030000,,,044400036;96662.//;7><;!!!
  @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]
  tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt
  taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn
  +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74]
- FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=&lt;4444@@B=555:BBBBB@@?8:8&lt;?&lt;89898&lt;84442;==3,,,514,,
- ,11,,,.,,21777555513,..--1115758.//34488&gt;&lt;&lt;;;;;9944/!/4,,,57855!!
+ FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<?<89898<84442;==3,,,514,,
+ ,11,,,.,,21777555513,..--1115758.//34488><<;;;;9944/!/4,,,57855!!

 By default this is converted into a 3 column tabular file, with the full FASTQ title used as column 1:

@@ -92,13 +97,8 @@
 ============== ============ ========== =========== ============= ============== =================== ============== ==============

 Note the sequences and quality strings have been truncated for display purposes in the above tables.
-
-------
-
-  </help>
-
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btq281</citation>
-  </citations>
-
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>
Binary file test-data/sanger_full_range_original_sanger.fastqsanger.gz has changed
--- a/tool_dependencies.xml	Wed Nov 11 12:42:45 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="galaxy_sequence_utils" version="1.0.0">
-      <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>