Repository 'fastq_stats'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/fastq_stats

Changeset 2:e2cf940128d5 (2017-09-30)
Previous changeset 1:daaf552153fe (2015-11-11) Next changeset 3:b1cea1b2fcd0 (2019-11-01)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_stats commit f2582539542b33240234e8ea6093e25d0aee9b6a
modified:
fastq_stats.xml
test-data/fastq_stats_1_out.tabular
removed:
fastq_stats.py
tool_dependencies.xml
b
diff -r daaf552153fe -r e2cf940128d5 fastq_stats.py
--- a/fastq_stats.py Wed Nov 11 12:42:31 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,48 +0,0 @@
-#Dan Blankenberg
-import sys
-from galaxy_utils.sequence.fastq import fastqReader, fastqAggregator
-
-VALID_NUCLEOTIDES = [ 'A', 'C', 'G', 'T', 'N' ]
-VALID_COLOR_SPACE = map( str, range( 7 ) ) + [ '.' ]
-SUMMARY_STAT_ORDER = ['read_count', 'min_score', 'max_score', 'sum_score', 'mean_score', 'q1', 'med_score', 'q3', 'iqr', 'left_whisker', 'right_whisker' ]
-
-def main():
-    input_filename = sys.argv[1]
-    output_filename = sys.argv[2]
-    input_type = sys.argv[3] or 'sanger'
-    
-    aggregator = fastqAggregator()
-    num_reads = None
-    fastq_read = None
-    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
-        aggregator.consume_read( fastq_read )
-    out = open( output_filename, 'wb' )
-    valid_nucleotides = VALID_NUCLEOTIDES
-    if fastq_read:
-        if fastq_read.sequence_space == 'base':
-            out.write( '#column\tcount\tmin\tmax\tsum\tmean\tQ1\tmed\tQ3\tIQR\tlW\trW\toutliers\tA_Count\tC_Count\tG_Count\tT_Count\tN_Count\tother_bases\tother_base_count\n' )
-        else:
-            out.write( '#column\tcount\tmin\tmax\tsum\tmean\tQ1\tmed\tQ3\tIQR\tlW\trW\toutliers\t0_Count\t1_Count\t2_Count\t3_Count\t4_Count\t5_Count\t6_Count\t._Count\tother_bases\tother_base_count\n' )
-            valid_nucleotides = VALID_COLOR_SPACE
-    for i in range( aggregator.get_max_read_length() ):
-        column_stats = aggregator.get_summary_statistics_for_column( i )
-        out.write( '%i\t' % ( i + 1 ) )
-        out.write( '%s\t' * len( SUMMARY_STAT_ORDER ) % tuple( [ column_stats[ key ] for key in SUMMARY_STAT_ORDER ] ) )
-        out.write( '%s\t' % ','.join( map( str, column_stats['outliers'] ) ) )
-        base_counts = aggregator.get_base_counts_for_column( i )
-        for nuc in valid_nucleotides:
-            out.write( "%s\t" % base_counts.get( nuc, 0 ) )
-        extra_nucs = sorted( [ nuc for nuc in base_counts.keys() if nuc not in valid_nucleotides ] )
-        out.write( "%s\t%s\n" % ( ','.join( extra_nucs ), ','.join( str( base_counts[nuc] ) for nuc in extra_nucs ) ) )
-    out.close()
-    if num_reads is None:
-        print "No valid fastq reads could be processed."
-    else:
-        print "%i fastq reads were processed." % ( num_reads + 1 )
-        print "Based upon quality values and sequence characters, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() ) or "None" )
-        ascii_range = aggregator.get_ascii_range()
-        decimal_range =  aggregator.get_decimal_range()
-        print "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) #print using repr, since \x00 (null) causes info truncation in galaxy when printed
-        print "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] )
-
-if __name__ == "__main__": main()
b
diff -r daaf552153fe -r e2cf940128d5 fastq_stats.xml
--- a/fastq_stats.xml Wed Nov 11 12:42:31 2015 -0500
+++ b/fastq_stats.xml Sat Sep 30 13:55:30 2017 -0400
[
@@ -1,25 +1,27 @@
-<tool id="fastq_stats" name="FASTQ Summary Statistics" version="1.0.0">
-  <description>by column</description>
-  <requirements>
-    <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement>
-  </requirements>
-  <command interpreter="python">fastq_stats.py '$input_file' '$output_file' '${input_file.extension[len( 'fastq' ):]}'</command>
-  <inputs>
-    <param name="input_file" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger" label="FASTQ File"/>
-  </inputs>
-  <outputs>
-    <data name="output_file" format="tabular" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="input_file" value="fastq_stats1.fastq" ftype="fastqsanger" />
-      <output name="output_file" file="fastq_stats_1_out.tabular" />
-    </test>
-  </tests>
-  <help>
+<tool id="fastq_stats" name="FASTQ Summary Statistics" version="1.1.1">
+    <description>by column</description>
+    <requirements>
+        <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement>
+    </requirements>
+    <command><![CDATA[
+gx-fastq-stats '$input_file' '$output_file' '${input_file.extension[len('fastq'):]}'
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="fastqsanger,fastqillumina,fastqsolexa,fastqcssanger,fastqsanger.gz,fastqillumina.gz,fastqsolexa.gz,fastqcssanger.gz,fastqsanger.bz2,fastqillumina.bz2,fastqsolexa.bz2,fastqcssanger.bz2" label="FASTQ File"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="fastq_stats1.fastq" ftype="fastqsanger" />
+            <output name="output_file" file="fastq_stats_1_out.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**
 
-This tool creates summary statistics on a FASTQ file. 
+This tool creates summary statistics on a FASTQ file.
 
 .. class:: infomark
 
@@ -53,24 +55,19 @@
 For example::
 
   #column   count   min max sum mean    Q1  med Q3  IQR lW  rW  outliers    A_Count C_Count G_Count T_Count N_Count other_bases other_base_count
-  1   14336356    2   33  450600675   31.4306281875   32.0    33.0    33.0    1.0 31  33  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30    4482314 2199633 4425957 3208745 19707       
-  2   14336356    2   34  441135033   30.7703737965   30.0    33.0    33.0    3.0 26  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25   4419184 2170537 4627987 3118567 81      
-  3   14336356    2   34  433659182   30.2489127642   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4310988 2941988 3437467 3645784 129     
-  4   14336356    2   34  433635331   30.2472490917   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4110637 3007028 3671749 3546839 103     
-  5   14336356    2   34  432498583   30.167957813    29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4348275 2935903 3293025 3759029 124     
+  1   14336356    2   33  450600675   31.4306281875   32.0    33.0    33.0    1.0 31  33  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30    4482314 2199633 4425957 3208745 19707
+  2   14336356    2   34  441135033   30.7703737965   30.0    33.0    33.0    3.0 26  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25   4419184 2170537 4627987 3118567 81
+  3   14336356    2   34  433659182   30.2489127642   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4310988 2941988 3437467 3645784 129
+  4   14336356    2   34  433635331   30.2472490917   29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4110637 3007028 3671749 3546839 103
+  5   14336356    2   34  432498583   30.167957813    29.0    32.0    33.0    4.0 23  34  2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22    4348275 2935903 3293025 3759029 124
 
 -----
 
 .. class:: warningmark
 
 Adapter bases in color space reads are excluded from statistics.
-
-------
-
-  </help>
-  
-  <citations>
-    <citation type="doi">10.1093/bioinformatics/btq281</citation>
-  </citations>
-  
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btq281</citation>
+    </citations>
 </tool>
b
diff -r daaf552153fe -r e2cf940128d5 test-data/fastq_stats_1_out.tabular
--- a/test-data/fastq_stats_1_out.tabular Wed Nov 11 12:42:31 2015 -0500
+++ b/test-data/fastq_stats_1_out.tabular Sat Sep 30 13:55:30 2017 -0400
b
@@ -1,37 +1,37 @@
 #column count min max sum mean Q1 med Q3 IQR lW rW outliers A_Count C_Count G_Count T_Count N_Count other_bases other_base_count
-1 9 23 34 288 32.0 33.0 33.0 33.0 0.0 33 33 23,34 3 1 4 1 0
-2 9 28 33 287 31.8888888889 30.5 33.0 33.0 2.5 28 33 3 3 2 1 0
-3 9 13 34 268 29.7777777778 27.5 33.0 33.5 6.0 27 34 13 5 1 0 3 0
-4 9 17 33 261 29.0 24.5 33.0 33.0 8.5 17 33 1 2 3 3 0
-5 9 22 33 269 29.8888888889 26.0 33.0 33.0 7.0 22 33 3 3 3 0 0
-6 9 22 33 277 30.7777777778 29.0 33.0 33.0 4.0 28 33 22 5 3 0 1 0
-7 9 21 33 258 28.6666666667 23.0 33.0 33.0 10.0 21 33 4 1 3 1 0
-8 9 12 33 263 29.2222222222 26.5 33.0 33.0 6.5 21 33 12 2 1 1 5 0
-9 9 29 33 290 32.2222222222 31.5 33.0 33.0 1.5 30 33 29 3 3 2 1 0
-10 9 23 33 277 30.7777777778 28.0 33.0 33.0 5.0 23 33 1 4 2 2 0
-11 9 12 33 245 27.2222222222 21.0 31.0 33.0 12.0 12 33 5 2 1 1 0
-12 9 13 33 214 23.7777777778 14.0 24.0 33.0 19.0 13 33 2 4 2 1 0
-13 9 5 33 249 27.6666666667 26.5 31.0 33.0 6.5 24 33 5 2 1 1 5 0
-14 9 5 33 233 25.8888888889 19.5 33.0 33.0 13.5 5 33 3 3 2 1 0
-15 9 15 33 251 27.8888888889 22.5 33.0 33.0 10.5 15 33 5 1 1 2 0
-16 9 23 34 269 29.8888888889 23.5 33.0 33.0 9.5 23 34 3 1 2 3 0
-17 9 13 34 266 29.5555555556 27.0 33.0 33.0 6.0 21 34 13 2 3 1 3 0
-18 9 21 34 272 30.2222222222 26.0 33.0 33.0 7.0 21 34 0 5 1 3 0
-19 9 5 34 244 27.1111111111 24.0 30.0 33.0 9.0 21 34 5 4 4 1 0 0
-20 9 11 34 241 26.7777777778 17.0 32.0 33.0 16.0 11 34 3 4 2 0 0
-21 9 13 33 240 26.6666666667 22.5 27.0 33.0 10.5 13 33 1 4 0 4 0
-22 9 5 33 190 21.1111111111 9.0 21.0 33.0 24.0 5 33 1 4 0 3 1
-23 9 5 33 205 22.7777777778 14.0 26.0 33.0 19.0 5 33 4 4 1 0 0
-24 9 5 33 247 27.4444444444 24.5 31.0 33.0 8.5 21 33 5 1 5 1 2 0
-25 9 11 34 241 26.7777777778 18.5 33.0 33.0 14.5 11 34 3 4 0 2 0
-26 9 5 33 212 23.5555555556 11.5 31.0 33.0 21.5 5 33 0 6 0 3 0
-27 9 5 33 227 25.2222222222 20.0 26.0 33.0 13.0 5 33 3 4 1 1 0
-28 9 21 33 255 28.3333333333 22.5 31.0 33.0 10.5 21 33 2 4 3 0 0
-29 9 5 33 228 25.3333333333 19.5 30.0 33.0 13.5 5 33 2 4 1 2 0
-30 9 10 33 213 23.6666666667 13.5 28.0 33.0 19.5 10 33 3 4 2 0 0
-31 9 5 33 236 26.2222222222 21.0 31.0 33.0 12.0 5 33 1 4 1 3 0
-32 9 5 33 210 23.3333333333 11.5 29.0 33.0 21.5 5 33 3 3 0 3 0
-33 9 5 33 183 20.3333333333 8.0 21.0 33.0 25.0 5 33 1 4 2 2 0
-34 9 5 33 150 16.6666666667 6.0 17.0 25.5 19.5 5 33 3 4 1 1 0
-35 9 13 33 217 24.1111111111 19.5 24.0 31.0 11.5 13 33 1 4 1 3 0
-36 9 5 33 195 21.6666666667 11.5 21.0 32.5 21.0 5 33 3 2 1 3 0
+1 9 23 34 288 32.000000 33.000000 33.000000 33.000000 0.000000 33 33 23,34 3 1 4 1 0
+2 9 28 33 287 31.888889 30.500000 33.000000 33.000000 2.500000 28 33 3 3 2 1 0
+3 9 13 34 268 29.777778 27.500000 33.000000 33.500000 6.000000 27 34 13 5 1 0 3 0
+4 9 17 33 261 29.000000 24.500000 33.000000 33.000000 8.500000 17 33 1 2 3 3 0
+5 9 22 33 269 29.888889 26.000000 33.000000 33.000000 7.000000 22 33 3 3 3 0 0
+6 9 22 33 277 30.777778 29.000000 33.000000 33.000000 4.000000 28 33 22 5 3 0 1 0
+7 9 21 33 258 28.666667 23.000000 33.000000 33.000000 10.000000 21 33 4 1 3 1 0
+8 9 12 33 263 29.222222 26.500000 33.000000 33.000000 6.500000 21 33 12 2 1 1 5 0
+9 9 29 33 290 32.222222 31.500000 33.000000 33.000000 1.500000 30 33 29 3 3 2 1 0
+10 9 23 33 277 30.777778 28.000000 33.000000 33.000000 5.000000 23 33 1 4 2 2 0
+11 9 12 33 245 27.222222 21.000000 31.000000 33.000000 12.000000 12 33 5 2 1 1 0
+12 9 13 33 214 23.777778 14.000000 24.000000 33.000000 19.000000 13 33 2 4 2 1 0
+13 9 5 33 249 27.666667 26.500000 31.000000 33.000000 6.500000 24 33 5 2 1 1 5 0
+14 9 5 33 233 25.888889 19.500000 33.000000 33.000000 13.500000 5 33 3 3 2 1 0
+15 9 15 33 251 27.888889 22.500000 33.000000 33.000000 10.500000 15 33 5 1 1 2 0
+16 9 23 34 269 29.888889 23.500000 33.000000 33.000000 9.500000 23 34 3 1 2 3 0
+17 9 13 34 266 29.555556 27.000000 33.000000 33.000000 6.000000 21 34 13 2 3 1 3 0
+18 9 21 34 272 30.222222 26.000000 33.000000 33.000000 7.000000 21 34 0 5 1 3 0
+19 9 5 34 244 27.111111 24.000000 30.000000 33.000000 9.000000 21 34 5 4 4 1 0 0
+20 9 11 34 241 26.777778 17.000000 32.000000 33.000000 16.000000 11 34 3 4 2 0 0
+21 9 13 33 240 26.666667 22.500000 27.000000 33.000000 10.500000 13 33 1 4 0 4 0
+22 9 5 33 190 21.111111 9.000000 21.000000 33.000000 24.000000 5 33 1 4 0 3 1
+23 9 5 33 205 22.777778 14.000000 26.000000 33.000000 19.000000 5 33 4 4 1 0 0
+24 9 5 33 247 27.444444 24.500000 31.000000 33.000000 8.500000 21 33 5 1 5 1 2 0
+25 9 11 34 241 26.777778 18.500000 33.000000 33.000000 14.500000 11 34 3 4 0 2 0
+26 9 5 33 212 23.555556 11.500000 31.000000 33.000000 21.500000 5 33 0 6 0 3 0
+27 9 5 33 227 25.222222 20.000000 26.000000 33.000000 13.000000 5 33 3 4 1 1 0
+28 9 21 33 255 28.333333 22.500000 31.000000 33.000000 10.500000 21 33 2 4 3 0 0
+29 9 5 33 228 25.333333 19.500000 30.000000 33.000000 13.500000 5 33 2 4 1 2 0
+30 9 10 33 213 23.666667 13.500000 28.000000 33.000000 19.500000 10 33 3 4 2 0 0
+31 9 5 33 236 26.222222 21.000000 31.000000 33.000000 12.000000 5 33 1 4 1 3 0
+32 9 5 33 210 23.333333 11.500000 29.000000 33.000000 21.500000 5 33 3 3 0 3 0
+33 9 5 33 183 20.333333 8.000000 21.000000 33.000000 25.000000 5 33 1 4 2 2 0
+34 9 5 33 150 16.666667 6.000000 17.000000 25.500000 19.500000 5 33 3 4 1 1 0
+35 9 13 33 217 24.111111 19.500000 24.000000 31.000000 11.500000 13 33 1 4 1 3 0
+36 9 5 33 195 21.666667 11.500000 21.000000 32.500000 21.000000 5 33 3 2 1 3 0
b
diff -r daaf552153fe -r e2cf940128d5 tool_dependencies.xml
--- a/tool_dependencies.xml Wed Nov 11 12:42:31 2015 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-  <package name="galaxy_sequence_utils" version="1.0.0">
-      <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>