Mercurial > repos > devteam > fastq_to_tabular
changeset 2:ccf4e1d1fcbe draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_to_tabular commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 13:55:43 -0400 |
parents | 7da7ddea4425 |
children | 614a63f14adf |
files | fastq_to_tabular.py fastq_to_tabular.xml test-data/sanger_full_range_original_sanger.fastqsanger.gz tool_dependencies.xml |
diffstat | 4 files changed, 51 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_to_tabular.py Wed Nov 11 12:42:45 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#Dan Blankenberg -import sys -from galaxy_utils.sequence.fastq import fastqReader - -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() - -def main(): - if len(sys.argv) != 5: - stop_err("Wrong number of arguments. Expect: fasta tabular desrc_split [type]") - input_filename = sys.argv[1] - output_filename = sys.argv[2] - descr_split = int( sys.argv[3] ) - 1 - if descr_split < 0: - stop_err("Bad description split value (should be 1 or more)") - input_type = sys.argv[4] or 'sanger' #input type should ordinarily be unnecessary - - num_reads = None - fastq_read = None - out = open( output_filename, 'wb' ) - if descr_split == 0: - #Don't divide the description into multiple columns - for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - out.write( "%s\t%s\t%s\n" % ( fastq_read.identifier[1:].replace( '\t', ' ' ), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) ) - else: - for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - words = fastq_read.identifier[1:].replace( '\t', ' ' ).split(None, descr_split) - #pad with empty columns if required - words += [""]*(descr_split-len(words)) - out.write( "%s\t%s\t%s\n" % ("\t".join(words), fastq_read.sequence.replace( '\t', ' ' ), fastq_read.quality.replace( '\t', ' ' ) ) ) - out.close() - if num_reads is None: - print "No valid FASTQ reads could be processed." - else: - print "%i FASTQ reads were converted to Tabular." % ( num_reads + 1 ) - -if __name__ == "__main__": main()
--- a/fastq_to_tabular.xml Wed Nov 11 12:42:45 2015 -0500 +++ b/fastq_to_tabular.xml Sat Sep 30 13:55:43 2017 -0400 @@ -1,40 +1,45 @@ -<tool id="fastq_to_tabular" name="FASTQ to Tabular" version="1.1.0"> - <description>converter</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_to_tabular.py '$input_file' '$output_file' $descr_columns '${input_file.extension[len( 'fastq' ):]}'</command> - <inputs> - <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqillumina,fastqsolexa" label="FASTQ file to convert" /> - <param name="descr_columns" type="integer" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column"> - <validator type="in_range" min="1" /> - </param> - </inputs> - <outputs> - <data name="output_file" format="tabular" /> - </outputs> - <tests> - <!-- basic test --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="descr_columns" value="1"/> - <output name="output_file" file="fastq_to_tabular_out_1.tabular" /> - </test> - <!-- color space test --> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" /> - <param name="descr_columns" value="1"/> - <output name="output_file" file="fastq_to_tabular_out_2.tabular" /> - </test> - <!-- split title into columns --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="descr_columns" value="2"/> - <output name="output_file" file="fastq_to_tabular_out_3.tabular" /> - </test> - </tests> - <help> - +<tool id="fastq_to_tabular" name="FASTQ to Tabular" version="1.1.1"> + <description>converter</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-to-tabular '$input_file' '$output_file' $descr_columns '${input_file.extension[len('fastq'):]}' + ]]></command> + <inputs> + <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqillumina,fastqsolexa,fastqsanger.gz,fastqcssanger.gz,fastqillumina.gz,fastqsolexa.gz,fastqsanger.bz2,fastqcssanger.bz2,fastqillumina.bz2,fastqsolexa.bz2" label="FASTQ file to convert" /> + <param name="descr_columns" type="integer" min="1" value="1" label="How many columns to divide title string into?" help="Typically 2 to take the ID (first word) and decription (rest) as two columns, or 1 to give a single column" /> + </inputs> + <outputs> + <data name="output_file" format="tabular" /> + </outputs> + <tests> + <!-- basic test --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="descr_columns" value="1"/> + <output name="output_file" file="fastq_to_tabular_out_1.tabular" /> + </test> + <!-- compression test --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz" /> + <param name="descr_columns" value="1"/> + <output name="output_file" file="fastq_to_tabular_out_1.tabular" /> + </test> + <!-- color space test --> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" /> + <param name="descr_columns" value="1"/> + <output name="output_file" file="fastq_to_tabular_out_2.tabular" /> + </test> + <!-- split title into columns --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="descr_columns" value="2"/> + <output name="output_file" file="fastq_to_tabular_out_3.tabular" /> + </test> + </tests> + <help><![CDATA[ **What it does** This tool converts FASTQ sequencing reads to a Tabular file. @@ -45,7 +50,7 @@ Tab characters, if present in the source FASTQ title, will be converted to spaces. ------ +----- **Example** @@ -59,16 +64,16 @@ aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn +FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF - D???:3104/76=:5...4.3,,,366////4<ABBAAA=CCFDDDDDDDD:666CDFFFF=<ABA=;:333111<===9 + D???:3104/76=:5...4.3,,,366////4<ABBAAA=CCFDDDDDDDD:666CDFFFF=<ABA=;:333111<===9 9;B889FFFFFFDDBDBDDD=8844231..,,,-,,,,,,,,1133..---17111,,,,,22555131121.--.,333 11,.,,3--,,.,,--,3511123..--!,,,,--,----9,,,,8=,,-,,,-,,,,---26:9:5-..1,,,,11//, - ,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.030000,,,044400036;96662.//;7><;!!! + ,,,!,,1917--,,,,-3.,--,,17,,,,---+11113.030000,,,044400036;96662.//;7><;!!! @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] - FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<?<89898<84442;==3,,,514,, - ,11,,,.,,21777555513,..--1115758.//34488><<;;;;9944/!/4,,,57855!! + FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<?<89898<84442;==3,,,514,, + ,11,,,.,,21777555513,..--1115758.//34488><<;;;;9944/!/4,,,57855!! By default this is converted into a 3 column tabular file, with the full FASTQ title used as column 1: @@ -92,13 +97,8 @@ ============== ============ ========== =========== ============= ============== =================== ============== ============== Note the sequences and quality strings have been truncated for display purposes in the above tables. - ------- - - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> - </citations> - + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Wed Nov 11 12:42:45 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>