Mercurial > repos > devteam > fastq_to_tabular
comparison fastq_to_tabular.py @ 0:bc9269529e88 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:28:21 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bc9269529e88 |
---|---|
1 #Dan Blankenberg | |
2 import sys | |
3 from galaxy_utils.sequence.fastq import fastqReader | |
4 | |
def stop_err( msg ):
    """Write *msg* to standard error and terminate the program.

    The process exits with status 1 so that callers (shells, workflow
    engines) can detect the failure from the exit code as well as from
    the text on stderr.  ``SystemExit`` is raised, so this never returns.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report success (status 0) after an error.
    sys.exit( 1 )
8 | |
def main():
    """Convert a FASTQ file into a tab-separated (tabular) file.

    Command line arguments (``sys.argv``)::

        input_fastq output_tabular descr_split [type]

    * ``input_fastq``    -- path of the FASTQ file to read.
    * ``output_tabular`` -- path of the tabular file to write.
    * ``descr_split``    -- number of columns (1 or more) the read
      identifier is split into on whitespace.  ``1`` keeps the whole
      identifier in a single column.
    * ``type``           -- FASTQ variant passed to ``fastqReader`` as
      ``format``; when omitted or empty, ``'sanger'`` is used.

    Each read produces one output line: the identifier column(s) (leading
    character of the identifier removed), the sequence and the quality
    string.  Tab characters inside any value are replaced by spaces so the
    column layout stays intact.  A summary line is printed to stdout.

    Invalid arguments are reported through ``stop_err`` which terminates
    the program.
    """
    # The usage string documents [type] as optional, so accept it missing.
    if len( sys.argv ) not in ( 4, 5 ):
        stop_err( "Wrong number of arguments. Expect: fastq tabular descr_split [type]" )
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    try:
        # Internally this is the maxsplit value, i.e. columns - 1.
        descr_split = int( sys.argv[3] ) - 1
    except ValueError:
        # Report a non-numeric value the same way as an out-of-range one
        # instead of dying with a traceback.
        stop_err( "Bad description split value (should be 1 or more)" )
    if descr_split < 0:
        stop_err( "Bad description split value (should be 1 or more)" )
    #input type should ordinarily be unnecessary
    input_type = ( sys.argv[4] if len( sys.argv ) == 5 else '' ) or 'sanger'

    # Stays None when the reader yields no reads at all.
    num_reads = None
    # Text is written, so open in text mode; 'with' closes both files even
    # if the reader raises part way through.
    with open( output_filename, 'w' ) as out:
        with open( input_filename ) as fastq_in:
            for num_reads, fastq_read in enumerate( fastqReader( fastq_in, format = input_type ) ):
                title = fastq_read.identifier[1:].replace( '\t', ' ' )
                if descr_split == 0:
                    #Don't divide the description into multiple columns
                    columns = [ title ]
                else:
                    columns = title.split( None, descr_split )
                    # split() yields at most descr_split + 1 fields; pad
                    # short identifiers up to that count so every row has
                    # the same number of columns.
                    columns += [ "" ] * ( descr_split + 1 - len( columns ) )
                columns.append( fastq_read.sequence.replace( '\t', ' ' ) )
                columns.append( fastq_read.quality.replace( '\t', ' ' ) )
                out.write( "\t".join( columns ) + "\n" )

    # Single-argument print(...) is valid under both Python 2 and 3.
    if num_reads is None:
        print( "No valid FASTQ reads could be processed." )
    else:
        print( "%i FASTQ reads were converted to Tabular." % ( num_reads + 1 ) )
37 | |
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()