Mercurial > repos > devteam > fastq_to_tabular
comparison fastq_to_tabular.py @ 0:bc9269529e88 draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 27 Jan 2014 09:28:21 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bc9269529e88 |
|---|---|
| 1 #Dan Blankenberg | |
| 2 import sys | |
| 3 from galaxy_utils.sequence.fastq import fastqReader | |
| 4 | |
def stop_err( msg ):
    """Write *msg* to standard error and terminate the process.

    The process exits with status 1 so that callers inspecting the exit
    code (and not only stderr) see the run as failed; a bare ``sys.exit()``
    would report success (status 0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
| 8 | |
def _description_columns( identifier, descr_split ):
    """Return the description columns for one read identifier.

    The first character of *identifier* is dropped and tabs are replaced
    by spaces so the value cannot break the tab-separated layout.  When
    *descr_split* is 0 the description is kept as a single column;
    otherwise it is split on whitespace at most *descr_split* times and
    padded with empty strings so every row has exactly
    ``descr_split + 1`` description columns.
    """
    description = identifier[1:].replace( '\t', ' ' )
    if descr_split == 0:
        # Don't divide the description into multiple columns
        return [ description ]
    words = description.split( None, descr_split )
    # split() yields up to descr_split + 1 words; pad to that width so
    # short descriptions do not produce rows with fewer columns.
    words += [ "" ] * ( descr_split + 1 - len( words ) )
    return words


def main():
    """Convert a FASTQ file to a tab-separated file.

    Command-line arguments (``sys.argv[1:]``):
        1. input FASTQ filename
        2. output tabular filename
        3. number of columns to split the description into (1 or more)
        4. FASTQ format name passed to ``fastqReader``; an empty string
           falls back to ``'sanger'``

    Each read is written as one line: description column(s), sequence,
    quality, with embedded tabs replaced by spaces.  A summary of the
    number of converted reads is printed to standard output.  Invalid
    arguments are reported through ``stop_err``.
    """
    if len( sys.argv ) != 5:
        stop_err( "Wrong number of arguments. Expect: fastq tabular descr_split [type]" )
    input_filename = sys.argv[1]
    output_filename = sys.argv[2]
    try:
        descr_split = int( sys.argv[3] ) - 1
    except ValueError:
        # Report a non-numeric value the same way as an out-of-range one
        # instead of surfacing a raw traceback.
        stop_err( "Bad description split value (should be 1 or more)" )
    if descr_split < 0:
        stop_err( "Bad description split value (should be 1 or more)" )
    input_type = sys.argv[4] or 'sanger' #input type should ordinarily be unnecessary

    num_reads = 0
    # Context managers close both files even if reading or writing fails.
    # Text mode ('w') keeps the str writes valid on Python 3 as well.
    with open( input_filename ) as fastq_file:
        with open( output_filename, 'w' ) as out:
            for fastq_read in fastqReader( fastq_file, format = input_type ):
                columns = _description_columns( fastq_read.identifier, descr_split )
                columns.append( fastq_read.sequence.replace( '\t', ' ' ) )
                columns.append( fastq_read.quality.replace( '\t', ' ' ) )
                out.write( "%s\n" % "\t".join( columns ) )
                num_reads += 1

    # Single-argument print() behaves identically on Python 2 and 3.
    if num_reads == 0:
        print( "No valid FASTQ reads could be processed." )
    else:
        print( "%i FASTQ reads were converted to Tabular." % num_reads )
| 37 | |
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
