annotate fastq_paired_end_interlacer.py @ 0:b89bdf6acb6c draft

Imported from capsule None
author devteam
date Mon, 27 Jan 2014 09:26:38 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
1 #Florent Angly
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
2 import sys
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
4
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
def main():
    """Interlace two paired-end FASTQ files into a pairs file and a singles file.

    Arguments are read positionally from ``sys.argv``:

    1. mate1 FASTQ filename
    2. mate1 FASTQ type (an empty string falls back to ``'sanger'``)
    3. mate2 FASTQ filename
    4. mate2 FASTQ type (an empty string falls back to ``'sanger'``)
    5. output filename receiving the interlaced pairs
    6. output filename receiving the reads that have no mate

    If the two types differ, a warning is printed and the function returns
    without opening any file.  Otherwise every mate1 read is looked up in
    mate2: matched reads are written to the pairs output (mate1 first, then
    mate2) and unmatched mate1 reads go to the singles output.  A second pass
    over mate2 appends the mate2 reads that have no counterpart in mate1 to
    the singles output.  A short summary is printed at the end.

    Every file opened here is closed on exit, including when an error
    interrupts processing.
    """
    mate1_filename = sys.argv[1]
    mate1_type = sys.argv[2] or 'sanger'
    mate2_filename = sys.argv[3]
    mate2_type = sys.argv[4] or 'sanger'
    outfile_pairs = sys.argv[5]
    outfile_singles = sys.argv[6]

    if mate1_type != mate2_type:
        print("WARNING: You are trying to interlace files of two different types: %s and %s." % (mate1_type, mate2_type))
        return

    # Named 'fastq_format' rather than 'type' so the builtin is not shadowed.
    fastq_format = mate1_type
    joiner = fastqJoiner(fastq_format)

    # Every raw handle is remembered so the 'finally' clause can release it,
    # including the two sequential readers that were previously never closed.
    opened_handles = []

    def _open(path, mode):
        handle = open(path, mode)
        opened_handles.append(handle)
        return handle

    nof_singles = 0
    nof_pairs = 0
    mate1_seen = False
    mate2_seen = False

    try:
        out_pairs = fastqWriter(_open(outfile_pairs, 'wb'), format=fastq_format)
        out_singles = fastqWriter(_open(outfile_singles, 'wb'), format=fastq_format)

        # Pairs + singles present in mate1
        mate2_input = fastqNamedReader(_open(mate2_filename, 'rb'), format=fastq_format)
        mate1_reader = fastqReader(_open(mate1_filename, 'rb'), format=fastq_format)
        for mate1 in mate1_reader:
            mate1_seen = True
            # NOTE(review): presumably get() returns a false value when the
            # identifier has no match in the other file - confirm in galaxy_utils.
            mate2 = mate2_input.get(joiner.get_paired_identifier(mate1))
            if mate2:
                out_pairs.write(mate1)
                out_pairs.write(mate2)
                nof_pairs += 1
            else:
                out_singles.write(mate1)
                nof_singles += 1

        # Singles present in mate2
        mate1_input = fastqNamedReader(_open(mate1_filename, 'rb'), format=fastq_format)
        mate2_reader = fastqReader(_open(mate2_filename, 'rb'), format=fastq_format)
        for mate2 in mate2_reader:
            mate2_seen = True
            mate1 = mate1_input.get(joiner.get_paired_identifier(mate2))
            if not mate1:
                out_singles.write(mate2)
                nof_singles += 1

        if not mate1_seen and not mate2_seen:
            print("Your input files contained no valid FASTQ sequences.")
        else:
            print('There were %s single reads.' % (nof_singles))
            print('Interlaced %s pairs of sequences.' % (nof_pairs))

        # Same explicit close sequence as before for the wrapped objects.
        mate1_input.close()
        mate2_input.close()
        out_pairs.close()
        out_singles.close()
    finally:
        # Closing an already-closed file object is a no-op, so this is safe
        # after the explicit close() calls above and also covers error paths.
        for handle in opened_handles:
            handle.close()
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
56
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
57
b89bdf6acb6c Imported from capsule None
devteam
parents:
diff changeset
# Run the interlacer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()