0
|
1 #Florent Angly
|
|
2 import sys
|
|
3 from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner
|
|
4
|
|
def main():
    """Interlace two paired-end FASTQ files into a pairs file and a singles file.

    Positional command-line arguments, read from ``sys.argv``:

    1. mate1 FASTQ filename
    2. mate1 FASTQ type (an empty value falls back to ``'sanger'``)
    3. mate2 FASTQ filename
    4. mate2 FASTQ type (an empty value falls back to ``'sanger'``)
    5. output filename for interlaced pairs
    6. output filename for reads without a mate (singles)

    If the two types differ, a warning is printed and nothing is read or
    written. Otherwise every mate1 read whose paired identifier is found in
    the mate2 file is written to the pairs output, immediately followed by
    its mate; all remaining reads from either file go to the singles output.
    A short summary is printed at the end.

    Raises:
        IndexError: if fewer than six arguments are supplied.
    """
    mate1_filename = sys.argv[1]
    mate1_type = sys.argv[2] or 'sanger'
    mate2_filename = sys.argv[3]
    mate2_type = sys.argv[4] or 'sanger'
    outfile_pairs = sys.argv[5]
    outfile_singles = sys.argv[6]

    if mate1_type != mate2_type:
        print("WARNING: You are trying to interlace files of two different types: %s and %s." % (mate1_type, mate2_type))
        return

    # Named fastq_type (not ``type``) so the builtin is not shadowed.
    fastq_type = mate1_type
    joiner = fastqJoiner(fastq_type)

    # Every raw file handle opened below is recorded here so the ``finally``
    # clause can close all of them, including the sequential readers' handles
    # and whatever was already open when an exception interrupted the run.
    handles = []

    def _open(path, mode):
        handle = open(path, mode)
        handles.append(handle)
        return handle

    try:
        out_pairs = fastqWriter(_open(outfile_pairs, 'wb'), format=fastq_type)
        out_singles = fastqWriter(_open(outfile_singles, 'wb'), format=fastq_type)

        nof_singles = 0
        nof_pairs = 0
        saw_mate1 = False
        saw_mate2 = False

        # Pairs + singles present in mate1
        mate2_input = fastqNamedReader(_open(mate2_filename, 'rb'), format=fastq_type)
        mate1_reader = fastqReader(_open(mate1_filename, 'rb'), format=fastq_type)
        for mate1 in mate1_reader:
            saw_mate1 = True
            mate2 = mate2_input.get(joiner.get_paired_identifier(mate1))
            if mate2:
                out_pairs.write(mate1)
                out_pairs.write(mate2)
                nof_pairs += 1
            else:
                out_singles.write(mate1)
                nof_singles += 1

        # Singles present in mate2 (pairs were already written above, so only
        # reads whose mate is missing from mate1 are emitted here).
        mate1_input = fastqNamedReader(_open(mate1_filename, 'rb'), format=fastq_type)
        mate2_reader = fastqReader(_open(mate2_filename, 'rb'), format=fastq_type)
        for mate2 in mate2_reader:
            saw_mate2 = True
            mate1 = mate1_input.get(joiner.get_paired_identifier(mate2))
            if not mate1:
                out_singles.write(mate2)
                nof_singles += 1

        if not saw_mate1 and not saw_mate2:
            print("Your input files contained no valid FASTQ sequences.")
        else:
            print('There were %s single reads.' % (nof_singles))
            print('Interlaced %s pairs of sequences.' % (nof_pairs))

        # Close through the wrapper objects first, as the script always did.
        mate1_input.close()
        mate2_input.close()
        out_pairs.close()
        out_singles.close()
    finally:
        # Closing an already-closed file object is a no-op, so this is safe
        # after the wrapper close() calls above and also covers error paths.
        for handle in reversed(handles):
            handle.close()
|
|
56
|
|
57
|
|
# Script entry point: run the interlacer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
|