comparison tools/ngs_simulation/grinder_multiple_outputs.py @ 1:7d26d64539b2

Uploaded
author fangly
date Mon, 19 Sep 2011 01:07:28 -0400
parents
children 27a15723d4f0
comparison
equal deleted inserted replaced
0:b35ec780aac1 1:7d26d64539b2
1 #!/usr/bin/env python
2
3 """
4 Move files created by Grinder to a location where they will be recognized by
5 Galaxy as multiple output files with the right format. See
6 http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple Output Files
7 Example: python grinder_multiple_outputs.py output_dir output_id
8 Author: Florent Angly
9 """
10
11 import sys, os, re
12
# Refuse to run on interpreters older than Python 2.4. An explicit check is
# used instead of an assert so the guard is not stripped by "python -O".
if sys.version_info[:2] < ( 2, 4 ):
    sys.stderr.write( "Error: this script requires Python 2.4 or newer\n" )
    sys.exit( 1 )
14
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    Writes msg followed by a newline to standard error, then exits with
    status 1 so that the failure is visible through the exit code and not
    only through the text on stderr.
    """
    sys.stderr.write( "%s\n" % msg )
    sys.exit( 1 )
18
# Dataset format assigned to each file extension handled by this script.
_FORMAT_BY_EXTENSION = {
    '.txt':   'text',
    '.fa':    'fasta',
    '.fna':   'fasta',
    '.faa':   'fasta',
    '.fasta': 'fasta',
    '.fq':    'fastq',
    '.fastq': 'fastq',
    '.qual':  'qual',
}

# Library number embedded in a basename, e.g. the "1" in "grinder-1-reads".
_LIB_NUM_RE = re.compile( r'-(\d+)-' )

# Trailing word of a basename, e.g. "reads" in "grinder-1-reads".
_LIB_TYPE_RE = re.compile( r'-(\w+)$' )

# Last field of the destination filename. The value is a placeholder carried
# over unchanged. NOTE(review): confirm what Galaxy expects in this field.
_OPTIONAL_SPEC = 'asdf'


def _dataset_name( basename, dataset_format ):
    """
    Build the dataset name for a file from its extension-less basename.

    The name is "lib<N>-<type>" when the basename contains a "-<N>-" library
    number, and "-<type>" otherwise. <type> is the trailing "-<word>" of the
    basename, except for 'qual' datasets where it is always "qual".
    Returns None when the basename has no trailing "-<word>".
    """
    type_match = _LIB_TYPE_RE.search( basename )
    if type_match is None:
        return None

    lib_match = _LIB_NUM_RE.search( basename )
    if lib_match is None:
        prefix = ''
    else:
        prefix = 'lib%s' % lib_match.group( 1 )

    if dataset_format == 'qual':
        lib_type = 'qual'
    else:
        lib_type = type_match.group( 1 )

    return prefix + '-' + lib_type


def __main__():
    """
    Rename the files in output_dir whose name starts with output_id.

    Reads output_dir and output_id from sys.argv[1] and sys.argv[2]. Every
    matching file is renamed, inside output_dir, to
        primary_<output_id>_<name>_visible_<format>_<optional_spec>
    where <format> is derived from the file extension and <name> from the
    basename. Grinder filenames look like this:
        grinder-ranks.txt      grinder-reads.fa      grinder-reads.qual
        grinder-1-ranks.txt    grinder-1-reads.fa    grinder-1-reads.qual
        grinder-2-ranks.txt    grinder-2-reads.fa    grinder-2-reads.qual

    Calls stop_err() (which terminates the script) on missing arguments, an
    unknown extension, an unrecognized basename, or a failed rename.
    """
    # Get output dir and ID
    args = sys.argv
    if len( args ) < 3:
        stop_err( 'Usage: python grinder_multiple_outputs.py output_dir output_id' )
    output_dir = args[1]
    output_id = args[2]

    for fname in os.listdir( output_dir ):

        # Skip files that do not start with the output_id. A literal prefix
        # test is used so that characters special to regular expressions in
        # output_id cannot change which files are selected.
        if not fname.startswith( output_id ):
            continue

        basename, extension = os.path.splitext( fname )

        # Assign the dataset format
        dataset_format = _FORMAT_BY_EXTENSION.get( extension )
        if dataset_format is None:
            stop_err( 'Error: File %s had the unknown extension %s' % ( fname, extension ) )

        # Assign the dataset name
        name = _dataset_name( basename, dataset_format )
        if name is None:
            stop_err( 'Error: File with basename %s did not have a recognized name' % (basename) )

        # Move the dataset to the proper place
        source = os.path.join( output_dir, fname )
        destination = os.path.join( output_dir, 'primary_%s_%s_visible_%s_%s' % ( output_id, name, dataset_format, _OPTIONAL_SPEC ) )

        print( "moving %s to %s" % ( source, destination ) )

        try:
            os.rename( source, destination )
        except OSError:
            # sys.exc_info() is used because it is spelled the same way on
            # every interpreter from Python 2.4 through 3.x.
            stop_err( 'Error: ' + str( sys.exc_info()[1] ) )
97
# Run the move only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()