Mercurial > repos > fangly > grinder
comparison tools/ngs_simulation/grinder_multiple_outputs.py @ 1:7d26d64539b2
Uploaded
author | fangly |
---|---|
date | Mon, 19 Sep 2011 01:07:28 -0400 |
parents | |
children | 27a15723d4f0 |
comparison
equal
deleted
inserted
replaced
0:b35ec780aac1 | 1:7d26d64539b2 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Move files created by Grinder to a location where they are going to be recognized by | |
5 Galaxy as multiple output files with the right format. See | |
6 http://wiki.g2.bx.psu.edu/Admin/Tools/Multiple Output Files | |
7 Example: python grinder_multiple_outputs.py output_dir output_id | |
8 Author: Florent Angly | |
9 """ | |
10 | |
11 import sys, os, re | |
12 | |
# Refuse to run on interpreters older than Python 2.4.  This is an explicit
# check rather than an ``assert`` so that it still runs under ``python -O``,
# which strips assert statements.
if sys.version_info[:2] < (2, 4):
    sys.exit("Error: this script requires Python 2.4 or newer")
14 | |
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then exits with
    status 1 so that the failure is also visible through the exit code.

    msg -- the error message to report.
    """
    # Wrap msg in a 1-tuple so that a tuple argument is formatted as a single
    # value instead of being unpacked as %-format arguments.
    sys.stderr.write("%s\n" % (msg,))
    sys.exit(1)
18 | |
def __main__():
    """Rename Grinder output files so Galaxy sees them as extra datasets.

    Reads two command-line arguments: the output directory and the output ID.
    Every file in the directory whose name starts with the output ID is
    renamed, inside that same directory, to
    ``primary_<output_id>_<name>_visible_<format>_asdf`` where ``<format>`` is
    derived from the file extension and ``<name>`` is built from the optional
    library number and the trailing ``-<type>`` part of the base name.

    Exits through ``stop_err`` when arguments are missing, when a matching
    file has an unknown extension or an unrecognized base name, or when a
    rename fails.
    """
    # Get output dir and ID
    args = sys.argv
    if len(args) < 3:
        stop_err('Error: Expected two arguments: output_dir output_id')
    output_dir = args[1]
    output_id = args[2]

    # Move Grinder files to the proper output
    # Grinder filenames look like this
    #   grinder-ranks.txt
    #   grinder-reads.fa
    #   grinder-reads.qual
    #   grinder-1-ranks.txt
    #   grinder-1-reads.fa
    #   grinder-1-reads.qual
    #   grinder-2-ranks.txt
    #   grinder-2-reads.fa
    #   grinder-2-reads.qual

    # Dataset format for each recognized file extension
    formats = {
        '.txt': 'text',
        '.fa': 'fasta',
        '.fna': 'fasta',
        '.faa': 'fasta',
        '.fasta': 'fasta',
        '.fq': 'fastq',
        '.fastq': 'fastq',
        '.qual': 'qual',
    }

    lib_num_re = re.compile(r'-(\d+)-')   # optional library number
    lib_type_re = re.compile(r'-(\w+)$')  # trailing type, e.g. "reads"

    # os.listdir() returns a list, so renaming files while looping is safe;
    # sorting only makes the processing order deterministic.
    for fname in sorted(os.listdir(output_dir)):

        # Skip files that do not start with the output_id.  A plain prefix
        # test is used so that regex metacharacters in the ID are taken
        # literally.
        if not fname.startswith(output_id):
            continue

        source = os.path.join(output_dir, fname)
        basename, extension = os.path.splitext(fname)

        # Assign the dataset format
        dataset_format = formats.get(extension)
        if dataset_format is None:
            stop_err('Error: File %s had the unknown extension %s' % (fname, extension))

        # Assign the dataset name
        name = ''
        match = lib_num_re.search(basename)
        if match is not None:
            name = 'lib%s' % match.group(1)

        match = lib_type_re.search(basename)
        if match is None:
            stop_err('Error: File with basename %s did not have a recognized name' % (basename))

        lib_type = match.group(1)
        if dataset_format == 'qual':
            # Quality files are labelled by their format, not by their type
            lib_type = 'qual'

        name = name + '-' + lib_type

        # Move the dataset to the proper place
        optional_spec = 'asdf'
        destination = os.path.join(
            output_dir,
            'primary_%s_%s_visible_%s_%s' % (output_id, name, dataset_format, optional_spec)
        )

        # sys.stdout.write gives the same output on Python 2 and Python 3
        sys.stdout.write("moving %s to %s\n" % (source, destination))

        try:
            os.rename(source, destination)
        except OSError:
            # sys.exc_info() fetches the exception in a way that is valid
            # syntax on both Python 2.4+ and Python 3.
            stop_err('Error: ' + str(sys.exc_info()[1]))


if __name__ == "__main__":
    __main__()