comparison ALFA/ALFA_wrapper.py @ 22:1714165f5df0 draft

Uploaded
author charles-bernard
date Thu, 27 Oct 2016 04:42:59 -0400
parents
children f1a20d50c495
comparison
equal deleted inserted replaced
21:728294cecd96 22:1714165f5df0
1 #!/usr/bin/python
2
3 import argparse
4 import logging
5 import os
6 import re
7 import shutil
8 import subprocess
9 import sys
10 import tempfile
11
12 def exit_and_explain(msg):
13 logging.critical(msg)
14 sys.exit(msg)
15
16 def cleanup_before_exit(tmp_dir):
17 if tmp_dir and os.path.exists(tmp_dir):
18 shutil.rmtree(tmp_dir)
19
20 def get_arg():
21 parser = argparse.ArgumentParser()
22 parser.add_argument('--project_name', dest='project_name', action='store', nargs=1, metavar='project_name', type=str)
23 #Input 1: Annotation File
24 parser.add_argument('--index', dest='indexes', action='store', nargs=2, metavar=('stranded_index_filename', 'unstranded_index_filename'), type=str)
25 parser.add_argument('--bi_index', dest='bi_indexes', action='store', nargs=1, metavar='built_in_indexes_dir_path', type=str )
26 parser.add_argument('--annotation', dest='annotation_file', action='store', nargs=1, metavar='annotation_gtf_file', type=str )
27 #Input 2: Mapped Reads
28 parser.add_argument('--reads_format', dest='reads_format', action='store', nargs=1, choices=['bam', 'bedgraph'], metavar='reads_format', type=str)
29 parser.add_argument('--reads', dest='reads', action='store', nargs='+', metavar=('bam_file1 label1',""), type=str)
30 parser.add_argument('--strandness', dest='strandness', action='store', nargs=1, default=['unstranded'], choices=['unstranded', 'forward', 'reverse'], metavar='strandness', type=str)
31 #Output files
32 parser.add_argument('--output_pdf', dest='output_pdf', action='store', nargs=1, metavar='output_pdf_filename', type=str)
33 parser.add_argument('--output_svg', dest='output_svg', action='store', nargs=2, metavar=('categories_svg_filename', 'biotypes_svg_filename'), type=str)
34 parser.add_argument('--output_png', dest='output_png', action='store', nargs=2, metavar=('categories_png_filename', 'biotypes_png_filename'), type=str)
35 parser.add_argument('--output_count', dest='output_count', action='store', nargs=1, metavar='output_count_filename', type=str)
36 parser.add_argument('--output_index', dest='output_indexes', action='store', nargs=2, metavar=('output_stranded_index_filename', 'output_unstranded_index_filename'), type=str)
37 #Output Options
38 parser.add_argument('--categories_depth', dest='categories_depth', action='store', nargs=1, default=[3], choices=range(1,5), metavar='categories_depth', type=int)
39 parser.add_argument('--plot_format', dest='plot_format', action='store', nargs=1, choices=['pdf', 'png', 'svg'], metavar='plot_format', type=str)
40 parser.add_argument('--threshold', dest='threshold', action='store', nargs=2, metavar=('yMin', 'yMax'), type=float)
41 #Internal variables
42 parser.add_argument('--log_report', dest='log_report', action='store', nargs=1, metavar='log_filename', type=str)
43 parser.add_argument('--galaxy_root', dest='GALAXY_ROOT_DIR', action='store', nargs=1, metavar='galaxy_root_path', type=str)
44 parser.add_argument('--tool_dir', dest='GALAXY_TOOL_DIR', action='store', nargs=1, metavar='galaxy_tool_dir_path', type=str)
45 args = parser.parse_args()
46 return args
47
48 def make_tmp_dir(galaxy_root):
49 parent_dir = os.path.join(galaxy_root, 'database/tmp/')
50 if os.path.exists(parent_dir):
51 tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='', dir=parent_dir)
52 else:
53 tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='', dir='.')
54 return tmp_dir
55
56 def mv_and_rename_user_indexes(stranded_index, unstranded_index):
57 index='index'
58 shutil.copy(stranded_index, index + '.stranded.index')
59 shutil.copy(unstranded_index, index + '.unstranded.index')
60 return index
61
62 def get_input2_args(reads_list, format):
63 n = len(reads_list)
64 if n%2 != 0:
65 exit_and_explain('Problem with pairing reads filename and reads label')
66 input2_args='-i'
67 k = 0
68 reads_filenames = [''] * (n/2)
69 reads_labels = [''] * (n/2)
70 for i in range(0, n, 2):
71 reads_filenames[k] = reads_list[i].split('__fname__')[1]
72 reads_labels[k] = reads_list[i+1].split('__label__')[1]
73 if not reads_labels[k]:
74 reads_labels[k] = 'sample_%s' % str(k+1)
75 input2_args='%s %s %s' % (input2_args, reads_filenames[k], reads_labels[k])
76 k += 1
77 if format == 'bedgraph':
78 input2_args = input2_args + ' --bedgraph'
79 return input2_args, reads_filenames, reads_labels
80
81 def redirect_errors(alfa_out, alfa_err):
82 # When the option --n is enabled, alfa prints '### End of the program' in stderr even if the process worked-
83 # The following lines to avoid the tool from crashing in this case
84 if alfa_err and not re.search('### End of program', alfa_err):
85 # When alfa prints '### End of program' in stdout, all the messages in stderr are considered
86 # as warnings and not as errors.
87 if re.search('### End of program', alfa_out):
88 logging.warning("The script ALFA.py encountered the following warning:\n\n%s" % alfa_err)
89 logging.info("\n******************************************************************\n")
90 # True errors make the script exits
91 else:
92 exit_and_explain("The script ALFA.py encountered the following error:\n\n%s" % alfa_err)
93
94 def merge_count_files(reads_labels):
95 merged_count_file = open('count_file.txt', 'wb')
96 for i in range(0, len(reads_labels)):
97 current_count_file = open(reads_labels[i] + '.categories_counts', 'r')
98 reads_label = reads_labels[i]
99 merged_count_file.write('##LABEL: %s\n\n' % reads_label)
100 merged_count_file.write(current_count_file.read())
101 merged_count_file.write('__________________________________________________________________\n')
102 current_count_file.close()
103 merged_count_file.close()
104 return 'count_file.txt'
105
106 def main():
107 args = get_arg()
108
109 if not (args.output_pdf or args.output_png or args.output_svg or args.output_indexes or args.output_count):
110 exit_and_explain('Error: no output to return\nProcess Aborted\n')
111
112 tmp_dir = make_tmp_dir(args.GALAXY_TOOL_DIR[0])
113 os.chdir(tmp_dir)
114
115 logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format='%(message)s')
116
117 alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], 'ALFA.py')
118
119 #INPUT1: Annotation File
120 if args.indexes:
121 # The indexes submitted by the user must exhibit the suffix '.(un)stranded.index' and will be called by alfa by their prefix
122 index = mv_and_rename_user_indexes(args.indexes[0], args.indexes[1])
123 input1_args = '-g %s' % index
124 elif args.bi_indexes:
125 input1_args = '-g %s' % args.bi_indexes[0]
126 elif args.annotation_file:
127 input1_args = '-a %s' % args.annotation_file[0]
128 else:
129 exit_and_explain('No annotation file submitted !')
130
131 #INPUT 2: Mapped Reads
132 if args.reads:
133 input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0])
134 strandness = '-s %s' % args.strandness[0]
135 else:
136 exit_and_explain('No reads submitted !')
137
138 ##Output options
139 categories_depth = '-d %s' % args.categories_depth[0]
140 if not (args.output_pdf or args.output_png or args.output_svg):
141 output_args = '--n'
142 else:
143 if args.output_pdf:
144 output_args = '--pdf plot.pdf'
145 if args.output_png:
146 output_args = '--png plot'
147 if args.output_svg:
148 output_args = '--svg plot'
149 if args.threshold:
150 output_args = '%s -t %.3f %.3f' % (output_args, args.threshold[0], args.threshold[1])
151
152 ##Run alfa
153 cmd = 'python %s %s %s %s %s %s' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)
154 logging.info("__________________________________________________________________\n")
155 logging.info("Alfa execution")
156 logging.info("__________________________________________________________________\n")
157 logging.info("Command Line:\n%s\n" % cmd)
158 logging.info("------------------------------------------------------------------\n")
159 alfa_result = subprocess.Popen(args=cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
160 alfa_out, alfa_err = alfa_result.communicate()
161
162 ##Handle stdout, warning, errors...
163 redirect_errors(alfa_out, alfa_err)
164
165 logging.info("Alfa prompt:\n%s" % alfa_out)
166
167 ##Redirect outputs
168 if args.output_pdf:
169 shutil.move('plot.pdf', args.output_pdf[0])
170 if args.output_png:
171 shutil.move('plot' + '.categories.png', args.output_png[0])
172 shutil.move('plot' + '.biotypes.png', args.output_png[1])
173 if args.output_svg:
174 shutil.move('plot' + '.categories.svg', args.output_svg[0])
175 shutil.move('plot' + '.biotypes.svg', args.output_svg[1])
176 if args.output_count:
177 count_filename = merge_count_files(reads_labels)
178 shutil.move(count_filename, args.output_count[0])
179 if args.output_indexes:
180 if args.annotation_file:
181 indexes_regex = re.compile('.*\.index')
182 indexes = filter(indexes_regex.search, os.listdir('.'))
183 indexes.sort()
184 shutil.move(indexes[0], args.output_indexes[0])
185 shutil.move(indexes[1], args.output_indexes[1])
186 if args.indexes:
187 shutil.move(index + '.stranded.index', args.output_indexes[0])
188 shutil.move(index + '.unstranded.index', args.output_indexes[1])
189 if args.bi_indexes:
190 shutil.move(args.bi_indexes[0] + '.stranded.index', args.output_index[0])
191 shutil.move(args.bi_indexes[1] + '.unstranded.index', args.output_index[1])
192
193 cleanup_before_exit(tmp_dir)
194 main()