annotate cuffmerge_wrapper.py @ 10:b6e3849293b1 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:59:06 -0500
parents 5b285b6e4ee3
children 1707a530e598
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
2
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
4
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
5 def stop_err( msg ):
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
6 sys.stderr.write( '%s\n' % msg )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
7 sys.exit()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
8
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
9 def __main__():
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
10 #Parse Command Line
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
11 parser = optparse.OptionParser()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
12 parser.add_option( '-g', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
13 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
14 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
15
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
16 # Wrapper / Galaxy options.
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
17 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
18 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
19
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
20 # Outputs.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
21 parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' )
10
b6e3849293b1 Uploaded
devteam
parents: 2
diff changeset
22 parser.add_option( '--min-isoform-fraction', dest='min_isoform_fraction' )
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
23
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
24 (options, args) = parser.parse_args()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
25
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
26 # output version # of tool
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
27 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
28 tmp = tempfile.NamedTemporaryFile().name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
29 tmp_stdout = open( tmp, 'wb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
30 proc = subprocess.Popen( args='cuffmerge -v 2>&1', shell=True, stdout=tmp_stdout )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
31 tmp_stdout.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
32 returncode = proc.wait()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
33 stdout = None
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
34 for line in open( tmp_stdout.name, 'rb' ):
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
35 if line.lower().find( 'merge_cuff_asms v' ) >= 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
36 stdout = line.strip()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
37 break
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
38 if stdout:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
39 sys.stdout.write( '%s\n' % stdout )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
40 else:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
41 raise Exception
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
42 except:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
43 sys.stdout.write( 'Could not determine Cuffmerge version\n' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
44
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
45 # Set/link to sequence file.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
46 if options.use_seq_data:
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
47 if options.ref_file:
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
48 # Sequence data from history.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
49 # Create symbolic link to ref_file so that index will be created in working directory.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
50 seq_path = "ref.fa"
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
51 os.symlink( options.ref_file, seq_path )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
52 else:
2
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
53 if not os.path.exists( options.index ):
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
54 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
5b285b6e4ee3 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
55 seq_path = options.index
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
56
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
57 # Build command.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
58
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
59 # Base.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
60 cmd = "cuffmerge -o cm_output "
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
61
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
62 # Add options.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
63 if options.num_threads:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
64 cmd += ( " -p %i " % int ( options.num_threads ) )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
65 if options.ref_annotation:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
66 cmd += " -g %s " % options.ref_annotation
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
67 if options.use_seq_data:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
68 cmd += " -s %s " % seq_path
10
b6e3849293b1 Uploaded
devteam
parents: 2
diff changeset
69 if options.min_isoform_fraction:
b6e3849293b1 Uploaded
devteam
parents: 2
diff changeset
70 cmd += " --min-isoform-fraction %s " % (options.min_isoform_fraction)
0
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
71 # Add input files to a file.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
72 inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
73 inputs_file = open( inputs_file_name, 'w' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
74 for arg in args:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
75 inputs_file.write( arg + "\n" )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
76 inputs_file.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
77 cmd += inputs_file_name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
78
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
79 # Debugging.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
80 print cmd
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
81
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
82 # Run command.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
83 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
84 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
85 tmp_stderr = open( tmp_name, 'wb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
86 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
87 returncode = proc.wait()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
88 tmp_stderr.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
89
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
90 # Get stderr, allowing for case where it's very large.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
91 tmp_stderr = open( tmp_name, 'rb' )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
92 stderr = ''
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
93 buffsize = 1048576
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
94 try:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
95 while True:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
96 stderr += tmp_stderr.read( buffsize )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
97 if not stderr or len( stderr ) % buffsize != 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
98 break
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
99 except OverflowError:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
100 pass
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
101 tmp_stderr.close()
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
102
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
103 # Error checking.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
104 if returncode != 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
105 raise Exception, stderr
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
106
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
107 if len( open( "cm_output/merged.gtf", 'rb' ).read().strip() ) == 0:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
108 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
109
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
110 # Copy outputs.
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
111 shutil.copyfile( "cm_output/merged.gtf" , options.merged_transcripts )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
112
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
113 except Exception, e:
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
114 stop_err( 'Error running cuffmerge. ' + str( e ) )
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
115
dbbd37e013aa Uploaded tool tarball.
devteam
parents:
diff changeset
116 if __name__=="__main__": __main__()