annotate cuffcompare_wrapper.py @ 5:67695d7ff787 draft

Replace sam_fa_indices.loc with fasta_indexes.loc in fasta_indexes.loc.sample.
author devteam
date Thu, 09 Jan 2014 14:27:37 -0500
parents 8b22e9adae34
children 8e534225baa9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
2
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
3 # Supports Cuffcompare versions v1.3.0 and newer.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
4
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
6
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
7 def stop_err( msg ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
8 sys.stderr.write( '%s\n' % msg )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
9 sys.exit()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
10
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
11 def __main__():
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
12 #Parse Command Line
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
13 parser = optparse.OptionParser()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
14 parser.add_option( '-r', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
15 parser.add_option( '-R', action="store_true", dest='ignore_nonoverlap', help='If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts accuracy file' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
16 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
17
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
18 # Wrapper / Galaxy options.
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
19 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
20 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
21
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
22 # Outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
23 parser.add_option( '', '--combined-transcripts', dest='combined_transcripts' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
24
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
25 (options, args) = parser.parse_args()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
26
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
27 # output version # of tool
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
28 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
29 tmp = tempfile.NamedTemporaryFile().name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
30 tmp_stdout = open( tmp, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
31 proc = subprocess.Popen( args='cuffcompare 2>&1', shell=True, stdout=tmp_stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
32 tmp_stdout.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
33 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
34 stdout = None
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
35 for line in open( tmp_stdout.name, 'rb' ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
36 if line.lower().find( 'cuffcompare v' ) >= 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
37 stdout = line.strip()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
38 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
39 if stdout:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
40 sys.stdout.write( '%s\n' % stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
41 else:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
42 raise Exception
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
43 except:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
44 sys.stdout.write( 'Could not determine Cuffcompare version\n' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
45
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
46 # Set/link to sequence file.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
47 if options.use_seq_data:
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
48 if options.ref_file:
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
49 # Sequence data from history.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
50 # Create symbolic link to ref_file so that index will be created in working directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
51 seq_path = "ref.fa"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
52 os.symlink( options.ref_file, seq_path )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
53 else:
2
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
54 if not os.path.exists( options.index ):
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
55 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
8b22e9adae34 Update to the new data table specification.
Dave Bouvier <dave@bx.psu.edu>
parents: 0
diff changeset
56 seq_path = options.index
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
57
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
58 # Build command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
59
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
60 # Base.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
61 cmd = "cuffcompare -o cc_output "
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
62
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
63 # Add options.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
64 if options.ref_annotation:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
65 cmd += " -r %s " % options.ref_annotation
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
66 if options.ignore_nonoverlap:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
67 cmd += " -R "
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
68 if options.use_seq_data:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
69 cmd += " -s %s " % seq_path
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
70
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
71 # Add input files.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
72
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
73 # Need to symlink inputs so that output files are written to temp directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
74 for i, arg in enumerate( args ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
75 input_file_name = "./input%i" % ( i+1 )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
76 os.symlink( arg, input_file_name )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
77 cmd += "%s " % input_file_name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
78
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
79 # Debugging.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
80 print cmd
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
81
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
82 # Run command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
83 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
84 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
85 tmp_stderr = open( tmp_name, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
86 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
87 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
88 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
89
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
90 # Get stderr, allowing for case where it's very large.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
91 tmp_stderr = open( tmp_name, 'rb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
92 stderr = ''
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
93 buffsize = 1048576
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
94 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
95 while True:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
96 stderr += tmp_stderr.read( buffsize )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
97 if not stderr or len( stderr ) % buffsize != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
98 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
99 except OverflowError:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
100 pass
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
101 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
102
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
103 # Error checking.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
104 if returncode != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
105 raise Exception, stderr
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
106
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
107 # Copy outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
108 shutil.copyfile( "cc_output.combined.gtf" , options.combined_transcripts )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
109
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
110 # check that there are results in the output file
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
111 cc_output_fname = "cc_output.stats"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
112 if len( open( cc_output_fname, 'rb' ).read().strip() ) == 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
113 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
114 except Exception, e:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
115 stop_err( 'Error running cuffcompare. ' + str( e ) )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
116
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
117 if __name__=="__main__": __main__()