annotate cuffcompare_wrapper.py @ 0:9d35cf35634e

Uploaded tool tarball.
author devteam
date Tue, 01 Oct 2013 12:49:41 -0400
parents
children 8b22e9adae34
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
2
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
3 # Supports Cuffcompare versions v1.3.0 and newer.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
4
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
6
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
7 def stop_err( msg ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
8 sys.stderr.write( '%s\n' % msg )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
9 sys.exit()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
10
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
11 # Copied from sam_to_bam.py:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
12 def check_seq_file( dbkey, cached_seqs_pointer_file ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
13 seq_path = ''
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
14 for line in open( cached_seqs_pointer_file ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
15 line = line.rstrip( '\r\n' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
16 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
17 fields = line.split( '\t' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
18 if len( fields ) < 3:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
19 continue
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
20 if fields[1] == dbkey:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
21 seq_path = fields[2].strip()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
22 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
23 return seq_path
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
24
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
25 def __main__():
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
26 #Parse Command Line
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
27 parser = optparse.OptionParser()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
28 parser.add_option( '-r', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
29 parser.add_option( '-R', action="store_true", dest='ignore_nonoverlap', help='If -r was specified, this option causes cuffcompare to ignore reference transcripts that are not overlapped by any transcript in one of cuff1.gtf,...,cuffN.gtf. Useful for ignoring annotated transcripts that are not present in your RNA-Seq samples and thus adjusting the "sensitivity" calculation in the accuracy report written in the transcripts accuracy file' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
30 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
31
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
32 # Wrapper / Galaxy options.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
33 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
34 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
35 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
36
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
37 # Outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
38 parser.add_option( '', '--combined-transcripts', dest='combined_transcripts' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
39
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
40 (options, args) = parser.parse_args()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
41
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
42 # output version # of tool
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
43 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
44 tmp = tempfile.NamedTemporaryFile().name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
45 tmp_stdout = open( tmp, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
46 proc = subprocess.Popen( args='cuffcompare 2>&1', shell=True, stdout=tmp_stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
47 tmp_stdout.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
48 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
49 stdout = None
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
50 for line in open( tmp_stdout.name, 'rb' ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
51 if line.lower().find( 'cuffcompare v' ) >= 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
52 stdout = line.strip()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
53 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
54 if stdout:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
55 sys.stdout.write( '%s\n' % stdout )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
56 else:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
57 raise Exception
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
58 except:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
59 sys.stdout.write( 'Could not determine Cuffcompare version\n' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
60
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
61 # Set/link to sequence file.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
62 if options.use_seq_data:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
63 if options.ref_file != 'None':
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
64 # Sequence data from history.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
65 # Create symbolic link to ref_file so that index will be created in working directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
66 seq_path = "ref.fa"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
67 os.symlink( options.ref_file, seq_path )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
68 else:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
69 # Sequence data from loc file.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
70 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
71 if not os.path.exists( cached_seqs_pointer_file ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
72 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
73 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
74 # and the equCab2.fa file will contain fasta sequences.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
75 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
76 if seq_path == '':
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
77 stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
78
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
79 # Build command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
80
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
81 # Base.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
82 cmd = "cuffcompare -o cc_output "
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
83
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
84 # Add options.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
85 if options.ref_annotation:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
86 cmd += " -r %s " % options.ref_annotation
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
87 if options.ignore_nonoverlap:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
88 cmd += " -R "
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
89 if options.use_seq_data:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
90 cmd += " -s %s " % seq_path
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
91
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
92 # Add input files.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
93
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
94 # Need to symlink inputs so that output files are written to temp directory.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
95 for i, arg in enumerate( args ):
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
96 input_file_name = "./input%i" % ( i+1 )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
97 os.symlink( arg, input_file_name )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
98 cmd += "%s " % input_file_name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
99
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
100 # Debugging.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
101 print cmd
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
102
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
103 # Run command.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
104 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
105 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
106 tmp_stderr = open( tmp_name, 'wb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
107 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
108 returncode = proc.wait()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
109 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
110
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
111 # Get stderr, allowing for case where it's very large.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
112 tmp_stderr = open( tmp_name, 'rb' )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
113 stderr = ''
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
114 buffsize = 1048576
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
115 try:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
116 while True:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
117 stderr += tmp_stderr.read( buffsize )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
118 if not stderr or len( stderr ) % buffsize != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
119 break
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
120 except OverflowError:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
121 pass
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
122 tmp_stderr.close()
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
123
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
124 # Error checking.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
125 if returncode != 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
126 raise Exception, stderr
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
127
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
128 # Copy outputs.
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
129 shutil.copyfile( "cc_output.combined.gtf" , options.combined_transcripts )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
130
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
131 # check that there are results in the output file
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
132 cc_output_fname = "cc_output.stats"
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
133 if len( open( cc_output_fname, 'rb' ).read().strip() ) == 0:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
134 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
135 except Exception, e:
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
136 stop_err( 'Error running cuffcompare. ' + str( e ) )
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
137
9d35cf35634e Uploaded tool tarball.
devteam
parents:
diff changeset
138 if __name__=="__main__": __main__()