annotate cuffdiff_wrapper.py @ 2:fdf01b3c1841

Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
author Jim Johnson <jj@umn.edu>
date Fri, 08 Nov 2013 14:54:01 -0600
parents ebb9a992508d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/env python
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
3 # Wrapper supports Cuffdiff versions v1.3.0-v2.0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
4
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
6
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
7 def group_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
8 groups = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
9 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
10 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
11 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
12 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
13 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
14 elif arg[0] is ",":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
15 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
16 flist = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
17 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
18 flist.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
19 groups.append(flist)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
20
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
21 setattr(parser.values, option.dest, groups)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
22
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
23 def label_callback( option, op_str, value, parser ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
24 labels = []
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
25 for arg in parser.rargs:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
26 arg = arg.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
27 if arg[0] is "-":
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
28 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
29 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
30 labels.append(arg)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
31
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
32 setattr(parser.values, option.dest, labels)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
33
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
34 def stop_err( msg ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
35 sys.stderr.write( "%s\n" % msg )
2
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
36 sys.exit(1)
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
37
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
38 # Copied from sam_to_bam.py:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
39 def check_seq_file( dbkey, cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
40 seq_path = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
41 for line in open( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
42 line = line.rstrip( '\r\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
43 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
44 fields = line.split( '\t' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
45 if len( fields ) < 3:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
46 continue
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
47 if fields[1] == dbkey:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
48 seq_path = fields[2].strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
49 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
50 return seq_path
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
51
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
52 def __main__():
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
53 #Parse Command Line
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
54 parser = optparse.OptionParser()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
55
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
56 # Cuffdiff options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
57 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
58 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
59 parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist', help='This is the expected (mean) inner distance between mate pairs. \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
60 For, example, for paired end runs with fragments selected at 300bp, \
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
61 where each end is 50bp, you should set -r to be 200. The default is 45bp.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')
2
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
65 parser.add_option( '--library-norm-method', dest='library_norm_method' )
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
66 parser.add_option( '--dispersion-method', dest='dispersion_method' )
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
67
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
68 # Advanced Options:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
69 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
70 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
71
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
72 # Wrapper / Galaxy options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
73 parser.add_option( '-f', '--files', dest='groups', action="callback", callback=group_callback, help="Groups to be processed, groups are separated by spaces, replicates in a group comma separated. group1_rep1,group1_rep2 group2_rep1,group2_rep2, ..., groupN_rep1, groupN_rep2" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
74 parser.add_option( '-A', '--inputA', dest='inputA', help='A transcript GTF file produced by cufflinks, cuffcompare, or other source.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
75 parser.add_option( '-1', '--input1', dest='input1', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
76 parser.add_option( '-2', '--input2', dest='input2', help='File of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
77
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
78 # Label options
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
79 parser.add_option('-L', '--labels', dest='labels', action="callback", callback=label_callback, help="Labels for the groups the replicates are in.")
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
80
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
81 # Normalization options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
82 parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
83
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
84 # Bias correction options.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
85 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
86 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
87 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
88 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
89
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
90 # Outputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
91 parser.add_option( "--isoforms_fpkm_tracking_output", dest="isoforms_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
92 parser.add_option( "--genes_fpkm_tracking_output", dest="genes_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
93 parser.add_option( "--cds_fpkm_tracking_output", dest="cds_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
94 parser.add_option( "--tss_groups_fpkm_tracking_output", dest="tss_groups_fpkm_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
95 parser.add_option( "--isoforms_read_group_tracking_output", dest="isoforms_read_group_tracking_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
96 parser.add_option( "--genes_read_group_tracking_output", dest="genes_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
97 parser.add_option( "--cds_read_group_tracking_output", dest="cds_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
98 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
99 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
100 parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
101 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
102 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
2
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
103 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
104 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
105 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
106 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
107 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
108 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
109 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
110 parser.add_option( "--run_info_output", dest="run_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
111 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
112 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
113 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
114
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
115 (options, args) = parser.parse_args()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
116
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
117 # output version # of tool
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
118 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
119 tmp = tempfile.NamedTemporaryFile().name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
120 tmp_stdout = open( tmp, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
121 proc = subprocess.Popen( args='cuffdiff --no-update-check 2>&1', shell=True, stdout=tmp_stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
122 tmp_stdout.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
123 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
124 stdout = None
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
125 for line in open( tmp_stdout.name, 'rb' ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
126 if line.lower().find( 'cuffdiff v' ) >= 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
127 stdout = line.strip()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
128 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
129 if stdout:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
130 sys.stdout.write( '%s\n' % stdout )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
131 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
132 raise Exception
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
133 except:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
134 sys.stdout.write( 'Could not determine Cuffdiff version\n' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
135
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
136 # Make temp directory for output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
137 tmp_output_dir = tempfile.mkdtemp()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
138 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
139 if not os.path.exists( cuffdatadir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
140 os.makedirs( cuffdatadir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
141
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
142 # If doing bias correction, set/link to sequence file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
143 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
144 if options.ref_file != 'None':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
145 # Sequence data from history.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
146 # Create symbolic link to ref_file so that index will be created in working directory.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
147 seq_path = os.path.join( cuffdatadir, "ref.fa" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
148 os.symlink( options.ref_file, seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
149 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
150 # Sequence data from loc file.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
151 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
152 if not os.path.exists( cached_seqs_pointer_file ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
153 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
154 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
155 # and the equCab2.fa file will contain fasta sequences.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
156 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
157 if seq_path == '':
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
158 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
159
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
160 # Build command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
161
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
162 # Base; always use quiet mode to avoid problems with storing log output.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
163 cmd = "cuffdiff --no-update-check -q"
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
164
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
165 # Add options.
2
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
166 if options.library_norm_method:
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
167 cmd += ( " --library-norm-method %s" % options.library_norm_method )
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
168 if options.dispersion_method:
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
169 cmd += ( " --dispersion-method %s" % options.dispersion_method )
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
170 if options.inner_dist_std_dev:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
171 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
172 if options.num_threads:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
173 cmd += ( " -p %i" % int ( options.num_threads ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
174 if options.inner_mean_dist:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
175 cmd += ( " -m %i" % int ( options.inner_mean_dist ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
176 if options.min_alignment_count:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
177 cmd += ( " -c %i" % int ( options.min_alignment_count ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
178 if options.FDR:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
179 cmd += ( " --FDR %f" % float( options.FDR ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
180 if options.multi_read_correct:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
181 cmd += ( " -u" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
182 if options.num_importance_samples:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
183 cmd += ( " --num-importance-samples %i" % int ( options.num_importance_samples ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
184 if options.max_mle_iterations:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
185 cmd += ( " --max-mle-iterations %i" % int ( options.max_mle_iterations ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
186 if options.do_normalization:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
187 cmd += ( " -N" )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
188 if options.do_bias_correction:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
189 cmd += ( " -b %s" % seq_path )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
190
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
191 # Add inputs.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
192 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
193 if options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
194 cmd += " --labels "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
195 for label in options.labels:
2
fdf01b3c1841 Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
196 cmd += '"%s",' % label
0
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
197 cmd = cmd[:-1]
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
198
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
199 cmd += " " + options.inputA + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
200
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
201 for group in options.groups:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
202 for filename in group:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
203 cmd += filename + ","
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
204 cmd = cmd[:-1] + " "
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
205 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
206 cmd += " " + options.inputA + " " + options.input1 + " " + options.input2
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
207
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
208 # Debugging.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
209 print cmd
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
210
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
211 # Run command.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
212 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
213 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
214 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
215 proc = subprocess.Popen( args=cmd, shell=True, cwd=cuffdatadir, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
216 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
217 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
218
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
219 # Get stderr, allowing for case where it's very large.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
220 tmp_stderr = open( tmp_name, 'rb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
221 stderr = ''
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
222 buffsize = 1048576
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
223 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
224 while True:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
225 stderr += tmp_stderr.read( buffsize )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
226 if not stderr or len( stderr ) % buffsize != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
227 break
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
228 except OverflowError:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
229 pass
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
230 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
231
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
232 # Error checking.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
233 if returncode != 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
234 raise Exception, stderr
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
235
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
236 # check that there are results in the output file
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
237 if len( open( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), 'rb' ).read().strip() ) == 0:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
238 raise Exception, 'The main output file is empty, there may be an error with your input file or settings.'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
239 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
240 stop_err( 'Error running cuffdiff. ' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
241
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
242
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
243 # Copy output files from tmp directory to specified files.
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
244 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
245 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
246 if options.isoforms_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
247 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.fpkm_tracking" ), options.isoforms_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
248 if options.genes_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
249 shutil.copyfile( os.path.join( cuffdatadir, "genes.fpkm_tracking" ), options.genes_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
250 if options.cds_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
251 shutil.copyfile( os.path.join( cuffdatadir, "cds.fpkm_tracking" ), options.cds_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
252 if options.tss_groups_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
253 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.fpkm_tracking" ), options.tss_groups_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
254
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
255 if options.isoforms_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
256 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.read_group_tracking" ), options.isoforms_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
257 if options.genes_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
258 shutil.copyfile( os.path.join( cuffdatadir, "genes.read_group_tracking" ), options.genes_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
259 if options.cds_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
260 shutil.copyfile( os.path.join( cuffdatadir, "cds.read_group_tracking" ), options.cds_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
261 if options.tss_groups_read_group_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.read_group_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
262 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.read_group_tracking" ), options.tss_groups_read_group_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
263
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
264 if options.isoforms_exp_output and os.path.exists(os.path.join( cuffdatadir, "isoform_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
265 shutil.copyfile( os.path.join( cuffdatadir, "isoform_exp.diff" ), options.isoforms_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
266 if options.genes_exp_output and os.path.exists(os.path.join( cuffdatadir, "gene_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
267 shutil.copyfile( os.path.join( cuffdatadir, "gene_exp.diff" ), options.genes_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
268 if options.cds_exp_fpkm_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
269 shutil.copyfile( os.path.join( cuffdatadir, "cds_exp.diff" ), options.cds_exp_fpkm_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
270 if options.tss_groups_exp_output and os.path.exists(os.path.join( cuffdatadir, "tss_group_exp.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
271 shutil.copyfile( os.path.join( cuffdatadir, "tss_group_exp.diff" ), options.tss_groups_exp_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
272
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
273 if options.isoforms_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "isoforms.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
274 shutil.copyfile( os.path.join( cuffdatadir, "isoforms.count_tracking" ), options.isoforms_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
275 if options.genes_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "genes.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
276 shutil.copyfile( os.path.join( cuffdatadir, "genes.count_tracking" ), options.genes_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
277 if options.cds_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "cds.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
278 shutil.copyfile( os.path.join( cuffdatadir, "cds.count_tracking" ), options.cds_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
279 if options.tss_groups_count_tracking_output and os.path.exists(os.path.join( cuffdatadir, "tss_groups.count_tracking" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
280 shutil.copyfile( os.path.join( cuffdatadir, "tss_groups.count_tracking" ), options.tss_groups_count_tracking_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
281
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
282 if options.cds_diff_output and os.path.exists(os.path.join( cuffdatadir, "cds.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
283 shutil.copyfile( os.path.join( cuffdatadir, "cds.diff" ), options.cds_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
284
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
285 if options.splicing_diff_output and os.path.exists(os.path.join( cuffdatadir, "splicing.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
286 shutil.copyfile( os.path.join( cuffdatadir, "splicing.diff" ), options.splicing_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
287 if options.promoters_diff_output and os.path.exists(os.path.join( cuffdatadir, "promoters.diff" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
288 shutil.copyfile( os.path.join( cuffdatadir, "promoters.diff" ), options.promoters_diff_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
289
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
290 if options.run_info_output and os.path.exists(os.path.join( cuffdatadir, "run.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
291 shutil.copyfile( os.path.join( cuffdatadir, "run.info" ), options.run_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
292 if options.read_groups_info_output and os.path.exists(os.path.join( cuffdatadir, "read_groups.info" )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
293 shutil.copyfile( os.path.join( cuffdatadir, "read_groups.info" ), options.read_groups_info_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
294
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
295 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
296 stop_err( 'Error in cuffdiff:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
297 if options.cummeRbund_db_output:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
298 try:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
299 dbFile = 'cuffData.db'
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
300 rscript = tempfile.NamedTemporaryFile( dir=tmp_output_dir,suffix='.r' ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
301 rscript_fh = open( rscript, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
302 rscript_fh.write('library(cummeRbund)\n')
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
303 if options.inputA and options.ref_file:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
304 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", gtfFile = "%s", genome = "%s", rebuild = T)\n' % (cuffdatadir,dbFile,options.inputA,options.ref_file))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
305 else:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
306 rscript_fh.write('cuff<-readCufflinks(dir = "%s", dbFile = "%s", rebuild = T)\n' % (cuffdatadir,dbFile))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
307 rscript_fh.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
308 cmd = ( "Rscript --vanilla %s" % rscript )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
309 tmp_name = tempfile.NamedTemporaryFile( dir=tmp_output_dir ).name
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
310 tmp_stderr = open( tmp_name, 'wb' )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
311 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
312 #proc = subprocess.Popen( args=cmd, shell=True)
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
313 returncode = proc.wait()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
314 tmp_stderr.close()
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
315 if os.path.exists(os.path.join( cuffdatadir, dbFile )):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
316 shutil.copyfile( os.path.join( cuffdatadir, dbFile ), options.cummeRbund_db_output )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
317 shutil.rmtree(os.path.join( cuffdatadir, dbFile ))
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
318 except Exception, e:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
319 stop_err( 'Error generating cummeRbund cuffData.db:\n' + str( e ) )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
320 finally:
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
321 # Clean up temp dirs
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
322 if os.path.exists( tmp_output_dir ):
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
323 shutil.rmtree( tmp_output_dir )
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
324
da7241f92ecf Uploaded
jjohnson
parents:
diff changeset
325 if __name__=="__main__": __main__()