comparison cuffdiff_wrapper.py @ 2:fdf01b3c1841

Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
author Jim Johnson <jj@umn.edu>
date Fri, 08 Nov 2013 14:54:01 -0600
parents ebb9a992508d
children
comparison
Legend: equal | deleted | inserted | replaced
Left column: revision 1:ebb9a992508d — Right column: revision 2:fdf01b3c1841
31 31
32 setattr(parser.values, option.dest, labels) 32 setattr(parser.values, option.dest, labels)
33 33
34 def stop_err( msg ): 34 def stop_err( msg ):
35 sys.stderr.write( "%s\n" % msg ) 35 sys.stderr.write( "%s\n" % msg )
36 sys.exit() 36 sys.exit(1)
37 37
38 # Copied from sam_to_bam.py: 38 # Copied from sam_to_bam.py:
39 def check_seq_file( dbkey, cached_seqs_pointer_file ): 39 def check_seq_file( dbkey, cached_seqs_pointer_file ):
40 seq_path = '' 40 seq_path = ''
41 for line in open( cached_seqs_pointer_file ): 41 for line in open( cached_seqs_pointer_file ):
60 For, example, for paired end runs with fragments selected at 300bp, \ 60 For, example, for paired end runs with fragments selected at 300bp, \
61 where each end is 50bp, you should set -r to be 200. The default is 45bp.') 61 where each end is 50bp, you should set -r to be 200. The default is 45bp.')
62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' ) 62 parser.add_option( '-c', '--min-alignment-count', dest='min_alignment_count', help='The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples. If no testing is performed, changes in the locus are deemed not signficant, and the locus\' observed changes don\'t contribute to correction for multiple testing. The default is 1,000 fragment alignments (up to 2,000 paired reads).' )
63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' ) 63 parser.add_option( '--FDR', dest='FDR', help='The allowed false discovery rate. The default is 0.05.' )
64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome') 64 parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')
65 parser.add_option( '--library-norm-method', dest='library_norm_method' )
66 parser.add_option( '--dispersion-method', dest='dispersion_method' )
65 67
66 # Advanced Options: 68 # Advanced Options:
67 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) 69 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
68 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) 70 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
69 71
96 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None ) 98 parser.add_option( "--tss_groups_read_group_tracking_output", dest="tss_groups_read_group_tracking_output", default=None )
97 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" ) 99 parser.add_option( "--isoforms_exp_output", dest="isoforms_exp_output" )
98 parser.add_option( "--genes_exp_output", dest="genes_exp_output" ) 100 parser.add_option( "--genes_exp_output", dest="genes_exp_output" )
99 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" ) 101 parser.add_option( "--tss_groups_exp_output", dest="tss_groups_exp_output" )
100 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" ) 102 parser.add_option( "--cds_exp_fpkm_tracking_output", dest="cds_exp_fpkm_tracking_output" )
103 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
101 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" ) 104 parser.add_option( "--isoforms_count_tracking_output", dest="isoforms_count_tracking_output" )
102 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" ) 105 parser.add_option( "--genes_count_tracking_output", dest="genes_count_tracking_output" )
103 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" ) 106 parser.add_option( "--cds_count_tracking_output", dest="cds_count_tracking_output" )
104 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" ) 107 parser.add_option( "--tss_groups_count_tracking_output", dest="tss_groups_count_tracking_output" )
105 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" ) 108 parser.add_option( "--splicing_diff_output", dest="splicing_diff_output" )
106 parser.add_option( "--cds_diff_output", dest="cds_diff_output" )
107 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" ) 109 parser.add_option( "--promoters_diff_output", dest="promoters_diff_output" )
108 parser.add_option( "--run_info_output", dest="run_info_output" ) 110 parser.add_option( "--run_info_output", dest="run_info_output" )
109 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" ) 111 parser.add_option( "--read_groups_info_output", dest="read_groups_info_output" )
110 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None) 112 parser.add_option( "--cuffdatadir", dest="cuffdatadir", default=None)
111 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None) 113 parser.add_option( "--cummeRbund_db_output", dest="cummeRbund_db_output", default=None)
135 tmp_output_dir = tempfile.mkdtemp() 137 tmp_output_dir = tempfile.mkdtemp()
136 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir 138 cuffdatadir = options.cuffdatadir if options.cuffdatadir else tmp_output_dir
137 if not os.path.exists( cuffdatadir ): 139 if not os.path.exists( cuffdatadir ):
138 os.makedirs( cuffdatadir ) 140 os.makedirs( cuffdatadir )
139 141
140
141 # If doing bias correction, set/link to sequence file. 142 # If doing bias correction, set/link to sequence file.
142 if options.do_bias_correction: 143 if options.do_bias_correction:
143 if options.ref_file != 'None': 144 if options.ref_file != 'None':
144 # Sequence data from history. 145 # Sequence data from history.
145 # Create symbolic link to ref_file so that index will be created in working directory. 146 # Create symbolic link to ref_file so that index will be created in working directory.
160 161
161 # Base; always use quiet mode to avoid problems with storing log output. 162 # Base; always use quiet mode to avoid problems with storing log output.
162 cmd = "cuffdiff --no-update-check -q" 163 cmd = "cuffdiff --no-update-check -q"
163 164
164 # Add options. 165 # Add options.
166 if options.library_norm_method:
167 cmd += ( " --library-norm-method %s" % options.library_norm_method )
168 if options.dispersion_method:
169 cmd += ( " --dispersion-method %s" % options.dispersion_method )
165 if options.inner_dist_std_dev: 170 if options.inner_dist_std_dev:
166 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) ) 171 cmd += ( " -s %i" % int ( options.inner_dist_std_dev ) )
167 if options.num_threads: 172 if options.num_threads:
168 cmd += ( " -p %i" % int ( options.num_threads ) ) 173 cmd += ( " -p %i" % int ( options.num_threads ) )
169 if options.inner_mean_dist: 174 if options.inner_mean_dist:
186 # Add inputs. 191 # Add inputs.
187 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2 192 # For replicate analysis: group1_rep1,group1_rep2 groupN_rep1,groupN_rep2
188 if options.groups: 193 if options.groups:
189 cmd += " --labels " 194 cmd += " --labels "
190 for label in options.labels: 195 for label in options.labels:
191 cmd += label + "," 196 cmd += '"%s",' % label
192 cmd = cmd[:-1] 197 cmd = cmd[:-1]
193 198
194 cmd += " " + options.inputA + " " 199 cmd += " " + options.inputA + " "
195 200
196 for group in options.groups: 201 for group in options.groups: