annotate cufflinks_wrapper.py @ 7:5346d5eea8b1 draft

Uploaded
author devteam
date Fri, 19 Dec 2014 11:58:22 -0500
parents da11bfc10e81
children 64698e16f4a6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
2
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
4 from galaxy import eggs
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
5 from galaxy.datatypes.util.gff_util import parse_gff_attributes, gff_attributes_to_str
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
6
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the wrapper.

    Writes ``msg`` followed by a newline to standard error and then exits
    the process with status 1, so that callers inspecting the exit code (and
    not only stderr) see the failure.

    :param msg: human-readable description of the failure.
    :raises SystemExit: always, with exit code 1.
    """
    sys.stderr.write("%s\n" % msg)
    # A bare sys.exit() would exit with status 0 and look like success.
    sys.exit(1)
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
10
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
def _build_option_parser():
    """Return the optparse parser describing every option the wrapper accepts."""
    parser = optparse.OptionParser()
    parser.add_option('-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.')
    parser.add_option('-I', '--max-intron-length', dest='max_intron_len', help='The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.')
    parser.add_option('-F', '--min-isoform-fraction', dest='min_isoform_fraction', help='After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.')
    parser.add_option('-j', '--pre-mrna-fraction', dest='pre_mrna_fraction', help='Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.')
    parser.add_option('-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.')
    parser.add_option('-G', '--GTF', dest='GTF', help='Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.')
    parser.add_option("--compatible-hits-norm", dest='compatible_hits_norm', help='Count hits compatible with reference RNAs only')
    parser.add_option('-g', '--GTF-guide', dest='GTFguide', help='use reference transcript annotation to guide assembly')
    parser.add_option("--3-overhang-tolerance", dest='three_overhang_tolerance', help='The number of bp allowed to overhang the 3prime end of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 600 bp.')
    parser.add_option("--intron-overhang-tolerance", dest='intron_overhang_tolerance', help='The number of bp allowed to enter the intron of a reference transcript when determining if an assembled transcript should be merged with it (ie, the assembled transcript is not novel). The default is 50 bp.')
    parser.add_option("--no-faux-reads", dest='no_faux_reads', help='This option disables tiling of the reference transcripts with faux reads. Use this if you only want to use sequencing reads in assembly but do not want to output assembled transcripts that lay within reference transcripts. All reference transcripts in the input annotation will also be included in the output.')
    parser.add_option('-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')

    # Normalization options.
    parser.add_option("--no-effective-length-correction", dest="no_effective_length_correction", action="store_true")
    parser.add_option("--no-length-correction", dest="no_length_correction", action="store_true")

    # Wrapper / Galaxy options.
    parser.add_option('-A', '--assembled-isoforms-output', dest='assembled_isoforms_output_file', help='Assembled isoforms output file; formate is GTF.')

    # Advanced Options:
    parser.add_option("--library-type", dest="library_type", help=' library prep used for input reads, default fr-unstranded')
    parser.add_option('-M', '--mask-file', dest='mask_file',
                      help='Tells Cufflinks to ignore all reads that could have come from transcripts in this GTF file. '
                           'We recommend including any annotated rRNA, mitochondrial transcripts other abundant transcripts '
                           'you wish to ignore in your analysis in this file. Due to variable efficiency of mRNA enrichment '
                           'methods and rRNA depletion kits, masking these transcripts often improves the overall robustness '
                           'of transcript abundance estimates.')
    # cufflinks: --frag-len-mean
    parser.add_option('-m', '--inner-mean-dist', dest='inner_mean_dist',
                      help='This is the expected (mean) inner distance between mate pairs. '
                           'For, example, for paired end runs with fragments selected at 300bp, '
                           'where each end is 50bp, you should set -r to be 200. The default is 45bp.')
    # cufflinks: --frag-len-std-dev
    parser.add_option('-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.')
    parser.add_option('--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000')
    parser.add_option('--junc-alpha', dest='junc_alpha', help='Alpha value for the binomial test used during false positive spliced alignment filtration. Default: 0.001')
    parser.add_option('--small-anchor-fraction', dest='small_anchor_fraction',
                      help='Spliced reads with less than this percent of their length on each side of '
                           'the junction are considered suspicious and are candidates for filtering prior to assembly. Default: 0.09.')
    parser.add_option('--overhang-tolerance', dest='overhang_tolerance',
                      help='The number of bp allowed to enter the intron of a transcript when determining if a '
                           'read or another transcript is mappable to/compatible with it. The default is 8 bp based on the default bowtie/TopHat parameters.')
    parser.add_option('--max-bundle-length', dest='max_bundle_length', help='Maximum genomic length of a given bundle. Default: 3,500,000bp')
    parser.add_option('--max-bundle-frags', dest='max_bundle_frags', help='Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 1,000,000')
    parser.add_option('--min-intron-length', dest='min_intron_length', help='Minimal allowed intron size. Default: 50')
    parser.add_option('--trim-3-avgcov-thresh', dest='trim_three_avgcov_thresh', help='Minimum average coverage required to attempt 3prime trimming. Default: 10')
    parser.add_option('--trim-3-dropoff-frac', dest='trim_three_dropoff_frac', help='The fraction of average coverage below which to trim the 3prime end of an assembled transcript. Default: 0.1')

    # Bias correction options.
    parser.add_option('-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
    parser.add_option('--index', dest='index', help='The path of the reference genome')
    parser.add_option('--ref_file', dest='ref_file', help='The reference dataset from the history')

    # Global model (for trackster).
    parser.add_option('--global_model', dest='global_model_file', help='Global model used for computing on local data')
    return parser


def _report_cufflinks_version():
    """Write the line of cufflinks' banner containing 'cufflinks v' to stdout, or a fallback notice."""
    version = None
    try:
        proc = subprocess.Popen(
            args=["cufflinks", "--no-update-check"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        output = proc.communicate()[0]
        if not isinstance(output, str):
            # Python 3 hands back bytes; Python 2 already gives str.
            output = output.decode("utf-8", "replace")
        for line in output.splitlines():
            if "cufflinks v" in line.lower():
                version = line.strip()
                break
    except Exception:
        # Version reporting is best-effort (e.g. cufflinks not on PATH).
        version = None
    if version:
        sys.stdout.write('%s\n' % version)
    else:
        sys.stdout.write('Could not determine Cufflinks version\n')


def _resolve_bias_sequence(options):
    """Return the sequence file path to pass to ``-b`` for bias correction."""
    if options.ref_file:
        # Sequence data from history.
        # Create symbolic link to ref_file so that index will be created in working directory.
        seq_path = "ref.fa"
        os.symlink(options.ref_file, seq_path)
        return seq_path
    # Guard against a missing --index as well as a non-existent path.
    if not options.index or not os.path.exists(options.index):
        stop_err('Reference genome %s not present, request it by reporting this error.' % options.index)
    return options.index


def _build_command(options, seq_path):
    """Translate parsed wrapper options into the cufflinks argument list."""
    # Base; always use quiet mode to avoid problems with storing log output.
    cmd = ["cufflinks", "-q", "--no-update-check"]

    def add_value(flag, raw, convert=None, fmt="%s"):
        # Options that were not supplied (None / empty) are left to cufflinks' defaults.
        if raw:
            cmd.extend([flag, fmt % (convert(raw) if convert else raw)])

    def add_flag(flag, enabled):
        if enabled:
            cmd.append(flag)

    add_value("-I", options.max_intron_len, int, "%i")
    add_value("-F", options.min_isoform_fraction, float, "%f")
    add_value("-j", options.pre_mrna_fraction, float, "%f")
    add_value("-p", options.num_threads, int, "%i")
    add_value("-G", options.GTF)
    add_flag("--compatible-hits-norm", options.compatible_hits_norm)
    if options.GTFguide:
        cmd.extend(["-g", options.GTFguide])
        # The tolerances only apply to guided assembly; skip them when unset
        # instead of failing on int(None).
        add_value("--3-overhang-tolerance", options.three_overhang_tolerance, int, "%i")
        add_value("--intron-overhang-tolerance", options.intron_overhang_tolerance, int, "%i")
    add_flag("--no-faux-reads", options.no_faux_reads)
    add_flag("-u", options.multi_read_correct)

    if options.library_type and options.library_type != 'auto':
        cmd.extend(["--library-type", options.library_type])
    add_value("--mask-file", options.mask_file)
    add_value("-m", options.inner_mean_dist, int, "%i")
    add_value("-s", options.inner_dist_std_dev, int, "%i")
    add_value("--max-mle-iterations", options.max_mle_iterations, int, "%i")
    add_value("--junc-alpha", options.junc_alpha, float, "%f")
    add_value("--small-anchor-fraction", options.small_anchor_fraction, float, "%f")
    add_value("--overhang-tolerance", options.overhang_tolerance, int, "%i")
    add_value("--max-bundle-length", options.max_bundle_length, int, "%i")
    add_value("--max-bundle-frags", options.max_bundle_frags, int, "%i")
    add_value("--min-intron-length", options.min_intron_length, int, "%i")
    add_value("--trim-3-avgcov-thresh", options.trim_three_avgcov_thresh, int, "%i")
    add_value("--trim-3-dropoff-frac", options.trim_three_dropoff_frac, float, "%f")

    if options.do_bias_correction:
        cmd.extend(["-b", seq_path])
    add_flag("--no-effective-length-correction", options.no_effective_length_correction)
    add_flag("--no-length-correction", options.no_length_correction)

    # Add input files.
    cmd.append(options.input)
    return cmd


def _read_total_map_mass(stderr_path):
    """Return the number after ':' on the first 'map mass'/'upper quartile' log line, or -1 if none."""
    with open(stderr_path, 'r') as log:
        for line in log:
            lowered = line.lower()
            if "map mass" in lowered or "upper quartile" in lowered:
                return float(line.split(":")[1].strip())
    return -1


def _rescale_transcripts(fpkm_map_mass_ratio):
    """Multiply FPKM, conf_lo and conf_hi in ./transcripts.gtf by the given ratio, in place."""
    fd, tmp_path = tempfile.mkstemp(dir=".")
    try:
        with os.fdopen(fd, 'w') as rescaled, open("transcripts.gtf", 'r') as transcripts:
            for line in transcripts:
                fields = line.split('\t')
                attrs = parse_gff_attributes(fields[8])
                for key in ("FPKM", "conf_lo", "conf_hi"):
                    attrs[key] = str(float(attrs[key]) * fpkm_map_mass_ratio)
                fields[8] = gff_attributes_to_str(attrs, "GTF")
                rescaled.write("%s\n" % '\t'.join(fields))
        shutil.copyfile(tmp_path, "transcripts.gtf")
    finally:
        os.remove(tmp_path)


def _read_log(path):
    """Return the whole content of ``path`` as text, or '' if it cannot be read."""
    if not path:
        return ''
    try:
        with open(path, 'r') as log:
            return log.read()
    except (IOError, OSError):
        return ''


def __main__():
    """Run cufflinks as described by the command-line options and collect its outputs.

    Steps: parse options, print the cufflinks version line, optionally link or
    locate the reference sequence for bias correction, run cufflinks with its
    stderr captured to a file in the working directory, optionally rescale
    FPKM values against a global model, copy ``transcripts.gtf`` to the
    requested output path and write ``global_model.txt`` when a map mass was
    found in the log.

    Any exception raised while running cufflinks or handling its output is
    reported through ``stop_err`` together with the captured stderr.
    """
    # Parse Command Line
    parser = _build_option_parser()
    (options, args) = parser.parse_args()
    if not options.input:
        parser.error("an input alignment file (-1/--input) is required")

    # output version # of tool
    _report_cufflinks_version()

    # If doing bias correction, set/link to sequence file.
    seq_path = None
    if options.do_bias_correction:
        seq_path = _resolve_bias_sequence(options)

    # Build command. An argument list (no shell) keeps paths containing
    # spaces or shell metacharacters intact.
    cmd = _build_command(options, seq_path)

    # Debugging.
    sys.stdout.write("%s\n" % " ".join(cmd))

    #
    # Run command and handle output.
    #
    stderr_path = None
    try:
        #
        # Run command.
        #
        fd, stderr_path = tempfile.mkstemp(dir=".")
        with os.fdopen(fd, 'wb') as stderr_sink:
            returncode = subprocess.call(cmd, stderr=stderr_sink)

        # Error checking.
        if returncode != 0:
            raise Exception("return code = %i" % returncode)

        #
        # Handle output.
        #

        # Read standard error to get total map/upper quartile mass.
        total_map_mass = _read_total_map_mass(stderr_path)

        #
        # If there's a global model provided, use model's total map mass
        # to adjust FPKM + confidence intervals.
        #
        if options.global_model_file:
            # Global model is simply total map mass from original run.
            with open(options.global_model_file, 'r') as global_model_file:
                global_model_total_map_mass = float(global_model_file.readline())

            # Ratio of global model's total map mass to original run's map mass is
            # factor used to adjust FPKM.
            # NOTE(review): if no map-mass line was found, total_map_mass is -1
            # and the ratio is negative -- confirm whether that should be an error.
            fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass

            # Update FPKM values in transcripts.gtf file.
            _rescale_transcripts(fpkm_map_mass_ratio)

            # TODO: update expression files as well.

        # Set outputs. Transcript and gene expression handled by wrapper directives.
        shutil.copyfile("transcripts.gtf", options.assembled_isoforms_output_file)
        if total_map_mass > -1:
            with open("global_model.txt", 'w') as model_out:
                model_out.write("%f\n" % total_map_mass)
    except Exception as e:
        # Read stderr so that it can be reported.
        stop_err('Error running cufflinks.\n%s\n%s' % (str(e), _read_log(stderr_path)))
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
250
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()