annotate cufflinks_wrapper.py @ 0:b50aacc8ae49

Uploaded tool tarball.
author devteam
date Tue, 01 Oct 2013 12:55:37 -0400
parents
children da11bfc10e81
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
2
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
3 # Supports Cufflinks versions 1.3 and newer.
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
4
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
5 import optparse, os, shutil, subprocess, sys, tempfile
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
6 from galaxy import eggs
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
7 from galaxy.datatypes.util.gff_util import parse_gff_attributes, gff_attributes_to_str
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
8
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then exits
    with status 1 so the failure is also visible to anything that
    inspects the exit code rather than the stderr stream.
    """
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() would exit with status 0, i.e. "success".
    sys.exit( 1 )
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
12
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
13 # Copied from sam_to_bam.py:
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
def check_seq_file( dbkey, cached_seqs_pointer_file ):
    """Look up the sequence file path registered for ``dbkey``.

    ``cached_seqs_pointer_file`` is read as a tab-separated text file.
    Only lines starting with ``index`` are considered; of those, lines
    with fewer than three fields are skipped. The first remaining line
    whose second field equals ``dbkey`` wins.

    Returns the third field of the matching line with surrounding
    whitespace stripped, or an empty string when no line matches.
    """
    # The context manager closes the file on every exit path, including
    # the early return below.
    with open( cached_seqs_pointer_file ) as loc_file:
        for line in loc_file:
            line = line.rstrip( '\r\n' )
            # Blank lines and '#' comments can never start with 'index',
            # so this single test covers all three original conditions.
            if not line.startswith( 'index' ):
                continue
            fields = line.split( '\t' )
            if len( fields ) >= 3 and fields[1] == dbkey:
                return fields[2].strip()
    return ''
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
26
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
def _build_option_parser():
    """Return the optparse parser describing every option this wrapper accepts."""
    parser = optparse.OptionParser()
    parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
    parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
    # Help text fixed: this option is the *maximum* intron length.
    parser.add_option( '-I', '--max-intron-length', dest='max_intron_len', help='The maximum intron length. Cufflinks will not report transcripts with introns longer than this, and will ignore SAM alignments with REF_SKIP CIGAR operations longer than this. The default is 300,000.' )
    parser.add_option( '-F', '--min-isoform-fraction', dest='min_isoform_fraction', help='After calculating isoform abundance for a gene, Cufflinks filters out transcripts that it believes are very low abundance, because isoforms expressed at extremely low levels often cannot reliably be assembled, and may even be artifacts of incompletely spliced precursors of processed transcripts. This parameter is also used to filter out introns that have far fewer spliced alignments supporting them. The default is 0.05, or 5% of the most abundant isoform (the major isoform) of the gene.' )
    parser.add_option( '-j', '--pre-mrna-fraction', dest='pre_mrna_fraction', help='Some RNA-Seq protocols produce a significant amount of reads that originate from incompletely spliced transcripts, and these reads can confound the assembly of fully spliced mRNAs. Cufflinks uses this parameter to filter out alignments that lie within the intronic intervals implied by the spliced alignments. The minimum depth of coverage in the intronic region covered by the alignment is divided by the number of spliced reads, and if the result is lower than this parameter value, the intronic alignments are ignored. The default is 5%.' )
    parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
    parser.add_option( '-m', '--inner-mean-dist', dest='inner_mean_dist',
                       help='This is the expected (mean) inner distance between mate pairs. '
                            'For, example, for paired end runs with fragments selected at 300bp, '
                            'where each end is 50bp, you should set -r to be 200. The default is 45bp.' )
    parser.add_option( '-G', '--GTF', dest='GTF', help='Tells Cufflinks to use the supplied reference annotation to estimate isoform expression. It will not assemble novel transcripts, and the program will ignore alignments not structurally compatible with any reference transcript.' )
    parser.add_option( '-g', '--GTF-guide', dest='GTFguide', help='use reference transcript annotation to guide assembly' )
    parser.add_option( '-u', '--multi-read-correct', dest='multi_read_correct', action="store_true", help='Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome')

    # Normalization options.
    parser.add_option( "-N", "--quartile-normalization", dest="do_normalization", action="store_true" )
    parser.add_option( "--no-effective-length-correction", dest="no_effective_length_correction", action="store_true" )

    # Wrapper / Galaxy options.
    parser.add_option( '-A', '--assembled-isoforms-output', dest='assembled_isoforms_output_file', help='Assembled isoforms output file; formate is GTF.' )

    # Advanced Options:
    parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
    parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )

    # Bias correction options.
    parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
    parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' )
    parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
    parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )

    # Global model.
    parser.add_option( '', '--global_model', dest='global_model_file', help='Global model used for computing on local data' )
    return parser


def _report_cufflinks_version():
    """Write the Cufflinks version banner, or a fallback notice, to stdout.

    Runs ``cufflinks --no-update-check`` with stderr merged into stdout and
    reports the first output line containing ``cufflinks v``
    (case-insensitive). Any failure results in the fallback notice.
    """
    version = None
    try:
        # An anonymous temporary file is removed automatically when closed.
        with tempfile.TemporaryFile( mode='w+' ) as captured:
            proc = subprocess.Popen( args=[ 'cufflinks', '--no-update-check' ],
                                     stdout=captured, stderr=subprocess.STDOUT )
            proc.wait()
            captured.seek( 0 )
            for line in captured:
                if 'cufflinks v' in line.lower():
                    version = line.strip()
                    break
    except Exception:
        # Version reporting is informational only; never abort the run for it.
        version = None
    if version:
        sys.stdout.write( '%s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Cufflinks version\n' )


def _read_total_map_mass( stderr_path ):
    """Return the map mass / upper quartile value found in a Cufflinks stderr log.

    The first line containing ``map mass`` or ``upper quartile``
    (case-insensitive) is split on ``:`` and its second piece parsed as a
    float. Returns -1 when no such line exists.
    """
    with open( stderr_path, 'r' ) as log:
        for line in log:
            lowered = line.lower()
            if 'map mass' in lowered or 'upper quartile' in lowered:
                return float( line.split( ":" )[1].strip() )
    return -1


def _rescale_transcripts( gtf_path, ratio ):
    """Multiply FPKM, conf_lo and conf_hi of every line in ``gtf_path`` by ``ratio``.

    The rescaled lines are written to a temporary file in the current
    directory, whose content is then copied over ``gtf_path``.
    """
    tmp_fd, tmp_path = tempfile.mkstemp( dir="." )
    try:
        with os.fdopen( tmp_fd, 'w' ) as rescaled:
            with open( gtf_path, 'r' ) as transcripts:
                for line in transcripts:
                    fields = line.split( '\t' )
                    attrs = parse_gff_attributes( fields[8] )
                    for key in ( "FPKM", "conf_lo", "conf_hi" ):
                        attrs[ key ] = str( float( attrs[ key ] ) * ratio )
                    fields[8] = gff_attributes_to_str( attrs, "GTF" )
                    rescaled.write( "%s\n" % '\t'.join( fields ) )
        # Copy content (rather than rename) so the existing file's
        # permissions are left untouched.
        shutil.copyfile( tmp_path, gtf_path )
    finally:
        os.remove( tmp_path )


def __main__():
    """Parse the wrapper's command line, run Cufflinks and collect its outputs.

    Steps:
      1. Report the Cufflinks version on stdout.
      2. If bias correction is requested, locate the reference sequence:
         either symlink the history dataset to ``ref.fa`` or look the dbkey
         up in ``sam_fa_indices.loc``.
      3. Build and run the ``cufflinks`` command, capturing its stderr.
      4. If a global model file is given, rescale the values in
         ``transcripts.gtf`` by (this run's map mass / the model's map mass).
      5. Copy ``transcripts.gtf`` to the requested output file and, when a
         map mass was found, write it to ``global_model.txt``.

    Any exception raised in steps 3-5 is reported through ``stop_err``
    together with the captured Cufflinks stderr.
    """
    # Parse Command Line
    options = _build_option_parser().parse_args()[0]

    # output version # of tool
    _report_cufflinks_version()

    # If doing bias correction, set/link to sequence file.
    seq_path = None
    if options.do_bias_correction:
        if options.ref_file != 'None':
            # Sequence data from history.
            # Create symbolic link to ref_file so that index will be created in working directory.
            seq_path = "ref.fa"
            os.symlink( options.ref_file, seq_path )
        else:
            # Sequence data from loc file.
            cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' )
            if not os.path.exists( cached_seqs_pointer_file ):
                stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file )
            # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
            # and the equCab2.fa file will contain fasta sequences.
            seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
            if seq_path == '':
                stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )

    # Build command as an argument list (no shell), so file paths containing
    # spaces or shell metacharacters are passed through verbatim.

    # Base; always use quiet mode to avoid problems with storing log output.
    cmd = [ "cufflinks", "-q", "--no-update-check" ]

    # Add options.
    if options.inner_dist_std_dev:
        cmd.extend( [ "-s", "%i" % int( options.inner_dist_std_dev ) ] )
    if options.max_intron_len:
        cmd.extend( [ "-I", "%i" % int( options.max_intron_len ) ] )
    if options.min_isoform_fraction:
        cmd.extend( [ "-F", "%f" % float( options.min_isoform_fraction ) ] )
    if options.pre_mrna_fraction:
        cmd.extend( [ "-j", "%f" % float( options.pre_mrna_fraction ) ] )
    if options.num_threads:
        cmd.extend( [ "-p", "%i" % int( options.num_threads ) ] )
    if options.inner_mean_dist:
        cmd.extend( [ "-m", "%i" % int( options.inner_mean_dist ) ] )
    if options.GTF:
        cmd.extend( [ "-G", options.GTF ] )
    if options.GTFguide:
        cmd.extend( [ "-g", options.GTFguide ] )
    if options.multi_read_correct:
        cmd.append( "-u" )
    if options.num_importance_samples:
        cmd.extend( [ "--num-importance-samples", "%i" % int( options.num_importance_samples ) ] )
    if options.max_mle_iterations:
        cmd.extend( [ "--max-mle-iterations", "%i" % int( options.max_mle_iterations ) ] )
    if options.do_normalization:
        cmd.append( "-N" )
    if options.do_bias_correction:
        cmd.extend( [ "-b", seq_path ] )
    if options.no_effective_length_correction:
        cmd.append( "--no-effective-length-correction" )

    # Debugging: echo the command (without the input file) to stdout.
    sys.stdout.write( "%s\n" % " ".join( cmd ) )

    # Add input files.
    cmd.append( options.input )

    #
    # Run command and handle output.
    #

    # Create the stderr capture file before entering the try block so the
    # error handler below can always refer to it.
    stderr_fd, stderr_name = tempfile.mkstemp( dir="." )
    try:
        #
        # Run command.
        #
        with os.fdopen( stderr_fd, 'wb' ) as tmp_stderr:
            proc = subprocess.Popen( args=cmd, stderr=tmp_stderr.fileno() )
            returncode = proc.wait()

        # Error checking.
        if returncode != 0:
            raise Exception( "return code = %i" % returncode )

        #
        # Handle output.
        #

        # Read standard error to get total map/upper quartile mass.
        total_map_mass = _read_total_map_mass( stderr_name )
        have_map_mass = total_map_mass > -1

        #
        # If there's a global model provided, use model's total map mass
        # to adjust FPKM + confidence intervals.
        #
        if options.global_model_file:
            if not have_map_mass:
                # Rescaling with the -1 sentinel would silently write
                # negative FPKM values; fail loudly instead.
                raise Exception( "Could not find total map mass in Cufflinks output; cannot apply global model." )

            # Global model is simply total map mass from original run.
            with open( options.global_model_file, 'r' ) as global_model_file:
                global_model_total_map_mass = float( global_model_file.readline() )

            # Ratio of this run's map mass to the global model's total map
            # mass is the factor used to adjust FPKM.
            fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass

            # Update FPKM values in transcripts.gtf file.
            _rescale_transcripts( "transcripts.gtf", fpkm_map_mass_ratio )

            # TODO: update expression files as well.

        # Set outputs. Transcript and gene expression handled by wrapper directives.
        shutil.copyfile( "transcripts.gtf", options.assembled_isoforms_output_file )
        if have_map_mass:
            with open( "global_model.txt", 'w' ) as model_out:
                model_out.write( "%f\n" % total_map_mass )
    except Exception as e:
        # Read stderr so that it can be reported. This is best effort: a
        # failure to read the log must not mask the original error.
        stderr = ''
        try:
            with open( stderr_name, 'r' ) as captured:
                stderr = captured.read()
        except ( IOError, OSError, OverflowError ):
            pass

        stop_err( 'Error running cufflinks.\n%s\n%s' % ( str( e ), stderr ) )
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
226
b50aacc8ae49 Uploaded tool tarball.
devteam
parents:
diff changeset
# Run the wrapper only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()