comparison cufflinks_wrapper.py @ 2:da11bfc10e81

Update to the new data table specification.
author Dave Bouvier <dave@bx.psu.edu>
date Wed, 04 Dec 2013 13:24:49 -0500
parents b50aacc8ae49
children 5346d5eea8b1
comparison
equal deleted inserted replaced
1:b01956f26c36 2:da11bfc10e81
8 8
9 def stop_err( msg ): 9 def stop_err( msg ):
10 sys.stderr.write( "%s\n" % msg ) 10 sys.stderr.write( "%s\n" % msg )
11 sys.exit() 11 sys.exit()
12 12
13 # Copied from sam_to_bam.py:
14 def check_seq_file( dbkey, cached_seqs_pointer_file ):
15 seq_path = ''
16 for line in open( cached_seqs_pointer_file ):
17 line = line.rstrip( '\r\n' )
18 if line and not line.startswith( '#' ) and line.startswith( 'index' ):
19 fields = line.split( '\t' )
20 if len( fields ) < 3:
21 continue
22 if fields[1] == dbkey:
23 seq_path = fields[2].strip()
24 break
25 return seq_path
26
27 def __main__(): 13 def __main__():
28 #Parse Command Line 14 #Parse Command Line
29 parser = optparse.OptionParser() 15 parser = optparse.OptionParser()
30 parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' ) 16 parser.add_option( '-1', '--input', dest='input', help=' file of RNA-Seq read alignments in the SAM format. SAM is a standard short read alignment, that allows aligners to attach custom tags to individual alignments, and Cufflinks requires that the alignments you supply have some of these tags. Please see Input formats for more details.' )
31 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' ) 17 parser.add_option( '-s', '--inner-dist-std-dev', dest='inner_dist_std_dev', help='The standard deviation for the distribution on inner distances between mate pairs. The default is 20bp.' )
51 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' ) 37 parser.add_option( '--num-importance-samples', dest='num_importance_samples', help='Sets the number of importance samples generated for each locus during abundance estimation. Default: 1000' )
52 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' ) 38 parser.add_option( '--max-mle-iterations', dest='max_mle_iterations', help='Sets the number of iterations allowed during maximum likelihood estimation of abundances. Default: 5000' )
53 39
54 # Bias correction options. 40 # Bias correction options.
55 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.') 41 parser.add_option( '-b', dest='do_bias_correction', action="store_true", help='Providing Cufflinks with a multifasta file via this option instructs it to run our new bias detection and correction algorithm which can significantly improve accuracy of transcript abundance estimates.')
56 parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) 42 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
57 parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' )
58 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) 43 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
59 44
60 # Global model. 45 # Global model.
61 parser.add_option( '', '--global_model', dest='global_model_file', help='Global model used for computing on local data' ) 46 parser.add_option( '', '--global_model', dest='global_model_file', help='Global model used for computing on local data' )
62 47
81 except: 66 except:
82 sys.stdout.write( 'Could not determine Cufflinks version\n' ) 67 sys.stdout.write( 'Could not determine Cufflinks version\n' )
83 68
84 # If doing bias correction, set/link to sequence file. 69 # If doing bias correction, set/link to sequence file.
85 if options.do_bias_correction: 70 if options.do_bias_correction:
86 if options.ref_file != 'None': 71 if options.ref_file:
87 # Sequence data from history. 72 # Sequence data from history.
88 # Create symbolic link to ref_file so that index will be created in working directory. 73 # Create symbolic link to ref_file so that index will be created in working directory.
89 seq_path = "ref.fa" 74 seq_path = "ref.fa"
90 os.symlink( options.ref_file, seq_path ) 75 os.symlink( options.ref_file, seq_path )
91 else: 76 else:
92 # Sequence data from loc file. 77 if not os.path.exists( options.index ):
93 cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) 78 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
94 if not os.path.exists( cached_seqs_pointer_file ): 79 seq_path = options.index
95 stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) 80
96 # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa,
97 # and the equCab2.fa file will contain fasta sequences.
98 seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file )
99 if seq_path == '':
100 stop_err( 'No sequence data found for dbkey %s, so bias correction cannot be used.' % options.dbkey )
101
102 # Build command. 81 # Build command.
103 82
104 # Base; always use quiet mode to avoid problems with storing log output. 83 # Base; always use quiet mode to avoid problems with storing log output.
105 cmd = "cufflinks -q --no-update-check" 84 cmd = "cufflinks -q --no-update-check"
106 85