comparison sopra_wpc.py @ 2:87ffe493b6c1 draft default tip

Use GALAXY_SLOTS for multithreading in Bowtie. Create symlinks instead of copying files. Specify in help that Bowtie is used to align the reads to the contigs. Add readme.rst .
author crs4
date Mon, 03 Mar 2014 11:28:41 -0500
parents 988d5a82291a
children
comparison
equal deleted inserted replaced
1:d180348fe9db 2:87ffe493b6c1
30 return result 30 return result
31 31
32 32
33 def __main__(): 33 def __main__():
34 parser = optparse.OptionParser(description='SOPRA with prebuilt contigs') 34 parser = optparse.OptionParser(description='SOPRA with prebuilt contigs')
35 parser.add_option('-p', dest='num_threads', type='int', help='Number of threads for Bowtie')
35 parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1') 36 parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1')
36 parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file') 37 parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file')
37 parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries') 38 parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries')
38 parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie') 39 parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie')
39 parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded') 40 parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded')
47 parser.error('Wrong number of arguments') 48 parser.error('Wrong number of arguments')
48 49
49 contigs = options.contigs # a list of file paths 50 contigs = options.contigs # a list of file paths
50 mates = options.mates # a list of file paths 51 mates = options.mates # a list of file paths
51 insert_sizes = options.insert_sizes # a list of integers 52 insert_sizes = options.insert_sizes # a list of integers
52 max_mismatches = options.max_mismatches
53 c_option = options.c_option 53 c_option = options.c_option
54 w_option = options.w_option 54 w_option = options.w_option
55 L_option = options.L_option 55 L_option = options.L_option
56 h_option = options.h_option 56 h_option = options.h_option
57 scaffolds = options.scaffolds 57 scaffolds = options.scaffolds
58 logfile = options.logfile 58 logfile = options.logfile
59 59
60 s_scaf_path = which('s_scaf_v1.4.6.pl').pop() 60 s_scaf_path = which('s_scaf_v1.4.6.pl').pop()
61 print 'Creating temp dir' 61 print 'Creating temporary directory'
62 wd = tempfile.mkdtemp() 62 wd = tempfile.mkdtemp()
63 try: 63 try:
64 fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa] 64 fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa]
65 contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta') # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file 65 contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta') # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file
66 bowtie_build = os.path.join(wd, 'bowtie_build') # arbitrary basename for bowtie-build output files 66 bowtie_build = os.path.join(wd, 'bowtie_build') # arbitrary basename for bowtie-build output files
67 mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files 67 mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files
68 mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format 68 mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format
69 mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files 69 mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files
70 orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file 70 orientdistinfo = os.path.join(wd, "orientdistinfo_c%d" % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file
71 scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file 71 scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file
72 72
73 for i in range(len(mates)): 73 for i in range(len(mates)):
74 print "Copying mate %s to %s" % (mates[i], fake_mates[i]) 74 print "Creating symbolic link %s pointing to %s" % (fake_mates[i], mates[i])
75 shutil.copy2(mates[i], fake_mates[i]) 75 os.symlink(mates[i], fake_mates[i])
76 76
77 log = open(logfile, 'w') if logfile else sys.stdout 77 log = open(logfile, 'w') if logfile else sys.stdout
78 try: 78 try:
79 cmd_step1 = "s_prep_contigAseq_v1.4.6.pl -contig %s -mate %s -a %s" % (" ".join(contigs), " ".join(fake_mates), wd) 79 cmd_step1 = "s_prep_contigAseq_v1.4.6.pl -contig %s -mate %s -a %s" % (" ".join(contigs), " ".join(fake_mates), wd)
80 print "SOPRA with prebuilt contigs (preparation) command to be executed:\n %s" % cmd_step1 80 print "SOPRA with prebuilt contigs (preparation) command to be executed:\n %s" % cmd_step1
83 cmd_step2 = "bowtie-build %s %s" % (contigs_sopra, bowtie_build) 83 cmd_step2 = "bowtie-build %s %s" % (contigs_sopra, bowtie_build)
84 print "SOPRA with prebuilt contigs (Bowtie building index) command to be executed:\n %s" % cmd_step2 84 print "SOPRA with prebuilt contigs (Bowtie building index) command to be executed:\n %s" % cmd_step2
85 subprocess.check_call(args=cmd_step2, stdout=log, shell=True) 85 subprocess.check_call(args=cmd_step2, stdout=log, shell=True)
86 86
87 for i in range(len(mate_sopras)): 87 for i in range(len(mate_sopras)):
88 cmd_step3 = "bowtie -v %d -m 1 -f --sam %s %s %s" % (max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i]) 88 cmd_step3 = "bowtie -p %d -v %d -m 1 -f --sam %s %s %s" % (options.num_threads, options.max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i])
89 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3) 89 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3)
90 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there 90 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there
91 91
92 cmd_step4 = "s_parse_sam_v1.4.6.pl -sam %s -a %s" % (' '.join(mysam_mates), wd) 92 cmd_step4 = "s_parse_sam_v1.4.6.pl -sam %s -a %s" % (' '.join(mysam_mates), wd)
93 print "SOPRA with prebuilt contigs (removing reads not mapped in a proper pair) command to be executed:\n %s" % cmd_step4 93 print "SOPRA with prebuilt contigs (removing reads not mapped in a proper pair) command to be executed:\n %s" % cmd_step4
104 subprocess.check_call(args=cmd_step6, stdout=log, shell=True) 104 subprocess.check_call(args=cmd_step6, stdout=log, shell=True)
105 finally: 105 finally:
106 if log != sys.stdout: 106 if log != sys.stdout:
107 log.close() 107 log.close()
108 108
109 print 'Moving result file %s to %s' % (scaffolds_file, scaffolds) 109 print "Moving result file %s to %s" % (scaffolds_file, scaffolds)
110 shutil.move(scaffolds_file, scaffolds) 110 shutil.move(scaffolds_file, scaffolds)
111 finally: 111 finally:
112 shutil.rmtree(wd) 112 shutil.rmtree(wd)
113 113
114 114