Mercurial > repos > crs4 > sopra
diff sopra_wpc.py @ 2:87ffe493b6c1 draft default tip
Use GALAXY_SLOTS for multithreading in Bowtie. Create symlinks instead of copying files. Specify in help that Bowtie is used to align the reads to the contigs. Add readme.rst.
author | crs4 |
---|---|
date | Mon, 03 Mar 2014 11:28:41 -0500 |
parents | 988d5a82291a |
children |
line wrap: on
line diff
--- a/sopra_wpc.py Tue Oct 29 05:27:29 2013 -0400
+++ b/sopra_wpc.py Mon Mar 03 11:28:41 2014 -0500
@@ -32,6 +32,7 @@
 
 def __main__():
     parser = optparse.OptionParser(description='SOPRA with prebuilt contigs')
+    parser.add_option('-p', dest='num_threads', type='int', help='Number of threads for Bowtie')
     parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1')
     parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file')
     parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries')
@@ -49,7 +50,6 @@
     contigs = options.contigs # a list of file paths
     mates = options.mates # a list of file paths
     insert_sizes = options.insert_sizes # a list of integers
-    max_mismatches = options.max_mismatches
     c_option = options.c_option
     w_option = options.w_option
     L_option = options.L_option
@@ -58,7 +58,7 @@
     logfile = options.logfile
 
     s_scaf_path = which('s_scaf_v1.4.6.pl').pop()
-    print 'Creating temp dir'
+    print 'Creating temporary directory'
     wd = tempfile.mkdtemp()
     try:
         fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa]
@@ -67,12 +67,12 @@
         mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files
         mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format
         mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files
-        orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file
+        orientdistinfo = os.path.join(wd, "orientdistinfo_c%d" % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file
         scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file
 
         for i in range(len(mates)):
-            print "Copying mate %s to %s" % (mates[i], fake_mates[i])
-            shutil.copy2(mates[i], fake_mates[i])
+            print "Creating symbolic link %s pointing to %s" % (fake_mates[i], mates[i])
+            os.symlink(mates[i], fake_mates[i])
 
         log = open(logfile, 'w') if logfile else sys.stdout
         try:
@@ -85,7 +85,7 @@
             subprocess.check_call(args=cmd_step2, stdout=log, shell=True)
 
             for i in range(len(mate_sopras)):
-                cmd_step3 = "bowtie -v %d -m 1 -f --sam %s %s %s" % (max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i])
+                cmd_step3 = "bowtie -p %d -v %d -m 1 -f --sam %s %s %s" % (options.num_threads, options.max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i])
                 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3)
                 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there
 
@@ -106,7 +106,7 @@
             if log != sys.stdout:
                 log.close()
 
-        print 'Moving result file %s to %s' % (scaffolds_file, scaffolds)
+        print "Moving result file %s to %s" % (scaffolds_file, scaffolds)
         shutil.move(scaffolds_file, scaffolds)
     finally:
         shutil.rmtree(wd)