# HG changeset patch # User crs4 # Date 1382637730 14400 # Node ID 988d5a82291a1750941a2606676c5f3d3c5afa57 Uploaded diff -r 000000000000 -r 988d5a82291a COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Thu Oct 24 14:02:10 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Gianmauro Cuccuru +Nicola Soranzo + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff -r 000000000000 -r 988d5a82291a sopra_wpc.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sopra_wpc.py Thu Oct 24 14:02:10 2013 -0400 @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +""" +SOPRA with prebuilt contigs workflow runner +""" + +import optparse +import os +import tempfile +import shutil +import subprocess +import sys + + +# Copyright (c) Twisted Matrix Laboratories. +def which(name, flags=os.X_OK): + """ Search PATH for executable files with the given name. """ + result = [] + exts = filter(None, os.environ.get('PATHEXT', '').split(os.pathsep)) + path = os.environ.get('PATH', None) + if path is None: + return [] + for p in os.environ.get('PATH', '').split(os.pathsep): + p = os.path.join(p, name) + if os.access(p, flags): + result.append(p) + for e in exts: + pext = p + e + if os.access(pext, flags): + result.append(pext) + return result + + +def __main__(): + parser = optparse.OptionParser(description='SOPRA with prebuilt contigs') + parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1') + parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file') + parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries') + parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie') + parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded') + parser.add_option('-w', dest='w_option', type='int', help='Minimum number of links between two contigs') + parser.add_option('-L', dest='L_option', type='int', help='Minimum length of contigs to be used in scaffold assembly') + parser.add_option('--h_option', dest='h_option', type='float', help='High coverage contigs (above mean coverage + h x std coverage) are not considered in the scaffold assembly mainly to exclude reads from repetitive regions') + parser.add_option('--scaffolds', dest='scaffolds', help='scaffolds fasta file mandatory') + parser.add_option('-l', '--logfile', dest='logfile', help='log file (default=stdout)') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + contigs = options.contigs # a list of file paths + mates = options.mates # a list of file paths + insert_sizes = options.insert_sizes # a list of integers + max_mismatches = options.max_mismatches + c_option = options.c_option + w_option = options.w_option + L_option = options.L_option + h_option = options.h_option + scaffolds = options.scaffolds + logfile = options.logfile + + s_scaf_path = which('s_scaf_v1.4.6.pl').pop() + print 'Creating temp dir' + wd = tempfile.mkdtemp() + try: + fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa] + contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta') # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file + bowtie_build = os.path.join(wd, 'bowtie_build') # arbitrary basename for bowtie-build output files + mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files + mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format + mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files + orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file + scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file + + for i in range(len(mates)): + print "Copying mate %s to %s" % (mates[i], fake_mates[i]) + shutil.copy2(mates[i], fake_mates[i]) + + log = open(logfile, 'w') if logfile else sys.stdout + try: + cmd_step1 = "s_prep_contigAseq_v1.4.6.pl -contig %s -mate %s -a %s" % (" ".join(contigs), " ".join(fake_mates), wd) + print "SOPRA with prebuilt contigs (preparation) command to be executed:\n %s" % cmd_step1 + subprocess.check_call(args=cmd_step1, stdout=log, shell=True) + + cmd_step2 = "bowtie-build %s %s" % (contigs_sopra, bowtie_build) + print "SOPRA with prebuilt contigs (Bowtie building index) command to be executed:\n %s" % cmd_step2 + subprocess.check_call(args=cmd_step2, stdout=log, shell=True) + + for i in range(len(mate_sopras)): + cmd_step3 = "bowtie -v %d -m 1 -f --sam %s %s %s" % (max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i]) + print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3) + subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there + + cmd_step4 = "s_parse_sam_v1.4.6.pl -sam %s -a %s" % (' '.join(mysam_mates), wd) + print "SOPRA with prebuilt contigs (removing reads not mapped in a proper pair) command to be executed:\n %s" % cmd_step4 + subprocess.check_call(args=cmd_step4, stdout=log, shell=True) + + cmd_step5 = "s_read_parsed_sam_v1.4.6.pl -c %d -a %s" % (c_option, wd) + for i in range(len(mysam_mates_parsed)): + cmd_step5 += " -parsed %s -d %d" % (mysam_mates_parsed[i], insert_sizes[i]) + print "SOPRA with prebuilt contigs (read parsed SAM) command to be executed:\n %s" % cmd_step5 + subprocess.check_call(args=cmd_step5, stdout=log, shell=True) + + cmd_step6 = "perl -X %s -w %d -L %d -h %s -o %s -a %s" % (s_scaf_path, w_option, L_option, h_option, orientdistinfo, wd) # need to call with perl -X because: 1) otherwise some Perl warnings are written on stderr; 2) simply redirecting stderr would hide real errors since it always returns exit status 0 + print "SOPRA with prebuilt contigs (scaffold assembly) command to be executed:\n %s" % cmd_step6 + subprocess.check_call(args=cmd_step6, stdout=log, shell=True) + finally: + if log != sys.stdout: + log.close() + + print 'Moving result file %s to %s' % (scaffolds_file, scaffolds) + shutil.move(scaffolds_file, scaffolds) + finally: + shutil.rmtree(wd) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 988d5a82291a sopra_wpc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sopra_wpc.xml Thu Oct 24 14:02:10 2013 -0400 @@ -0,0 +1,69 @@ + + for Illumina workflow + + sopra + bowtie + + + sopra_wpc.py + #for $cr in $contigs_repeat + --contigs ${cr.contigs_file} + #end for + #for $mr in $mate_repeat + --mate ${mr.mate_file} + -d ${mr.insert_size} + #end for + -v $max_mismatches -c $c_option -w $w_option -L $L_option --h_option $h_option --scaffolds $scaffolds_file --logfile $logfile + + + + + + + + + + + + + + + + + + + + +**What it does** + +SOPRA is an assembly tool for mate pair/paired-end data generated by high-throughput sequencing technologies, e.g. Illumina and SOLiD platforms. + +The input paired-end FASTA file can be obtained with: +FR reads -> *FASTQ interlacer on paired end reads* followed by *FASTQ to FASTA* converter +RF reads -> *Reverse-Complement*, *FASTQ interlacer on paired end reads* followed by *FASTQ to FASTA* converter + +.. class:: infomark + +**TIP:** Try trimming the end of short reads before feeding it to the assembler to remove the error prone bases (e.g. last 10 to 20 bps) and check if it improves the assembly. + +----- + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `SOPRA`_, which is licensed separately. Please cite |Dayarian2010|_. + +.. _SOPRA: http://www.physics.rutgers.edu/~anirvans/SOPRA/ +.. |Dayarian2010| replace:: Dayarian, A., Michael, T. P., Sengupta, A. M. (2010) SOPRA: Scaffolding algorithm for paired reads via statistical optimization. *BMC Bioinformatics* 11, 345 +.. _Dayarian2010: http://www.biomedcentral.com/1471-2105/11/345/ + + diff -r 000000000000 -r 988d5a82291a tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Oct 24 14:02:10 2013 -0400 @@ -0,0 +1,21 @@ + + + + + + + http://www.physics.rutgers.edu/~anirvans/SOPRA/SOPRA_v1.4.6.zip + + source_codes_v1.4.6/SOPRA_with_prebuilt_contigs + $INSTALL_DIR/SOPRA_with_prebuilt_contigs + + chmod 755 $INSTALL_DIR/SOPRA_with_prebuilt_contigs/*.pl + + $INSTALL_DIR/SOPRA_with_prebuilt_contigs + + + + +