# HG changeset patch # User crs4 # Date 1378719871 14400 # Node ID 60609a9cef3bec007d88cec09cf5b2e8b7a7c9dd Uploaded diff -r 000000000000 -r 60609a9cef3b COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Andrea Pinna +Nicola Soranzo + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff -r 000000000000 -r 60609a9cef3b datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,6 @@ + + + + + + diff -r 000000000000 -r 60609a9cef3b edena_ass_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edena_ass_wrapper.py Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +""" +Edena (assembling) +version 0.2.1 (andrea.pinna@crs4.it) +""" + +import optparse +import shutil +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing Edena (assembling) input options...' + parser = optparse.OptionParser(description='Edena assembly') + parser.add_option('--ovl_input', dest='ovl_input', help='') + parser.add_option('--overlapCutoff', dest='overlapCutoff', type='int', help='') + parser.add_option('--cc', action="store_true", dest='cc', help='') + parser.add_option('--discardNonUsable', action="store_true", dest='discardNonUsable', help='') + parser.add_option('--minContigSize', dest='minContigSize', type='int', help='') + parser.add_option('--minCoverage', dest='minCoverage', type='float', help='') + parser.add_option('--trim', dest='trim', type='int', help='') + parser.add_option('--peHorizon', dest='peHorizon', type='int', help='') + parser.add_option('--covStats', dest='covStats', help='') + parser.add_option('--out_contigs_cov', dest='out_contigs_cov', help='') + parser.add_option('--out_contigs_fasta', dest='out_contigs_fasta', help='') + parser.add_option('--out_contigs_lay', dest='out_contigs_lay', help='') + parser.add_option('--out_log_txt', dest='out_log_txt', help='') + parser.add_option('--out_nodesInfo', dest='out_nodesInfo', help='') + parser.add_option('--out_nodesPosition', dest='out_nodesPosition', help='') + parser.add_option('--logfile', dest='logfile', help='logfile') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Edena (assembling) command to 
be executed + ovl_input = '-e %s' % (options.ovl_input) + if options.overlapCutoff is not None: + overlapCutoff = '-m %d' % (options.overlapCutoff) + else: + overlapCutoff = '' + if options.cc: + cc = '-cc yes' + else: + cc = '-cc no' + if options.discardNonUsable: + discardNonUsable = '-discardNonUsable yes' + else: + discardNonUsable = '-discardNonUsable no' + if options.minContigSize is not None: + minContigSize = '-c %d' % (options.minContigSize) + else: + minContigSize = '' + if options.minCoverage is not None: + minCoverage = '-minCoverage %s' % (options.minCoverage) + else: + minCoverage = '' + if options.trim is not None: + trim = '-trim %d' % (options.trim) + else: + trim = '' + if options.peHorizon is not None: + peHorizon = '-peHorizon %d' % (options.peHorizon) + else: + peHorizon = '' + covStats = options.covStats + out_contigs_cov = options.out_contigs_cov + out_contigs_fasta = options.out_contigs_fasta + out_contigs_lay = options.out_contigs_lay + out_log_txt = options.out_log_txt + out_nodesInfo = options.out_nodesInfo + out_nodesPosition = options.out_nodesPosition + logfile = options.logfile + + # Build Edena (assembling) command + cmd1 = '%s %s %s %s %s %s %s %s' % (ovl_input, overlapCutoff, cc, discardNonUsable, minContigSize, minCoverage, trim, peHorizon) + cmd2 = 'edena %s' % ( cmd1 ) + print '\nEdena (assembling) command to be executed: \n %s' % ( cmd2 ) + + # Execution of Edena + print 'Executing Edena (assembling)...' + if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd2, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because edena writes some logging info there (e.g. "Condensing overlaps graph...") + finally: + if log != sys.stdout: + log.close() + print 'Edena (assembling) executed!' 
+ + shutil.move("covStats", covStats) + shutil.move("out_contigs.cov", out_contigs_cov) + shutil.move("out_contigs.fasta", out_contigs_fasta) + shutil.move("out_contigs.lay", out_contigs_lay) + shutil.move("out_log.txt", out_log_txt) + shutil.move("out_nodesInfo", out_nodesInfo) + shutil.move("out_nodesPosition", out_nodesPosition) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 60609a9cef3b edena_ass_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edena_ass_wrapper.xml Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,89 @@ + + + + edena + + edena -v + + edena_ass_wrapper.py --ovl_input=$ovl_input + #if str($overlapCutoff) + --overlapCutoff=$overlapCutoff + #end if + #if $cc + --cc + #end if + #if $discardNonUsable + --discardNonUsable + #end if + #if str($minContigSize) + --minContigSize=$minContigSize + #end if + #if str($minCoverage) + --minCoverage=$minCoverage + #end if + #if str($trim) + --trim=$trim + #end if + #if str($peHorizon) + --peHorizon=$peHorizon + #end if + --covStats=$covStats --out_contigs_cov=$out_contigs_cov --out_contigs_fasta=$out_contigs_fasta --out_contigs_lay=$out_contigs_lay --out_log_txt=$out_log_txt --out_nodesInfo=$out_nodesInfo --out_nodesPosition=$out_nodesPosition + --logfile=$logfile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +The key parameter for this mode is the overlap size cutoff (option -m). By default it is set to half of the read length, which is quite conservative. If your sequencing project is well covered (>50-100x), you may try increasing this value a bit. minCoverage is an important parameter that is determined automatically. You may check its value in the program output and, if needed, override it. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. 
_MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Edena`_, which is licensed separately. Please cite |Hernandez2008|_. + +.. _Edena: http://www.genomic.ch/edena.php +.. |Hernandez2008| replace:: Hernandez, D., *et al.* (2008) De novo bacterial genome sequencing: Millions of very short reads assembled on a desktop computer. *Genome Res.* 18(5), 802-809 +.. _Hernandez2008: http://genome.cshlp.org/content/18/5/802 + + diff -r 000000000000 -r 60609a9cef3b edena_ovl_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edena_ovl_wrapper.py Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +""" +Edena (overlapping) +version 0.2.1 (andrea.pinna@crs4.it) +""" + +import optparse +import shutil +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing Edena (overlapping) input options...' 
+ parser = optparse.OptionParser() + parser.add_option('--unpaired_input', dest='unpaired_input', help='') + parser.add_option('--dr_pair_1', dest='dr_pair_1', help='') + parser.add_option('--dr_pair_2', dest='dr_pair_2', help='') + parser.add_option('--rd_pair_1', dest='rd_pair_1', help='') + parser.add_option('--rd_pair_2', dest='rd_pair_2', help='') + parser.add_option('--nThreads', dest='nThreads', type='int', help='') + parser.add_option('--minOlap', dest='minOlap', type='int', help='') + parser.add_option('--readsTruncation', dest='readsTruncation', type='int', help='') + parser.add_option('--output', dest='output', help='') + parser.add_option('--logfile', dest='logfile', help='') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Edena (overlapping) command to be executed + # unpaired input(s) + if options.unpaired_input: + unpaired_inputs = options.unpaired_input.split('+')[0:-1] + unpaired_input = '-r' + for item in unpaired_inputs: + unpaired_input += ' %s' % (item) + else: + unpaired_input = '' + # direct-reverse paired-end files + if options.dr_pair_1 and options.dr_pair_2: + dr_pairs_1 = options.dr_pair_1.split('+')[0:-1] + dr_pairs_2 = options.dr_pair_2.split('+')[0:-1] + dr_pairs = '-DRpairs' + for i in xrange(len(dr_pairs_1)): + dr_pairs += ' %s %s' % (dr_pairs_1[i], dr_pairs_2[i]) + else: + dr_pairs = '' + # reverse-direct paired-end files + if options.rd_pair_1 and options.rd_pair_2: + rd_pairs_1 = options.rd_pair_1.split('+')[0:-1] + rd_pairs_2 = options.rd_pair_2.split('+')[0:-1] + rd_pairs = '-RDpairs' + for i in xrange(len(rd_pairs_1)): + rd_pairs += ' %s %s' % (rd_pairs_1[i], rd_pairs_2[i]) + else: + rd_pairs = '' + # nThreads + if options.nThreads is not None: + nThreads = '-nThreads %d' % (options.nThreads) + else: + nThreads = '' + # minimum overlap + if options.minOlap is not None: + minOlap = '-M %d' % (options.minOlap) + else: + minOlap = '' + # 3' end reads truncation + 
if options.readsTruncation is not None: + readsTruncation = '-t %d' % (options.readsTruncation) + else: + readsTruncation = '' + # output file(s) + output = options.output + logfile = options.logfile + + # Build Edena (overlapping) command + cmd = 'edena %s %s %s %s %s %s -p galaxy_output' % (unpaired_input, dr_pairs, rd_pairs, nThreads, minOlap, readsTruncation) + print '\nEdena (overlapping) command to be executed: \n %s' % ( cmd ) + + # Execution of Edena + print 'Executing Edena (overlapping)...' + if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because edena writes some logging info there (e.g. "Computing overlaps >=30...") + finally: + if log != sys.stdout: + log.close() + print 'Edena (overlapping) executed!' + + shutil.move( "galaxy_output.ovl", output) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 60609a9cef3b edena_ovl_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/edena_ovl_wrapper.xml Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,127 @@ + + + + edena + + edena -v + + edena_ovl_wrapper.py + \${EDENA_SITE_OPTIONS:---nThreads 2} + #if $input_selection.input == "unpaired_file" + #for $i, $unpaired_file in enumerate( $input_selection.unpaired_input ): + #if $i == 0 + #echo "--unpaired_input=" + #end if + #echo $unpaired_file.unpaired_file + #echo '+' + #end for + #elif $input_selection.input == "dr_pairs" + #for $i, $dr_pair_1 in enumerate( $input_selection.dr_pairs_input ): + #if $i == 0 + #echo "--dr_pair_1=" + #end if + #echo $dr_pair_1.dr_pair_1 + #echo '+' + #end for + #echo ' ' + #for $i, $dr_pair_2 in enumerate( $input_selection.dr_pairs_input ): + #if $i == 0 + #echo "--dr_pair_2=" + #end if + #echo $dr_pair_2.dr_pair_2 + #echo '+' + #end for + #elif $input_selection.input == "rd_pairs" + #for $i, $rd_pair_1 in enumerate( $input_selection.rd_pairs_input ): + #if $i == 0 + #echo 
"--rd_pair_1=" + #end if + #echo $rd_pair_1.rd_pair_1 + #echo '+' + #end for + #echo ' ' + #for $i, $rd_pair_2 in enumerate( $input_selection.rd_pairs_input ): + #if $i == 0 + #echo "--rd_pair_2=" + #end if + #echo $rd_pair_2.rd_pair_2 + #echo '+' + #end for + #end if + #if str($minOlap) + --minOlap=$minOlap + #end if + #if str($readsTruncation) + --readsTruncation=$readsTruncation + #end if + --output=$output + --logfile=$logfile + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Edena accepts both unpaired and paired files, in FASTQ or FASTA format. Note that, for technical reasons, all reads are required to be of the same length. You can, however, provide the program with different files containing reads of different lengths. In such a case, Edena will trim the 3’ ends of the longer reads so that they fit the shorter length. It is, however, required that reads within each individual file are of the same length (as Illumina GA reads are). By default, all overlaps with a minimum size corresponding to half of the read length are computed. This is quite conservative. Provided there is enough coverage, this value can be increased (option -M) to reduce the memory requirements. For reads longer than 100 bp, you may consider the reads truncation option, which could help in discarding 3’ base-calling errors. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Edena`_, which is licensed separately. Please cite |Hernandez2008|_. 
+ +.. _Edena: http://www.genomic.ch/edena.php +.. |Hernandez2008| replace:: Hernandez, D., *et al.* (2008) De novo bacterial genome sequencing: Millions of very short reads assembled on a desktop computer. *Genome Res.* 18(5), 802-809 +.. _Hernandez2008: http://genome.cshlp.org/content/18/5/802 + + diff -r 000000000000 -r 60609a9cef3b tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Sep 09 05:44:31 2013 -0400 @@ -0,0 +1,24 @@ + + + + + + http://www.genomic.ch/edena/EdenaV3.130110.tar.gz + make + + bin + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + + + "--nThreads 2" + + + + +Change the EDENA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of threads to use (--nThreads). + + +