# HG changeset patch # User crs4 # Date 1391188101 18000 # Node ID b8c6a38530eb2594aa87787871556e48686e9adc # Parent cd6cc6d767081b4cf7e225d10c7956cb1fe26e88 Support Edena v. 3.131028 (new , official overlapping log file, covStats output file removed, -lph and -sph options instead of -peHorizon). Use $GALAXY_SLOTS instead of $EDENA _SITE_OPTIONS. Directly call edena, remove edena_ovl_wrapper.py and edena_ass_wrapper.py . Discard stderr instead of redirecting to stdout. Do not redirect stdout to logfile. Add readme.rst . diff -r cd6cc6d76708 -r b8c6a38530eb edena_ass_wrapper.py --- a/edena_ass_wrapper.py Fri Oct 18 14:09:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Edena (assembling) -version 0.2.1 (andrea.pinna@crs4.it) -""" - -import optparse -import shutil -import subprocess -import sys - -def __main__(): - # load arguments - print 'Parsing Edena (assembling) input options...' - parser = optparse.OptionParser(description='Edena assembly') - parser.add_option('--ovl_input', dest='ovl_input', help='') - parser.add_option('--overlapCutoff', dest='overlapCutoff', type='int', help='') - parser.add_option('--cc', action="store_true", dest='cc', help='') - parser.add_option('--discardNonUsable', action="store_true", dest='discardNonUsable', help='') - parser.add_option('--minContigSize', dest='minContigSize', type='int', help='') - parser.add_option('--minCoverage', dest='minCoverage', type='float', help='') - parser.add_option('--trim', dest='trim', type='int', help='') - parser.add_option('--peHorizon', dest='peHorizon', type='int', help='') - parser.add_option('--covStats', dest='covStats', help='') - parser.add_option('--out_contigs_cov', dest='out_contigs_cov', help='') - parser.add_option('--out_contigs_fasta', dest='out_contigs_fasta', help='') - parser.add_option('--out_contigs_lay', dest='out_contigs_lay', help='') - parser.add_option('--out_log_txt', dest='out_log_txt', help='') - 
parser.add_option('--out_nodesInfo', dest='out_nodesInfo', help='') - parser.add_option('--out_nodesPosition', dest='out_nodesPosition', help='') - parser.add_option('--logfile', dest='logfile', help='logfile') - (options, args) = parser.parse_args() - if len(args) > 0: - parser.error('Wrong number of arguments') - - # build Edena (assembling) command to be executed - ovl_input = '-e %s' % (options.ovl_input) - overlapCutoff = '-m %d' % (options.overlapCutoff) if options.overlapCutoff is not None else '' - cc = '-cc yes' if options.cc else '-cc no' - discardNonUsable = '-discardNonUsable yes' if options.discardNonUsable else '-discardNonUsable no' - minContigSize = '-c %d' % (options.minContigSize) if options.minContigSize is not None else '' - minCoverage = '-minCoverage %s' % (options.minCoverage) if options.minCoverage is not None else '' - trim = '-trim %d' % (options.trim) if options.trim is not None else '' - peHorizon = '-peHorizon %d' % (options.peHorizon) if options.peHorizon is not None else '' - covStats = options.covStats - out_contigs_cov = options.out_contigs_cov - out_contigs_fasta = options.out_contigs_fasta - out_contigs_lay = options.out_contigs_lay - out_log_txt = options.out_log_txt - out_nodesInfo = options.out_nodesInfo - out_nodesPosition = options.out_nodesPosition - logfile = options.logfile - - # Build Edena (assembling) command - cmd = 'edena %s %s %s %s %s %s %s %s' % (ovl_input, overlapCutoff, cc, discardNonUsable, minContigSize, minCoverage, trim, peHorizon) - print '\nEdena (assembling) command to be executed:\n %s' % (cmd) - - # Execution of Edena - print 'Executing Edena (assembling)...' - log = open(logfile, 'w') if logfile else sys.stdout - try: - subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because edena writes some logging info there (e.g. "Condensing overlaps graph...") - finally: - if log != sys.stdout: - log.close() - print 'Edena (assembling) executed!' 
- - shutil.move("covStats", covStats) - shutil.move("out_contigs.cov", out_contigs_cov) - shutil.move("out_contigs.fasta", out_contigs_fasta) - shutil.move("out_contigs.lay", out_contigs_lay) - shutil.move("out_log.txt", out_log_txt) - shutil.move("out_nodesInfo", out_nodesInfo) - shutil.move("out_nodesPosition", out_nodesPosition) - - -if __name__ == "__main__": - __main__() diff -r cd6cc6d76708 -r b8c6a38530eb edena_ass_wrapper.xml --- a/edena_ass_wrapper.xml Fri Oct 18 14:09:11 2013 -0400 +++ b/edena_ass_wrapper.xml Fri Jan 31 12:08:21 2014 -0500 @@ -1,63 +1,67 @@ - + - edena + edena - edena -v - - edena_ass_wrapper.py --ovl_input=$ovl_input + edena | head -n 1 + + edena -e $ovl_input #if str($overlapCutoff) - --overlapCutoff=$overlapCutoff + -m $overlapCutoff #end if #if $cc - --cc + -cc yes + #else + -cc no #end if #if $discardNonUsable - --discardNonUsable + -discardNonUsable yes + #else + -discardNonUsable no #end if #if str($minContigSize) - --minContigSize=$minContigSize + -c $minContigSize #end if #if str($minCoverage) - --minCoverage=$minCoverage + -minCoverage $minCoverage #end if #if str($trim) - --trim=$trim + -trim $trim + #end if + #if str($sph) + -sph $sph #end if - #if str($peHorizon) - --peHorizon=$peHorizon + #if str($lph) + -lph $lph #end if - --covStats=$covStats --out_contigs_cov=$out_contigs_cov --out_contigs_fasta=$out_contigs_fasta --out_contigs_lay=$out_contigs_lay --out_log_txt=$out_log_txt --out_nodesInfo=$out_nodesInfo --out_nodesPosition=$out_nodesPosition - --logfile=$logfile + 2>/dev/null ## need to discard stderr because edena writes some progress info there (e.g. 
"Condensing overlaps graph...") - + - + - + - - - + + + - - - - - - - - + + + + + + diff -r cd6cc6d76708 -r b8c6a38530eb edena_ovl_wrapper.py --- a/edena_ovl_wrapper.py Fri Oct 18 14:09:11 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Edena (overlapping) -version 0.2.1 (andrea.pinna@crs4.it) -""" - -import optparse -import shutil -import subprocess -import sys - -def __main__(): - # load arguments - print 'Parsing Edena (overlapping) input options...' - parser = optparse.OptionParser() - parser.add_option('--unpaired_input', action='append', dest='unpaired_input', help='') - parser.add_option('--dr_pair_1', action='append', dest='dr_pair_1', help='') - parser.add_option('--dr_pair_2', action='append', dest='dr_pair_2', help='') - parser.add_option('--rd_pair_1', action='append', dest='rd_pair_1', help='') - parser.add_option('--rd_pair_2', action='append', dest='rd_pair_2', help='') - parser.add_option('--nThreads', dest='nThreads', type='int', help='') - parser.add_option('--minOlap', dest='minOlap', type='int', help='') - parser.add_option('--readsTruncation', dest='readsTruncation', type='int', help='') - parser.add_option('--output', dest='output', help='') - parser.add_option('--logfile', dest='logfile', help='') - (options, args) = parser.parse_args() - if len(args) > 0: - parser.error('Wrong number of arguments') - - # build Edena (overlapping) command to be executed - # unpaired input(s) - if options.unpaired_input: - unpaired_input = '-r' - for item in options.unpaired_input: - unpaired_input += ' %s' % (item) - else: - unpaired_input = '' - # direct-reverse paired-end files - if options.dr_pair_1 and options.dr_pair_2: - dr_pairs = '-DRpairs' - for i in range(len(options.dr_pair_1)): - dr_pairs += ' %s %s' % (options.dr_pair_1[i], options.dr_pair_2[i]) - else: - dr_pairs = '' - # reverse-direct paired-end files - if options.rd_pair_1 and options.rd_pair_2: - rd_pairs = '-RDpairs' - for i in 
range(len(options.rd_pair_1)): - rd_pairs += ' %s %s' % (options.rd_pair_1[i], options.rd_pair_2[i]) - else: - rd_pairs = '' - # nThreads - nThreads = '-nThreads %d' % (options.nThreads) if options.nThreads is not None else '' - # minimum overlap - minOlap = '-M %d' % (options.minOlap) if options.minOlap is not None else '' - # 3' end reads truncation - readsTruncation = '-t %d' % (options.readsTruncation) if options.readsTruncation is not None else '' - # output file(s) - output = options.output - logfile = options.logfile - - # Build Edena (overlapping) command - cmd = 'edena %s %s %s %s %s %s -p galaxy_output' % (unpaired_input, dr_pairs, rd_pairs, nThreads, minOlap, readsTruncation) - print '\nEdena (overlapping) command to be executed:\n %s' % (cmd) - - # Execution of Edena - print 'Executing Edena (overlapping)...' - log = open(logfile, 'w') if logfile else sys.stdout - try: - subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because edena writes some logging info there (e.g. "Computing overlaps >=30...") - finally: - if log != sys.stdout: - log.close() - print 'Edena (overlapping) executed!' 
- - shutil.move('galaxy_output.ovl', output) - - -if __name__ == "__main__": - __main__() diff -r cd6cc6d76708 -r b8c6a38530eb edena_ovl_wrapper.xml --- a/edena_ovl_wrapper.xml Fri Oct 18 14:09:11 2013 -0400 +++ b/edena_ovl_wrapper.xml Fri Jan 31 12:08:21 2014 -0500 @@ -1,75 +1,75 @@ - + - edena + edena - edena -v - - edena_ovl_wrapper.py - \${EDENA_SITE_OPTIONS:---nThreads 2} + edena | head -n 1 + + edena + -nThreads \${GALAXY_SLOTS:-2} #if $input_selection.input == "unpaired_file" + -r #for $ui in $input_selection.unpaired_input - --unpaired_input=${ui.unpaired_file} + ${ui.unpaired_file} #end for #elif $input_selection.input == "dr_pairs" + -DRpairs #for $dpi in $input_selection.dr_pairs_input - --dr_pair_1=${dpi.dr_pair_1} - --dr_pair_2=${dpi.dr_pair_2} + ${dpi.dr_pair_1} ${dpi.dr_pair_2} #end for #elif $input_selection.input == "rd_pairs" + -RDpairs #for $rpi in $input_selection.rd_pairs_input - --rd_pair_1=${rpi.rd_pair_1} - --rd_pair_2=${rpi.rd_pair_2} + ${rpi.rd_pair_1} ${rpi.rd_pair_2} #end for #end if #if str($minOlap) - --minOlap=$minOlap + -M $minOlap #end if #if str($readsTruncation) - --readsTruncation=$readsTruncation + -t $readsTruncation #end if - --output=$output - --logfile=$logfile + 2>/dev/null ## need to discard stderr because edena writes some progress info there (e.g. "Computing overlaps >=30...") - - + + - + - - - + + + - - - + + + - + - - + + @@ -78,7 +78,9 @@ **What it does** -Edena is an overlaps graph based short reads assembler and is suited to Illumina GA reads. An assembly with Edena is a two step process: overlapping and assembling. +Edena is an overlaps graph based short reads assembler and is suited to Illumina GA reads. This program requires the reads to be all the same length, as Illumina GA reads are. This is due to historical reasons and because it greatly simplifies several computational steps. 454 or Sanger reads are therefore not suited to Edena. 
If you provide multiple files with different read lengths, Edena will trim the 3’ end of the reads so that the reads are all the same length as the shortest reads in the files. + +An assembly with Edena is a two-step process: overlapping and assembling. In the overlapping step, the reads files are provided to the program which computes the transitively reduced overlaps graph. This structure is then stored together with the sequence reads in the overlapping file. diff -r cd6cc6d76708 -r b8c6a38530eb readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Fri Jan 31 12:08:21 2014 -0500 @@ -0,0 +1,29 @@ +Edena wrapper +============= + +Configuration +------------- + +The edena_ovl_wrapper tool may be configured to use more than one CPU core by selecting an appropriate destination for this tool in the Galaxy job_conf.xml file (see http://wiki.galaxyproject.org/Admin/Config/Jobs and http://wiki.galaxyproject.org/Admin/Config/Performance/Cluster ). + +If you are using Galaxy release_2013.11.04 or later, this tool will automatically use the number of CPU cores allocated by the job runner according to the configuration of the destination selected for this tool. + +If instead you are using an older Galaxy release, you should also add a line + + GALAXY_SLOTS=N; export GALAXY_SLOTS + +(where N is the number of CPU cores allocated by the job runner for this tool) to the file + + <tool_dependency_dir>/edena/3.131028/crs4/edena/<changeset_revision>/env.sh + +Version history +--------------- + +- Release 2: Support Edena v. 3.131028 (new , official overlapping log file, covStats output file removed, -lph and -sph options instead of -peHorizon). Use $GALAXY_SLOTS instead of $EDENA_SITE_OPTIONS. Directly call edena, remove edena_ovl_wrapper.py and edena_ass_wrapper.py . Discard stderr instead of redirecting to stdout. Do not redirect stdout to logfile. Add readme.rst . +- Release 1: Simplify passing repeated params to Python script. Add more info to help sections. +- Release 0: Initial release in the Tool Shed. 
+ +Development +----------- + +Development is hosted at https://bitbucket.org/crs4/orione-tools . Contributions and bug reports are very welcome! diff -r cd6cc6d76708 -r b8c6a38530eb tool_dependencies.xml --- a/tool_dependencies.xml Fri Oct 18 14:09:11 2013 -0400 +++ b/tool_dependencies.xml Fri Jan 31 12:08:21 2014 -0500 @@ -1,9 +1,9 @@ - + - http://www.genomic.ch/edena/EdenaV3.130110.tar.gz + http://www.genomic.ch/edena/EdenaV3.131028.tar.gz make bin @@ -12,13 +12,10 @@ $INSTALL_DIR/bin - - "--nThreads 2" - -Change the EDENA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of threads to use (--nThreads). +Configuration: Previously (until Release 1), the EDENA_SITE_OPTIONS variable in the installed env.sh file was used to adjust the number of threads to use (--nThreads). This variable is no longer used and may be removed.