Mercurial > repos > crs4 > glimmer
changeset 0:9c8ffce71f7c draft default tip
Uploaded
author | crs4 |
---|---|
date | Mon, 09 Sep 2013 12:16:17 -0400 |
parents | |
children | |
files | COPYING Galaxy-Workflow-Glimmer.ga anomaly_wrapper.py anomaly_wrapper.xml build-icm_wrapper.py build-icm_wrapper.xml datatypes_conf.xml extract_wrapper.py extract_wrapper.xml glimmer3_wrapper.py glimmer3_wrapper.xml long-orfs_wrapper.py long-orfs_wrapper.xml tool_dependencies.xml |
diffstat | 14 files changed, 1220 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Andrea Pinna <andrea.pinna@crs4.it> +Nicola Soranzo <nicola.soranzo@crs4.it> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy-Workflow-Glimmer.ga Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,175 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "glimmer-from-scratch", + "format-version": "0.1", + "name": "Glimmer", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Genome sequence" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 158, + "top": 200 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Genome sequence\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "loSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Long-ORFs", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "loOutput", + "type": "glimmer_coords" + } + ], + "position": { + "left": 248, + "top": 336 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "long-orfs_wrapper", + "tool_state": "{\"loFixed\": \"\\\"False\\\"\", \"__page__\": 0, \"loMaxOverlap\": \"\\\"\\\"\", \"loMinLen\": \"\\\"\\\"\", \"loStartCodons\": \"\\\"\\\"\", \"loTransTable\": \"\\\"\\\"\", \"loNoHeader\": \"\\\"False\\\"\", \"loLengthOpt\": \"\\\"False\\\"\", \"loCutoff\": \"\\\"\\\"\", \"loStopCodons\": \"\\\"\\\"\", \"loLinear\": \"\\\"False\\\"\", \"loWithoutStops\": \"\\\"False\\\"\", \"loEntropy\": \"null\", \"loSequence\": \"null\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "exCoords": { + "id": 1, + "output_name": "loOutput" + }, + "exSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Extract", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "exOutput", + "type": "fasta" + } + ], + "position": { + "left": 659, + "top": 231 + }, + 
"post_job_actions": {}, + "tool_errors": null, + "tool_id": "extract_wrapper", + "tool_state": "{\"__page__\": 0, \"exSequence\": \"null\", \"exCoords\": \"null\", \"exDir\": \"\\\"False\\\"\", \"exNoStop\": \"\\\"False\\\"\", \"exNoStart\": \"\\\"False\\\"\", \"exNoWrap\": \"\\\"False\\\"\", \"exMinLen\": \"\\\"\\\"\", \"ex2Fields\": \"\\\"False\\\"\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "biSequence": { + "id": 2, + "output_name": "exOutput" + } + }, + "inputs": [], + "name": "Build-ICM", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "biIcm", + "type": "glimmer_icm" + } + ], + "position": { + "left": 177, + "top": 715 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "build-icm_wrapper", + "tool_state": "{\"__page__\": 0, \"biStopCodons\": \"\\\"\\\"\", \"biPeriod\": \"\\\"3\\\"\", \"biTransTable\": \"\\\"\\\"\", \"biNoStops\": \"\\\"False\\\"\", \"biDepth\": \"\\\"7\\\"\", \"biReverse\": \"\\\"False\\\"\", \"biSequence\": \"null\", \"biWidth\": \"\\\"12\\\"\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "glIcm": { + "id": 3, + "output_name": "biIcm" + }, + "glSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Glimmer3", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "glDetail", + "type": "txt" + }, + { + "name": "glPredict", + "type": "txt" + } + ], + "position": { + "left": 508, + "top": 715 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "glimmer3_wrapper", + "tool_state": "{\"__page__\": 0, \"glFirstCodon\": \"\\\"False\\\"\", \"glNoIndep\": \"\\\"False\\\"\", \"glIcm\": \"null\", \"glRbsPwm\": \"null\", \"glEntropy\": \"null\", \"glSeparateGenes\": \"\\\"False\\\"\", \"glOrfCoords\": \"null\", \"glMaxOverlap\": \"\\\"\\\"\", 
\"glIgnoreScoreLen\": \"\\\"\\\"\", \"glGcPercent\": \"\\\"\\\"\", \"glSequence\": \"null\", \"glGeneLen\": \"\\\"\\\"\", \"glStopCodons\": \"\\\"\\\"\", \"glExtend\": \"\\\"False\\\"\", \"glLinear\": \"\\\"False\\\"\", \"glTransTable\": \"\\\"\\\"\", \"glThreshold\": \"\\\"\\\"\", \"glStartCodons\": \"\\\"\\\"\", \"glStartProbs\": \"\\\"\\\"\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + } + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anomaly_wrapper.py Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +""" +Glimmer --> anomaly +version 0.2 (andrea.pinna@crs4.it) +""" + +import optparse +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing Anomaly input options...' + parser = optparse.OptionParser() + parser.add_option('--anSequence', dest='sequence', help='') + parser.add_option('--anCoords', dest='coords', help='') + parser.add_option('--anCheckFirstCodon', action='store_true', dest='check_first_codon', help='') + parser.add_option('--anCheckStopCodon', action='store_true', dest='check_stop_codon', help='') + parser.add_option('--anStartCodons', dest='start_codons', help='') + parser.add_option('--anStopCodons', dest='stop_codons', help='') + parser.add_option('--anOutput', dest='output', help='') + parser.add_option('--logfile', dest='logfile', help='') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Anomaly command to be executed + # sequence file + sequence = options.sequence + coords = options.coords + if options.start_codons: + start_codons = '-A %s' % (options.start_codons) + else: + start_codons = '' + if options.stop_codons: + stop_codons = '-Z %s' % (options.stop_codons) + else: + stop_codons = '' + if options.check_first_codon: + check_first_codon = '-s' + else: + check_first_codon = '' + if options.check_stop_codon: + check_stop_codon = '-t' + else: + check_stop_codon = '' + output = options.output + logfile = options.logfile + + # Build Anomaly command + cmd = 'anomaly %s %s %s %s %s %s > %s' % (start_codons, check_first_codon, check_stop_codon, stop_codons, sequence, coords, output) + print '\nAnomaly command to be executed: \n %s' % (cmd) + + print 'Executing Anomaly...' 
+ if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) + finally: + if log != sys.stdout: + log.close() + print 'Anomaly executed!' + + +if __name__ == "__main__": + __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/anomaly_wrapper.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,68 @@ +<tool id="anomaly_wrapper" name="Anomaly" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02">glimmer</requirement> + </requirements> + <command interpreter="python"> + anomaly_wrapper.py --anSequence $anSequence --anCoords $anCoords + #if $anStartCodons + --anStartCodons="$anStartCodons" + #end if + #if $anCheckFirstCodon + --anCheckFirstCodon + #end if + #if $anCheckStopCodon + --anCheckStopCodon + #end if + #if $anStopCodons + --anStopCodons="$anStopCodons" + #end if + --anOutput $anOutput --logfile $logfile + </command> + + <inputs> + <param name="anSequence" type="data" format="fasta" label="DNA sequence to be analyzed" help="FASTA format" /> + + <param name="anCoords" type="data" format="glimmer_coords" label="Region coordinates" /> + + <param name="anStartCodons" type="text" value="" optional="true" label="Specify allowable start codons as a comma-separated list (-A)" help="Sample format: 'atg,gtg' . The default start codons are atg, gtg and ttg." /> + + <param name="anCheckFirstCodon" type="boolean" checked="false" label="Omit the check that the first codon is a start codon (-s)" help="" /> + + <param name="anCheckStopCodon" type="boolean" checked="false" label="Check whether the codon preceding the start coordinate position is a stop codon (-t)" help="This is useful if the coordinates represent the entire region between stop codons." /> + + <param name="anStopCodons" type="text" value="" optional="true" label="Specify allowable stop codons as a comma-separated list (-Z)" help="Sample format: 'tag,tga'. The default stop codons are tag, tga and taa." 
/> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="anOutput" format="txt" label="${tool.name} on ${on_string}: output" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +Read DNA sequence in "sequence-file" and for each region specified by the coordinates in "coord-file", check whether the region represents a normal gene, i.e., it begins with a start codon, ends with a stop codon, and has no frame shifts. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build-icm_wrapper.py Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +""" +Glimmer --> build-icm +version 0.2 (andrea.pinna@crs4.it) +""" + +import optparse +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing Build-ICM input options...' + parser = optparse.OptionParser() + parser.add_option('--biSequence', dest='sequence', help='') + parser.add_option('--biDepth', dest='depth', type='int', help='') + parser.add_option('--biNoStops', action='store_true', dest='no_stops', help='') + parser.add_option('--biPeriod', dest='period', type='int', help='') + parser.add_option('--biReverse', action='store_true', dest='reverse', help='') + parser.add_option('--biWidth', dest='width', type='int', help='') + parser.add_option('--biTransTable', dest='trans_table', type='int', help='') + parser.add_option('--biStopCodons', dest='stop_codons', help='') + parser.add_option('--biIcm', dest='output', help='') + parser.add_option('--logfile', dest='logfile', help='') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Build-ICM command to be executed + sequence = options.sequence + if options.depth is not None: + depth = '--depth %d' % (options.depth) + else: + depth = '' + if options.no_stops: + no_stops = '--no_stops' + else: + no_stops = '' + if options.period is not None: + period = '--period %d' % (options.period) + else: + period = '' + if options.reverse: + reverse = '--reverse' + else: + reverse = '' + if options.width is not None: + width = '--width %d' % (options.width) + else: + width = '' + if options.trans_table is not None: + trans_table = '--trans_table %d' % (options.trans_table) + else: + trans_table = '' + if options.stop_codons: + stop_codons = '--stop_codons %s' % (options.stop_codons) + else: + stop_codons = '' + output = options.output + logfile = options.logfile + + # Build Build-ICM command 
+ cmd = 'build-icm %s %s %s %s %s %s %s %s < %s ' % (depth, no_stops, period, reverse, width, trans_table, stop_codons, output, sequence) + print '\nBuild-ICM command to be executed: \n %s' % (cmd) + + print 'Executing Build-ICM...' + if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) + finally: + if log != sys.stdout: + log.close() + print 'Build-ICM executed!' + + +if __name__ == "__main__": + __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build-icm_wrapper.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,92 @@ +<tool id="build_icm_wrapper" name="Build-ICM" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02">glimmer</requirement> + </requirements> + <command interpreter="python"> + build-icm_wrapper.py --biSequence $biSequence + #if str($biDepth) + --biDepth=$biDepth + #end if + #if $biNoStops + --biNoStops + #end if + #if str($biPeriod) + --biPeriod=$biPeriod + #end if + #if $biReverse + --biReverse + #end if + #if str($biWidth) + --biWidth=$biWidth + #end if + #if str($biTransTable) + --biTransTable=$biTransTable + #end if + #if $biStopCodons + --biStopCodons="$biStopCodons" + #end if + --biIcm $biIcm --logfile $logfile + </command> + + <inputs> + <param name="biSequence" type="data" format="fasta" label="DNA sequences to be analyzed" help="FASTA format" /> + + <param name="biDepth" type="integer" value="7" optional="true" label="Depth of the ICM (-d, --depth)" help="The depth is the maximum number of positions in the context window that will be used to determine the probability of the predicted position. The default value is 7." /> + + <param name="biNoStops" type="boolean" checked="false" label="Do not use any input strings with in-frame stop codons (-F, --no_stops)" help="" /> + + <param name="biPeriod" type="integer" value="3" optional="true" label="Period of the ICM (-p, --period)" help="The period is the number of different submodels for different positions in the text in a cyclic pattern. E.g., if the period is 3, the first submodel will determine positions 1, 4, 7, etc; the second submodel will determine positions 2, 5, 8, etc; and the third submodel will determine positions 3, 6, 9, etc. For a non-periodic model, use a value of 1. The default value is 3." 
/> + + <param name="biReverse" type="boolean" checked="false" label="Use the reverse of the input strings to build the ICM (-r, --reverse)" help="Note that this is merely the reverse and NOT the reverse-complement. In other words, the model is built in the backwards direction." /> + +<!-- + <param name="biText" type="boolean" optional="true" truevalue="true" falsevalue="false" checked="False" label="Output the model in a text format (-t, - -text)" help="This is for informational/debugging purposes only - the glimmer3 program cannot read models in this form." /> + + <param name="biVerbose" type="integer" value="" optional="true" label="Verbose level (-v, - -verbose)" help="This controls extra debugging output: the higher the value the more output." /> +--> + + <param name="biWidth" type="integer" value="12" optional="true" label="Width of the ICM (-w, --width)" help="The width includes the predicted position. The default value is 12." /> + + <param name="biTransTable" type="integer" value="" optional="true" label="Use GenBank translation table number n to specify stop codons (-z, --trans_table)" help="" /> + + <param name="biStopCodons" type="text" value="" optional="true" label="Specify allowable stop codons as a comma-separated list (-Z, --stop_codons)" help="Sample format: 'tag,tga,taa'. The default stop codons are tag, tga and taa." /> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="biIcm" format="glimmer_icm" label="${tool.name} on ${on_string}: ICM" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +A probability model of coding sequences, called an interpolated context model or ICM, must be built. This is done by the program *build-icm* from a set of training sequences. These sequences can be obtained in several ways: + +1) From known genes in the genome, e.g. genes identified by homology searches. 
+2) From long, non-overlapping ORFs in the genome as produced by the program *long-orfs*. +3) From genes in a highly similar species/strain. + + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<datatypes> + <registration> + <datatype extension="glimmer_entropy_profiles" type="galaxy.datatypes.data:Text" mimetype="text/plain" subclass="True" display_in_upload="true" /> + <datatype extension="glimmer_pwm" type="galaxy.datatypes.data:Text" mimetype="text/plain" subclass="True" display_in_upload="true" /> + <datatype extension="glimmer_coords" type="galaxy.datatypes.data:Text" mimetype="text/plain" subclass="True" display_in_upload="true" /> + <datatype extension="glimmer_icm" type="galaxy.datatypes.binary:Binary" mimetype="application/octet-stream" subclass="True" display_in_upload="true" /> + </registration> +</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_wrapper.py Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +""" +Glimmer --> extract +version 0.2 (andrea.pinna@crs4.it) +""" + +import optparse +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing Extract input options...' + parser = optparse.OptionParser() + parser.add_option('--exSequence', dest='sequence', help='') + parser.add_option('--exCoords', dest='coords', help='') + parser.add_option('--ex2Fields', action='store_true', dest='twofields', help='') + parser.add_option('--exDir', action='store_true', dest='dir', help='') + parser.add_option('--exMinLen', dest='minlen', type='int', help='') + parser.add_option('--exNoStart', action='store_true', dest='nostart', help='') + parser.add_option('--exNoStop', action='store_true', dest='nostop', help='') + parser.add_option('--exNoWrap', action='store_true', dest='nowrap', help='') + parser.add_option('--exOutput', dest='output', help='') + parser.add_option('--logfile', dest='logfile', help='') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Extract command to be executed + sequence = options.sequence + coords = options.coords + if options.twofields: + twofields = '--2_fields' + else: + twofields = '' + if options.dir: + direct = '--dir' + else: + direct = '' + if options.minlen is not None: + minlen = '--minlen %d' % (options.minlen) + else: + minlen = '' + if options.nostart: + nostart = '--nostart' + else: + nostart = '' + if options.nostop: + nostop = '--nostop' + else: + nostop = '' + if options.nowrap: + nowrap = '--nowrap' + else: + nowrap = '' + output = options.output + logfile = options.logfile + + # Build Extract command + cmd = 'extract %s %s %s %s %s %s %s %s > %s' % (twofields, direct, minlen, nostart, nostop, nowrap, sequence, coords, output) + print '\nExtract command to be executed: \n %s' % (cmd) + + print 
'Executing Extract...' + if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) + finally: + if log != sys.stdout: + log.close() + print 'Extract executed!' + + +if __name__ == "__main__": + __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extract_wrapper.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,80 @@ +<tool id="extract_wrapper" name="Extract" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02">glimmer</requirement> + </requirements> + <command interpreter="python"> + extract_wrapper.py --exSequence $exSequence --exCoords $exCoords + #if $ex2Fields + --ex2Fields + #end if + #if $exDir + --exDir + #end if + #if str($exMinLen) + --exMinLen=$exMinLen + #end if + #if $exNoStart + --exNoStart + #end if + #if $exNoStop + --exNoStop + #end if + #if $exNoWrap + --exNoWrap + #end if + --exOutput $exOutput --logfile $logfile + </command> + + <inputs> + <param name="exSequence" type="data" format="fasta" label="DNA sequence to be analyzed" help="FASTA format" /> + + <param name="exCoords" type="data" format="glimmer_coords" label="Coordinates produced by Long-ORFs" /> + + <param name="ex2Fields" type="boolean" checked="false" label="Output sequence in 2 fields (-2, --2_fields)" help="Output each sequence as 2 fields (tag and sequence) on a single line." /> + + <param name="exDir" type="boolean" checked="false" label="Specify sequence direction (-d, --dir)" help="Use the 4th column of each input line to specify the direction of the sequence. Positive is forward, negative is reverse. The input sequence is assumed to be circular." /> + + <param name="exMinLen" type="integer" value="" optional="true" label="Minimum length of sequence (-l, --minlen)" help="Don't output any sequence shorter than n characters." /> + + <param name="exNoStart" type="boolean" checked="false" label="Omit first characters (-s, --nostart)" help="Omit the first 3 characters of each output string." /> + + <param name="exNoStop" type="boolean" checked="false" label="Omit last characters (-t, --nostop)" help="Omit the last 3 characters of each output string." 
/> + + <param name="exNoWrap" type="boolean" checked="false" label="No wraparound (-w, --nowrap)" help="Use the actual input coordinates without any wraparound that would be needed by a circular genome. Without this option, the output sequence is the shorter of the two ways around the circle. Tick 'Specify sequence direction' to specify direction explicitly." /> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="exOutput" format="fasta" label="${tool.name} on ${on_string}: output" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +Read FASTA-format *sequence-file* and extract from it the subsequences specified by *coords*. By default, *coords* is the name of a file containing lines of the form *tag* *start* *stop* [*frame*] ... +Coordinates are inclusive counting from 1, e.g. *1 3* represents the 1st 3 characters of the sequence. For each line the corresponding region of *sequence-file* is extracted and output (after reverse-complementing if necessary). + + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. 
_Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer3_wrapper.py Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- +""" +Glimmer --> glimmer3 +version 0.2 (andrea.pinna@crs4.it) +""" + +import optparse +import shutil +import subprocess +import sys + +def __main__(): + # load arguments + print 'Parsing input options...' + parser = optparse.OptionParser() + parser.add_option('--glSequence', dest='sequence', help='') + parser.add_option('--glIcm', dest='icm', help='') + #parser.add_option('--glPrefix', dest='prefix', help='') + parser.add_option('--glStartCodons', dest='start_codons', help='') + parser.add_option('--glRbsPwm', dest='rbs_pwm', help='') + parser.add_option('--glGcPercent', dest='gc_percent', type='float', help='') + parser.add_option('--glEntropy', dest='entropy', help='') + parser.add_option('--glFirstCodon', action='store_true', dest='first_codon', help='') + parser.add_option('--glGeneLen', dest='gene_len', type='int', help='') + parser.add_option('--glIgnore', dest='ignore', help='') + parser.add_option('--glLinear', action='store_true', dest='linear', help='') + parser.add_option('--glOrfCoords', dest='orf_coords', help='') + parser.add_option('--glSeparateGenes', action='store_true', dest='separate_genes', help='') + parser.add_option('--glMaxOverlap', dest='max_olap', type='int', help='') + parser.add_option('--glStartProbs', dest='start_probs', help='') + parser.add_option('--glIgnoreScoreLen', dest='ignore_score_len', type='int', help='') + parser.add_option('--glNoIndep', action='store_true', dest='no_indep', help='') + parser.add_option('--glThreshold', dest='threshold', type='int', help='') + parser.add_option('--glExtend', action='store_true', dest='extend', help='') + parser.add_option('--glTransTable', dest='trans_table', type='int', help='') + parser.add_option('--glStopCodons', dest='stop_codons', help='') + parser.add_option('--glDetail', dest='detail', help='') + parser.add_option('--glPredict', 
dest='predict', help='') + parser.add_option('--logfile', dest='logfile', help='') + (options, args) = parser.parse_args() + if len(args) > 0: + parser.error('Wrong number of arguments') + + # build Glimmer3 command to be executed + # sequence file + sequence = options.sequence + # icm file + icm = options.icm + # prefix (not needed) + prefix = 'prefix' + # start codons + if options.start_codons: + start_codons = '--start_codons %s' % (options.start_codons) + else: + start_codons = '' + # rbs_pwm + if options.rbs_pwm: + rbs_pwm = '--rbs_pwm %s' % (options.rbs_pwm) + else: + rbs_pwm = '' + # gc percentage + if options.gc_percent is not None: + gc_percent = '--gc_percent %s' % (options.gc_percent) + else: + gc_percent = '' + # entropy + if options.entropy: + entropy = "--entropy '%s'" % (options.entropy) + else: + entropy = '' + # first_codon + if options.first_codon: + first_codon = '--first_codon' + else: + first_codon = '' + # gene length + if options.gene_len is not None: + gene_len = '--gene_len %d' % (options.gene_len) + else: + gene_len = '' + # ignore + if options.ignore: + ignore = '--ignore %s' % (options.ignore) + else: + ignore = '' + # linear + if options.linear: + linear = '--linear' + else: + linear = '' + # orf_coords + if options.orf_coords: + orf_coords = '--orf_coords %s' % (options.orf_coords) + else: + orf_coords = '' + # separate genes + if options.separate_genes: + separate_genes = '--separate_genes' + else: + separate_genes = '' + # max overlap + if options.max_olap is not None: + max_olap = '--max_olap %d' % (options.max_olap) + else: + max_olap = '' + # start probs + if options.start_probs: + start_probs = '--start_probs %s' % (options.start_probs) + else: + start_probs = '' + # ignore score length + if options.ignore_score_len is not None: + ignore_score_len = '--ignore_score_len %d' % (options.ignore_score_len) + else: + ignore_score_len = '' + # no indep + if options.no_indep: + no_indep = '--no_indep' + else: + no_indep = '' + # 
threshold + if options.threshold is not None: + threshold = '--threshold %d' % (options.threshold) + else: + threshold = '' + # extend + if options.extend: + extend = '--extend' + else: + extend = '' + # trans table + if options.trans_table is not None: + trans_table = '--trans_table %d' % (options.trans_table) + else: + trans_table = '' + # stop codons + if options.stop_codons: + stop_codons = '--stop_codons %s' % (options.stop_codons) + else: + stop_codons = '' + logfile = options.logfile + + # Build Glimmer3 command + cmd = 'glimmer3 %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s' % (start_codons, rbs_pwm, gc_percent, entropy, first_codon, gene_len, ignore, linear, orf_coords, separate_genes, max_olap, start_probs, ignore_score_len, no_indep, threshold, extend, trans_table, stop_codons, sequence, icm, prefix) + print '\nGlimmer3 command to be executed: \n %s' % (cmd) + + print 'Executing Glimmer3...' + if logfile: + log = open(logfile, 'w') + else: + log = sys.stdout + try: + subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT, shell=True) + finally: + if log != sys.stdout: + log.close() + print 'Glimmer3 executed!' + + shutil.move(prefix + ".detail", options.detail) + shutil.move(prefix + ".predict", options.predict) + + +if __name__ == "__main__": + __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer3_wrapper.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,147 @@ +<tool id="glimmer3_wrapper" name="Glimmer3" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02">glimmer</requirement> + </requirements> + <command interpreter="python"> + glimmer3_wrapper.py --glSequence $glSequence --glIcm $glIcm + #if $glStartCodons + --glStartCodons="$glStartCodons" + #end if + #if $glRbsPwm + --glRbsPwm=$glRbsPwm + #end if + #if str($glGcPercent) + --glGcPercent=$glGcPercent + #end if + #if $glEntropy.glEntropy_select == 'default' + --glEntropy='#' + #else if $glEntropy.glEntropy_select == 'fromfile' + --glEntropy=${glEntropy.glEntropyFile} + #end if + #if $glFirstCodon + --glFirstCodon + #end if + #if str($glGeneLen) + --glGeneLen=$glGeneLen + #end if + #if $glLinear + --glLinear + #end if + #if $glOrfCoords + --glOrfCoords=$glOrfCoords + #end if + #if $glSeparateGenes + --glSeparateGenes + #end if + #if str($glMaxOverlap) + --glMaxOverlap=$glMaxOverlap + #end if + #if $glStartProbs + --glStartProbs="$glStartProbs" + #end if + #if str($glIgnoreScoreLen) + --glIgnoreScoreLen=$glIgnoreScoreLen + #end if + #if $glNoIndep + --glNoIndep + #end if + #if str($glThreshold) + --glThreshold=$glThreshold + #end if + #if $glExtend + --glExtend + #end if + #if str($glTransTable) + --glTransTable=$glTransTable + #end if + #if $glStopCodons + --glStopCodons="$glStopCodons" + #end if + --glDetail $glDetail --glPredict $glPredict --logfile $logfile + </command> + + <inputs> + <param name="glSequence" type="data" format="fasta" label="DNA sequences to be analyzed" help="FASTA format" /> + + <param name="glIcm" type="data" format="glimmer_icm" label="ICM model produced by Build-ICM" /> + + <param name="glStartCodons" type="text" value="" optional="true" label="Specify allowable start codons as a comma-separated list (-A, --start_codons)" help="Sample format: 'atg,gtg' . 
The default start codons are atg, gtg and ttg. Use the 'Probability of different start codons' option (-P, --start_probs) to specify the relative proportions of use, else the proportions will be equal." /> + + <param name="glRbsPwm" type="data" format="glimmer_pwm" optional="true" label="Position weight matrix representing the ribosome binding site for genes (-b, --rbs_pwm)" help="Used to improve the accuracy of start site predictions." /> + + <param name="glGcPercent" type="float" value="" optional="true" label="GC percentage of the independent model, e.g. 45.2 (-C, --gc_percent)" help="If this option is not specified, the GC percentage will be counted from the input file." /> + + <conditional name="glEntropy"> + <param name="glEntropy_select" type="select" label="Use entropy profiles (-E, --entropy)"> + <option value="no">No</option> + <option value="default">Use default entropy profiles, constructed from a wide range of species (-E #)</option> + <option value="fromfile">Use entropy profiles from history</option> + </param> + <when value="no" /> + <when value="default" /> + <when value="fromfile"> + <param name="glEntropyFile" type="data" format="glimmer_entropy_profiles" label="Entropy profiles" help="As generated by Glimmer entropy-profile" /> + </when> + </conditional> + + <param name="glFirstCodon" type="boolean" checked="false" label="Use the first possible codon in an ORF as the start codon for initial scoring purposes (-f, --first_codon)" help="Otherwise, the highest-scoring codon will be used. This only affects the start positions in the .detail file. The final start predictions in the .predict file are always based on the scoring functions." /> + + <param name="glGeneLen" type="integer" value="" optional="true" label="Minimum gene length in number of nucleotides (-g, --gene_len)" help="It does not include the bases in the stop codon." 
/> + + <param name="glLinear" type="boolean" checked="false" label="Assuming a linear genome (-l, --linear)" help="No 'wrap-around' genes with part at the beginning of the sequence and the rest at the end of the sequence." /> + + <param name="glOrfCoords" type="data" format="glimmer_coords" optional="true" label="Coordinates file specifying a list of ORFs that should be scored separately, with no overlap rules (-L, --orf_coords)" help="The output with this option goes both to the .predict file and to the .detail file." /> + + <param name="glSeparateGenes" type="boolean" checked="false" label="Separate genes (-M, --separate_genes)" help="" /> + + <param name="glMaxOverlap" type="integer" value="" optional="true" label="Maximum overlap length (-o, --max_olap)" help="Overlaps of this many or fewer bases between genes are not regarded as overlaps." /> + + <param name="glStartProbs" type="text" value="" optional="true" label="Probability of different start codons (-P, --start_probs)" help="If no --start_codons option is given, then there should be 3 values: for atg, gtg and ttg, in that order. Sample format: -P 0.6,0.35,0.05. If --start_codons is specified without --start_probs, then each start codon is equally likely (which is very unusual)." /> + + <param name="glIgnoreScoreLen" type="integer" value="" optional="true" label="Consider any gene n or more bases long as a potential gene, regardless of its in-frame score (-q, --ignore_score_len)" help="Without this option, this value is calculated automatically to be the length such that the expected number of ORFs this long or longer in a random sequence of a million bases is one." /> + + <param name="glNoIndep" type="boolean" checked="false" label="Do not use the independent probability score column (-r, --no_indep)" help="Using this option will produce more short gene predictions." 
/> + + <param name="glThreshold" type="integer" value="" optional="true" label="Threshold score for consideration as a gene (-t, --threshold)" help="If the in-frame score ≥ n , then the region is given a number and considered a potential gene. Note this is the integer score in the column labelled 'InFrm' in the .detail file, not the decimal score in the column labelled 'Raw'." /> + + <param name="glExtend" type="boolean" checked="false" label="Also score ORFs that extend off the end of the sequence(s) (-X, --extend)" help="This option presumes that the sequence(s) is linear and not circular. Reported positions off the end of the sequence are the nearest positions in the correct reading frame. Note that this ignores any partial codons at the ends of a sequence. Suppose, for example, that a sequence is 998bp long and an ORF in reading frame +1 starts at position 601 and extends off the end of the sequence. Then the end of that gene/ORF will be reported at position 999, as if the stop codon were in positions 997 ... 999. This is true even if the last two characters of the sequence are, say, cc and cannot possibly be part of a stop codon." /> + + <param name="glTransTable" type="integer" value="" optional="true" label="Use GenBank translation table number n to specify stop codons (-z, --trans_table)" help="" /> + + <param name="glStopCodons" type="text" value="" optional="true" label="Specify allowable stop codons as a comma-separated list (-Z, --stop_codons)" help="Sample format: 'tag,tga,taa'. The default stop codons are tag, tga and taa." 
/> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="glDetail" format="txt" label="${tool.name} on ${on_string}: detail" /> + <data name="glPredict" format="txt" label="${tool.name} on ${on_string}: predict" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +Read DNA sequences and predict genes in them using an Interpolated Context Model (ICM). Output details go to file *detail* and predictions go to file *predict*. + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + </help> +</tool>
# -*- coding: utf-8 -*-
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/long-orfs_wrapper.py Mon Sep 09 12:16:17 2013 -0400
"""
Glimmer --> long-orfs
version 0.2 (andrea.pinna@crs4.it)
"""

import optparse
import subprocess
import sys


def _build_command(options):
    """Translate the parsed wrapper options into a long-orfs argument vector.

    The command is returned as a list (one element per argument) so that it
    can be executed without a shell: file names or option values containing
    spaces or shell metacharacters are passed through verbatim.

    Options are emitted before the two positional arguments (sequence file
    and output file), matching the order used for the glimmer3 command.
    """
    cmd = ['long-orfs']
    # String-valued options: an empty string means "not set".
    if options.start_codons:
        cmd += ['--start_codons', options.start_codons]
    if options.entropy:
        cmd += ['--entropy', options.entropy]
    if options.fixed:
        cmd.append('--fixed')
    # Numeric options: compare against None so that an explicit 0 is kept.
    if options.min_len is not None:
        cmd += ['--min_len', str(options.min_len)]
    if options.ignore:
        cmd += ['--ignore', options.ignore]
    if options.linear:
        cmd.append('--linear')
    if options.length_opt:
        cmd.append('--length_opt')
    if options.no_header:
        cmd.append('--no_header')
    if options.max_olap is not None:
        cmd += ['--max_olap', str(options.max_olap)]
    if options.cutoff is not None:
        cmd += ['--cutoff', str(options.cutoff)]
    if options.without_stops:
        cmd.append('--without_stops')
    if options.trans_table is not None:
        cmd += ['--trans_table', str(options.trans_table)]
    if options.stop_codons:
        cmd += ['--stop_codons', options.stop_codons]
    cmd += [options.sequence, options.output]
    return cmd


def __main__():
    """Parse the wrapper's command line, then run long-orfs.

    The combined stdout/stderr of long-orfs is written to the file given by
    --logfile, or to this process's stdout when no log file is given.

    Exits through optparse's error handling (SystemExit) on unexpected
    positional arguments or when --loSequence / --loOutput is missing.
    Raises subprocess.CalledProcessError if long-orfs exits with a non-zero
    status.
    """
    # load arguments
    print('Parsing Long-ORFs input options...')
    parser = optparse.OptionParser()
    parser.add_option('--loSequence', dest='sequence', help='')
    parser.add_option('--loStartCodons', dest='start_codons', help='')
    parser.add_option('--loEntropy', dest='entropy', help='')
    parser.add_option('--loFixed', action='store_true', dest='fixed', help='')
    parser.add_option('--loMinLen', dest='min_len', type='int', help='')
    parser.add_option('--loIgnore', dest='ignore', help='')
    parser.add_option('--loLinear', action='store_true', dest='linear', help='')
    parser.add_option('--loLengthOpt', action='store_true', dest='length_opt', help='')
    parser.add_option('--loNoHeader', action='store_true', dest='no_header', help='')
    parser.add_option('--loMaxOverlap', dest='max_olap', type='int', help='')
    parser.add_option('--loCutoff', dest='cutoff', type='float', help='')
    parser.add_option('--loWithoutStops', action='store_true', dest='without_stops', help='')
    parser.add_option('--loTransTable', dest='trans_table', type='int', help='')
    parser.add_option('--loStopCodons', dest='stop_codons', help='')
    parser.add_option('--loOutput', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Without these checks a missing value would reach long-orfs as the
    # literal string "None".
    if not options.sequence:
        parser.error('--loSequence is required')
    if not options.output:
        parser.error('--loOutput is required')

    # Build Long-ORFs command
    cmd = _build_command(options)
    print('\nLong-ORFs command to be executed: \n %s' % ' '.join(cmd))

    print('Executing Long-ORFs...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Flush our own buffered output first so that it is not interleaved
        # out of order with the child's output when both share stdout.
        sys.stdout.flush()
        subprocess.check_call(cmd, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Long-ORFs executed!')


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/long-orfs_wrapper.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,107 @@ +<tool id="long_orfs_wrapper" name="Long-ORFs" version="0.2"> + <description></description> + <requirements> + <requirement type="package" version="3.02">glimmer</requirement> + </requirements> + <command interpreter="python"> + long-orfs_wrapper.py --loSequence $loSequence + #if $loStartCodons + --loStartCodons="$loStartCodons" + #end if + #if $loEntropy + --loEntropy=$loEntropy + #end if + #if $loFixed + --loFixed + #end if + #if str($loMinLen) + --loMinLen=$loMinLen + #end if + #if $loLinear + --loLinear + #end if + #if $loLengthOpt + --loLengthOpt + #end if + #if $loNoHeader + --loNoHeader + #end if + #if str($loMaxOverlap) + --loMaxOverlap=$loMaxOverlap + #end if + #if str($loCutoff) + --loCutoff=$loCutoff + #end if + #if $loWithoutStops + --loWithoutStops + #end if + #if str($loTransTable) + --loTransTable=$loTransTable + #end if + #if $loStopCodons + --loStopCodons="$loStopCodons" + #end if + --loOutput $loOutput --logfile $logfile + </command> + + <inputs> + <param name="loSequence" type="data" format="fasta" label="DNA sequence to be analyzed" help="FASTA format" /> + + <param name="loStartCodons" type="text" value="" optional="true" label="Specify allowable start codons as a comma-separated list (-A, --start_codons)" help="Sample format: 'atg,gtg'. The default start codons are atg, gtg and ttg." /> + + <param name="loEntropy" type="data" format="glimmer_entropy_profiles" optional="true" label="Entropy profiles (-E, --entropy)" help="The entropy profiles are used only if the cutoff option is specified." /> + + <param name="loFixed" type="boolean" checked="false" label="Fixed minimum gene length (-f, --fixed)" help="Do NOT automatically calculate the minimum gene length that maximizes the number or length of output regions, but instead use either the value specified by the 'minimum gene length' option or else the default, which is 90." 
/> + + <param name="loMinLen" type="integer" value="" optional="true" label="Minimum gene length in number of nucleotides (-g, --min_len)" help="It does not include the bases in the stop codon." /> + + <param name="loLinear" type="boolean" checked="false" label="Assuming a linear genome (-l, --linear)" help="No 'wrap-around' genes with part at the beginning of the sequence and the rest at the end of the sequence." /> + + <param name="loLengthOpt" type="boolean" checked="false" label="Optimize minimum gene length (-L, --length_opt)" help="Find and use as the minimum gene length the value that maximizes the total length of non-overlapping genes, instead of the default behaviour, which is to maximize the total number of non-overlapping genes." /> + + <param name="loNoHeader" type="boolean" checked="true" label="Do not include the program-settings header information in the output file (-n, --no_header)" help="The output file will contain only the coordinates of the selected ORFs." /> + + <param name="loMaxOverlap" type="integer" value="" optional="true" label="Maximum overlap length (-o, --max_olap)" help="Overlaps of this many or fewer bases between genes are not regarded as overlaps." /> + + <param name="loCutoff" type="float" value="" optional="true" label="Only genes with an entropy distance score less than this value will be considered (-t, --cutoff)" help=" This cutoff is made before any subsequent steps in the algorithm." /> + + <param name="loWithoutStops" type="boolean" checked="false" label="Do NOT include the stop codon in the region described by the output coordinates (-w, --without_stops)" help="By default it is included." 
/> + + <param name="loTransTable" type="integer" value="" optional="true" label="Use GenBank translation table number n to specify stop codons (-z, --trans_table)" help="" /> + + <param name="loStopCodons" type="text" value="" optional="true" label="Specify allowable stop codons as a comma-separated list (-Z, --stop_codons)" help="Sample format: 'tag,tga'. The default stop codons are tag, tga and taa." /> + </inputs> + + <outputs> + <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" /> + <data name="loOutput" format="glimmer_coords" label="${tool.name} on ${on_string}: output" /> + </outputs> + + <tests> + + </tests> + <help> +**What it does** + +This program identifies long, non-overlapping open reading frames (ORFs) in a DNA sequence file. These ORFs are very likely to contain genes, and can be used as a set of training sequences for Build-ICM. More specifically, among all ORFs longer than a minimum length, those that do not overlap any others are output. The start codon used for each ORF is the first possible one. The program, by default, automatically determines the value that maximizes the number of ORFs that are output. With the -t option, the initial set of candidate ORFs also can be filtered using entropy distance, which generally produces a larger, more accurate training set, particularly for high-GC-content genomes. + + +**License and citation** + +This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_. + +.. _CRS4 Srl.: http://www.crs4.it/ +.. _MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. 
Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,20 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="glimmer" version="3.02"> + <install version="1.0"> + <actions> + <action type="download_by_url" target_filename="glimmer3.02.tar.gz">http://ccb.jhu.edu/software/glimmer/glimmer302b.tar.gz</action> + <action type="shell_command">make -C src/</action> + <!-- remove "test" executable which would take precedence over /usr/bin/test --> + <action type="shell_command">rm -f bin/test</action> + <action type="move_directory_files"> + <source_directory>bin</source_directory> + <destination_directory>$INSTALL_DIR/bin</destination_directory> + </action> + <action type="set_environment"> + <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> + </action> + </actions> + </install> + </package> +</tool_dependency>