changeset 0:95505a9fa26f draft

Uploaded
author crs4
date Tue, 10 Sep 2013 13:11:26 -0400
parents
children 4b6f16a79fe4
files COPYING prokka.py prokka.xml tool_dependencies.xml
diffstat 4 files changed, 240 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COPYING	Tue Sep 10 13:11:26 2013 -0400
@@ -0,0 +1,23 @@
+Copyright © 2013 CRS4 Srl. http://www.crs4.it/
+Created by:
+Paolo Uva <paolo.uva@crs4.it>
+Nicola Soranzo <nicola.soranzo@crs4.it>
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prokka.py	Tue Sep 10 13:11:26 2013 -0400
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+"""
+Wrapper for Prokka - Prokaryotic annotation tool
+Author: Paolo Uva paolo dot uva at crs4 dot it
+Date: February 14, 2013
+Update: March 14, 2013 - Added more options
+"""
+
+import optparse
+import shutil
+import subprocess
+import sys
+
+
+def __main__():
+    #Parse Command Line
+    parser = optparse.OptionParser()
+    parser.add_option('--cpus', dest='cpus', type='int', help='Number of CPUs to use [0=all]')
+    parser.add_option('--fasta', dest='fasta', help='FASTA file with contigs')
+    parser.add_option('--kingdom', dest='kingdom', choices=['Archaea', 'Bacteria', 'Viruses'], default='Bacteria', help='Kingdom')
+    parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimun contig size')
+    parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs")
+    parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre")
+    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations. It can be viewed directly in Artemis or IGV")
+    parser.add_option('--gbk', dest="gbk", help="This is a standard Genbank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-Genbank, with one record for each sequence")
+    parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences")
+    parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences")
+    parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS")
+    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to Genbank. It needs to be edited to set the correct taxonomy, authors, related publication etc")
+    parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines")
+    parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file")
+    parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report")
+    parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run")
+    (options, args) = parser.parse_args()
+    if len(args) > 0:
+        parser.error('Wrong number of arguments')
+
+    # Build command
+    cpus = "--cpus %d" % (options.cpus) if options.cpus is not None else ''
+    rfam = '--rfam' if options.rfam else ''
+    mincontig = "--mincontig %d" % options.mincontig if options.mincontig is not None else ''
+    
+    cl = "prokka --force --outdir . --prefix prokka --kingdom %s %s --centre %s %s %s %s" % (options.kingdom, mincontig, options.centre, rfam, cpus, options.fasta)
+    print '\nProkka command to be executed: \n %s' % cl
+
+    # Run command
+    log = open(options.log, 'w') if options.log else sys.stdout
+    try:
+        subprocess.check_call(cl, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because prokka writes many logging info there
+    finally:
+        if log != sys.stdout:
+            log.close()
+    
+    # Rename output files
+    suffix = ['gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'gff']
+    for s in suffix:
+        shutil.move( 'prokka.' + s, getattr(options, s))
+
+if __name__ == "__main__":
+    __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prokka.xml	Tue Sep 10 13:11:26 2013 -0400
@@ -0,0 +1,112 @@
+<tool id="prokka" name="Prokka" version="1.0.0">
+  <description>Prokaryotic Annotation</description>
+  <requirements>
+    <requirement type="package" version="2.2.26+">blast+</requirement>
+    <requirement type="package" version="3.1b1">hmmer</requirement>
+    <requirement type="package" version="1.2.36">aragorn</requirement>
+    <requirement type="package" version="1.1rc4">infernal</requirement>
+    <requirement type="package" version="1.7">prokka</requirement>
+  </requirements>
+  <version_command>prokka --version</version_command>
+  <command interpreter="python">
+    prokka.py
+    \${PROKKA_SITE_OPTIONS:---cpus 8}
+    ## Reads in FASTA format
+    --fasta=$fasta_file
+    ## Additional inputs
+    --kingdom $kingdom_type.kingdom
+    #if str($mincontig)
+      --mincontig $mincontig
+    #end if
+    #if $rfam
+      --rfam
+    #end if
+    #if $centre
+      --centre "$centre"
+    #end if
+    ## Output files
+    --gff=$out_gff
+    --gbk=$out_gbk
+    --fna=$out_fna
+    --faa=$out_faa
+    --ffn=$out_ffn
+    --sqn=$out_sqn
+    --fsa=$out_fsa
+    --tbl=$out_tbl
+    --err=$out_err
+    --log=$out_log
+  </command>
+  <inputs>
+    <param name="fasta_file" type="data" format="fasta" label="Contigs" help="FASTA format" />
+    <conditional name="kingdom_type">
+      <param name="kingdom" type="select" label="Kingdom (--kingdom)">
+        <option value="Archaea">Archaea</option>
+        <option value="Bacteria" selected="true">Bacteria</option>
+        <option value="Viruses">Viruses</option>
+      </param>
+      <when value="Archaea" />
+      <when value="Bacteria" />
+      <when value="Viruses" />
+    </conditional>
+    <param name="mincontig" type="integer" value="200" optional="true" label="Minimum contig size - NCBI needs 200 (--mincontig)" />
+    <param name="centre" type="text" value="CRS4" label="Sequencing centre ID (--centre)" />
+    <param name="rfam" type="boolean" checked="false" label="Enable searching for ncRNAs with Infernal-Rfam - SLOW (--rfam)" />
+  </inputs>
+  <outputs>
+    <data format="gff" name="out_gff" label="${tool.name} on ${on_string}: gff"/>
+    <data format="txt" name="out_gbk" label="${tool.name} on ${on_string}: gbk"/>
+    <data format="fasta" name="out_fna" label="${tool.name} on ${on_string}: fna"/>
+    <data format="fasta" name="out_faa" label="${tool.name} on ${on_string}: faa"/>
+    <data format="fasta" name="out_ffn" label="${tool.name} on ${on_string}: ffn"/>
+    <data format="asn1" name="out_sqn" label="${tool.name} on ${on_string}: sqn"/>
+    <data format="fasta" name="out_fsa" label="${tool.name} on ${on_string}: fsa"/>
+    <data format="txt" name="out_tbl" label="${tool.name} on ${on_string}: tbl"/>
+    <data format="txt" name="out_err" label="${tool.name} on ${on_string}: err"/>
+    <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log"/>
+  </outputs>
+
+  <help>
+**What it does**
+
+Prokka_ is a software tool to annotate bacterial, archaeal and viral genomes very rapidly, and produce output files that require only minor tweaking to submit to Genbank/ENA/DDBJ.
+
+.. _Prokka: http://www.vicbioinformatics.com/software.prokka.shtml
+
+**Output files**
+
+Prokka creates several output files::
+
+ gff		This is the master annotation in GFF3 format, containing both sequences and annotations.
+		It can be viewed directly in Artemis or IGV
+ gbk		This is a standard Genbank file derived from the master .gff
+		If the input to prokka was a multi-FASTA, then this will be a multi-Genbank,
+		with one record for each sequence
+ fna		Nucleotide FASTA file of the input contig sequences
+ faa		Protein FASTA file of the translated CDS sequences
+ ffn		Nucleotide FASTA file of all the annotated sequences, not just CDS
+ sqn		An ASN1 format "Sequin" file for submission to Genbank.
+		It needs to be edited to set the correct taxonomy, authors, related publication etc.
+ fsa		Nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file.
+		It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines
+ tbl		Feature Table file, used by "tbl2asn" to create the .sqn file.
+ err		Unacceptable annotations - the NCBI discrepancy report.
+ log		Contains all the output that Prokka produced during its run.
+		This is a record of what settings you used.
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _MIT license: http://opensource.org/licenses/MIT
+
+If you use this tool in Galaxy, please cite |Cuccuru2013|_.
+
+.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
+.. _Cuccuru2013: http://orione.crs4.it/
+
+This tool uses `Prokka`_, which is licensed separately. Please cite Seemann T. Prokka: Prokaryotic Genome Annotation System (in preparation).
+
+.. _Prokka: http://www.vicbioinformatics.com/software.prokka.shtml
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Sep 10 13:11:26 2013 -0400
@@ -0,0 +1,45 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="blast+" version="2.2.26+">
+    <repository changeset_revision="40c69b76b46e" name="package_blast_plus_2_2_26" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="hmmer" version="3.1b1">
+    <repository changeset_revision="007c736bf7e8" name="package_hmmer_3_1" owner="lionelguy" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="aragorn" version="1.2.36">
+    <repository changeset_revision="f09e2902e6ed" name="package_aragorn_1_2_36" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="infernal" version="1.1rc4">
+    <repository changeset_revision="b9cc978bc83b" name="package_infernal_1_1rc4" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="prokka" version="1.7">
+    <install version="1.0">
+      <actions>
+        <action target_filename="" type="download_by_url">http://www.vicbioinformatics.com/prokka-1.7.tar.gz</action>
+        <action type="move_directory_files">
+          <source_directory>.</source_directory>
+          <destination_directory>$INSTALL_DIR</destination_directory>
+        </action>
+        <action type="set_environment">
+          <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
+        </action>
+        <action type="set_environment">
+          <environment_variable action="set_to" name="PROKKA_SITE_OPTIONS">"--cpus 8"</environment_variable>
+        </action>
+      </actions>
+    </install>
+    <readme>
+Dependencies of Prokka which need to be installed separately:
+- Perl core modules: File::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds;
+- Perl modules: Bio::SeqIO from BioPerl ( http://search.cpan.org/dist/BioPerl/ ) &gt;= 1.6.1, XML::Simple ( http://search.cpan.org/dist/XML-Simple/ );
+- Prodigal ( http://prodigal.ornl.gov/ ) &gt;= 2.60 ;
+- tbl2asn ( http://www.ncbi.nlm.nih.gov/genbank/tbl2asn2/ ) &gt;= 21.0 ;
+- GNU Parallel ( http://www.gnu.org/software/parallel/ ) &gt;= 20120322 ;
+- Barrnap ( http://www.vicbioinformatics.com/software.barrnap.shtml ) &gt;= 0.1 .
+
+Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).
+
+Note: Prokka is about a 2.0 GB download due to included custom databases.
+    </readme>
+  </package>
+</tool_dependency>