changeset 2:4bafaa074484 draft

Merge with Lionel Guy's wrapper. Directly call prokka, remove prokka.py . Add locustag, increment, gffver, compliant, addgenes, genus, species, strain, plasmid, gcode, usegenus, metagenome, fast, evalue, norrna, notrna params. Update BLAST+ dependency to 2.2.28. Add dependencies on prodigal and barrnap. Add readme.rst .
author crs4
date Fri, 25 Oct 2013 08:59:51 -0400
parents 4b6f16a79fe4
children fffa1ae330ae
files COPYING prokka.py prokka.xml readme.rst tool_dependencies.xml
diffstat 5 files changed, 178 insertions(+), 116 deletions(-) [+]
line wrap: on
line diff
--- a/COPYING	Thu Sep 26 12:39:52 2013 -0400
+++ b/COPYING	Fri Oct 25 08:59:51 2013 -0400
@@ -1,7 +1,9 @@
 Copyright © 2013 CRS4 Srl. http://www.crs4.it/
+Copyright © 2013 Lionel Guy
 Created by:
 Paolo Uva <paolo.uva@crs4.it>
 Nicola Soranzo <nicola.soranzo@crs4.it>
+Lionel Guy <lionel.guy@icm.uu.se>
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the
--- a/prokka.py	Thu Sep 26 12:39:52 2013 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Wrapper for Prokka - Prokaryotic annotation tool
-Author: Paolo Uva paolo dot uva at crs4 dot it
-Date: February 14, 2013
-Update: March 14, 2013 - Added more options
-"""
-
-import optparse
-import shutil
-import subprocess
-import sys
-
-
-def __main__():
-    #Parse Command Line
-    parser = optparse.OptionParser()
-    parser.add_option('--cpus', dest='cpus', type='int', help='Number of CPUs to use [0=all]')
-    parser.add_option('--fasta', dest='fasta', help='FASTA file with contigs')
-    parser.add_option('--kingdom', dest='kingdom', choices=['Archaea', 'Bacteria', 'Viruses'], default='Bacteria', help='Kingdom')
-    parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimun contig size')
-    parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs")
-    parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre")
-    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations")
-    parser.add_option('--gbk', dest="gbk", help="This is a standard GenBank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence")
-    parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences")
-    parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences")
-    parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS")
-    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc.")
-    parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines")
-    parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file")
-    parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report")
-    parser.add_option('--txt', dest='txt', help='Statistics relating to the annotated features found')
-    parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run")
-    (options, args) = parser.parse_args()
-    if len(args) > 0:
-        parser.error('Wrong number of arguments')
-
-    # Build command
-    cpus = "--cpus %d" % (options.cpus) if options.cpus is not None else ''
-    rfam = '--rfam' if options.rfam else ''
-    mincontig = "--mincontig %d" % options.mincontig if options.mincontig is not None else ''
-
-    cl = "prokka --force --outdir . --prefix prokka --kingdom %s %s --centre %s %s %s %s" % (options.kingdom, mincontig, options.centre, rfam, cpus, options.fasta)
-    print '\nProkka command to be executed:\n %s' % cl
-
-    # Run command
-    log = open(options.log, 'w') if options.log else sys.stdout
-    try:
-        subprocess.check_call(cl, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because prokka writes many logging info there
-    finally:
-        if log != sys.stdout:
-            log.close()
-
-    # Rename output files
-    suffix = ['gff', 'gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'txt']
-    for s in suffix:
-        shutil.move('prokka.' + s, getattr(options, s))
-
-if __name__ == "__main__":
-    __main__()
--- a/prokka.xml	Thu Sep 26 12:39:52 2013 -0400
+++ b/prokka.xml	Fri Oct 25 08:59:51 2013 -0400
@@ -1,76 +1,156 @@
-<tool id="prokka" name="Prokka" version="1.0.1">
-  <description>Prokaryotic Annotation</description>
+<tool id="prokka" name="Prokka" version="1.1.0">
+  <description>prokaryotic genome annotation</description>
   <requirements>
-    <requirement type="package" version="2.2.26+">blast+</requirement>
+    <requirement type="package" version="2.2.28">blast+</requirement>
     <requirement type="package" version="3.1b1">hmmer</requirement>
     <requirement type="package" version="1.2.36">aragorn</requirement>
+    <requirement type="package" version="2.60">prodigal</requirement>
+    <requirement type="binary">tbl2asn</requirement>
+    <requirement type="binary">parallel</requirement>
+    <requirement type="package" version="0.2">barrnap</requirement>
     <requirement type="package" version="1.1rc4">infernal</requirement>
     <requirement type="package" version="1.7">prokka</requirement>
   </requirements>
   <version_command>prokka --version</version_command>
-  <command interpreter="python">
-    prokka.py
+  <command>
+    prokka
     \${PROKKA_SITE_OPTIONS:---cpus 8}
-    ## Reads in FASTA format
-    --fasta=$fasta_file
-    ## Additional inputs
-    --kingdom $kingdom_type.kingdom
-    #if str($mincontig)
-      --mincontig $mincontig
+    --quiet ## to avoid non-error messages written to stderr
+    --outdir outdir --prefix prokka ## used in outputs section
+    #if $locustag
+      --locustag "$locustag"
+    #end if
+    #if str($increment)
+      --increment $increment
     #end if
-    #if $rfam
-      --rfam
+    --gffver $gffver
+    #if $compliant.compliant_select == "no"
+      #if $compliant.addgenes
+        --addgenes
+      #end if
+      #if str($compliant.mincontig)
+        --mincontig $compliant.mincontig
+      #end if
+    #else
+      --compliant
     #end if
     #if $centre
       --centre "$centre"
     #end if
-    ## Output files
-    --gff=$out_gff
-    --gbk=$out_gbk
-    --fna=$out_fna
-    --faa=$out_faa
-    --ffn=$out_ffn
-    --sqn=$out_sqn
-    --fsa=$out_fsa
-    --tbl=$out_tbl
-    --err=$out_err
-    --txt=$out_txt
-    --log=$out_log
+    #if $genus
+      --genus "$genus"
+    #end if
+    #if $species
+      --species "$species"
+    #end if
+    #if $strain
+      --strain "$strain"
+    #end if
+    #if $plasmid
+      --plasmid "$plasmid"
+    #end if
+    --kingdom $kingdom.kingdom_select
+    #if str($kingdom.gcode)
+      --gcode $kingdom.gcode
+    #end if
+    #if $usegenus
+      --usegenus
+    #end if
+    #if $metagenome
+      --metagenome
+    #end if
+    #if $fast
+      --fast
+    #end if
+    #if str($evalue)
+      --evalue $evalue
+    #end if
+    #if $rfam
+      --rfam
+    #end if
+    #if $norrna
+      --norrna
+    #end if
+    #if $notrna
+      --notrna
+    #end if
+    $input
   </command>
   <inputs>
-    <param name="fasta_file" type="data" format="fasta" label="Contigs" help="FASTA format" />
-    <conditional name="kingdom_type">
-      <param name="kingdom" type="select" label="Kingdom (--kingdom)">
+    <param name="input" type="data" format="fasta" label="Contigs to annotate" help="FASTA format" />
+    <param name="locustag" type="text" value="PROKKA" optional="true" label="Locus tag prefix (--locustag)" />
+    <param name="increment" type="integer" value="1" optional="true" label="Locus tag counter increment (--increment)">
+      <validator type="in_range" min="1" />
+    </param>
+    <param name="gffver" type="select" label="GFF version (--gffver)">
+      <option value="3">3</option>
+      <option value="2">2</option>
+      <option value="1">1</option>
+    </param>
+    <conditional name="compliant">
+      <param name="compliant_select" type="select" label="Force GenBank/ENA/DDBJ compliance (--compliant)" help="Equivalent to --addgenes --mincontiglen 200 --centre Prokka (or other centre specified below)">
+        <option value="no">No</option>
+        <option value="yes">Yes</option>
+      </param>
+      <when value="no">
+        <param name="addgenes" type="boolean" checked="false" label="Add 'gene' features for each 'CDS' feature (--addgenes)" />
+        <param name="mincontig" type="integer" value="200" optional="true" label="Minimum contig size (--mincontiglen)" help="NCBI needs 200" />
+      </when>
+      <when value="yes" />
+    </conditional>
+    <param name="centre" type="text" value="" optional="true" label="Sequencing centre ID (--centre)" />
+    <param name="genus" type="text" value="" optional="true" label="Genus name (--genus)" help="May be used to aid annotation, see --usegenus below" />
+    <param name="species" type="text" value="" optional="true" label="Species name (--species)" />
+    <param name="strain" type="text" value="" optional="true" label="Strain name (--strain)" />
+    <param name="plasmid" type="text" value="" optional="true" label="Plasmid name or identifier (--plasmid)" />
+    <conditional name="kingdom">
+      <param name="kingdom_select" type="select" label="Kingdom (--kingdom)">
         <option value="Archaea">Archaea</option>
         <option value="Bacteria" selected="true">Bacteria</option>
         <option value="Viruses">Viruses</option>
       </param>
-      <when value="Archaea" />
-      <when value="Bacteria" />
-      <when value="Viruses" />
+      <when value="Archaea">
+        <param name="gcode" type="integer" value="11" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+      </when>
+      <when value="Bacteria">
+        <param name="gcode" type="integer" value="11" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+        <!-- <param name="gram" type="select" display="radio" label="Gram (- -gram)">
+          <option selected="true" value="none">N/A</option>
+          <option value="pos">positive</option>
+          <option value="neg">negative</option>
+        </param> SignalP is not FOSS -->
+      </when>
+      <when value="Viruses">
+        <param name="gcode" type="integer" value="1" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+      </when>
     </conditional>
-    <param name="mincontig" type="integer" value="200" optional="true" label="Minimun contig size - NCBI needs 200 (--mincontig)" />
-    <param name="centre" type="text" value="CRS4" label="Sequencing centre ID (--centre)" />
-    <param name="rfam" type="boolean" checked="false" label="Enable searching for ncRNAs with Infernal-Rfam - SLOW (--rfam)" />
+    <param name="usegenus" type="boolean" checked="false" label="Use genus-specific BLAST database (--usegenus)" help="Will use the BLAST database for the genus specified above, if installed" />
+    <param name="metagenome" type="boolean" checked="false" label="Improve gene predictions for highly fragmented genomes (--metagenome)" help="Will set --meta option for Prodigal" />
+    <param name="fast" type="boolean" checked="false" label="Fast mode (--fast)" help="Skip CDS /product searching" />
+    <param name="evalue" type="float" value="1e-06" optional="true" label="Similarity e-value cut-off">
+      <validator type="in_range" min="0" />
+    </param>
+    <param name="rfam" type="boolean" checked="false" label="Enable searching for ncRNAs with Infernal+Rfam (SLOW!) (--rfam)" />
+    <param name="norrna" type="boolean" checked="false" label="Don't run rRNA search with Barrnap" />
+    <param name="notrna" type="boolean" checked="false" label="Don't run tRNA search with Aragorn" />
   </inputs>
   <outputs>
-    <data format="gff" name="out_gff" label="${tool.name} on ${on_string}: gff" />
-    <data format="txt" name="out_gbk" label="${tool.name} on ${on_string}: gbk" />
-    <data format="fasta" name="out_fna" label="${tool.name} on ${on_string}: fna" />
-    <data format="fasta" name="out_faa" label="${tool.name} on ${on_string}: faa" />
-    <data format="fasta" name="out_ffn" label="${tool.name} on ${on_string}: ffn" />
-    <data format="asn1" name="out_sqn" label="${tool.name} on ${on_string}: sqn" />
-    <data format="fasta" name="out_fsa" label="${tool.name} on ${on_string}: fsa" />
-    <data format="txt" name="out_tbl" label="${tool.name} on ${on_string}: tbl" />
-    <data format="txt" name="out_err" label="${tool.name} on ${on_string}: err" />
-    <data format="txt" name="out_txt" label="${tool.name} on ${on_string}: txt" />
-    <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log" />
+    <data name="out_gff" format="gff" label="${tool.name} on ${on_string}: gff" from_work_dir="outdir/prokka.gff" />
+    <data name="out_gbk" format="txt" label="${tool.name} on ${on_string}: gbk" from_work_dir="outdir/prokka.gbk" />
+    <data name="out_fna" format="fasta" label="${tool.name} on ${on_string}: fna" from_work_dir="outdir/prokka.fna" />
+    <data name="out_faa" format="fasta" label="${tool.name} on ${on_string}: faa" from_work_dir="outdir/prokka.faa" />
+    <data name="out_ffn" format="fasta" label="${tool.name} on ${on_string}: ffn" from_work_dir="outdir/prokka.ffn" />
+    <data name="out_sqn" format="asn1" label="${tool.name} on ${on_string}: sqn" from_work_dir="outdir/prokka.sqn" />
+    <data name="out_fsa" format="fasta" label="${tool.name} on ${on_string}: fsa" from_work_dir="outdir/prokka.fsa" />
+    <data name="out_tbl" format="txt" label="${tool.name} on ${on_string}: tbl" from_work_dir="outdir/prokka.tbl" />
+    <data name="out_err" format="txt" label="${tool.name} on ${on_string}: err" from_work_dir="outdir/prokka.err" />
+    <data name="out_txt" format="txt" label="${tool.name} on ${on_string}: txt" from_work_dir="outdir/prokka.txt" />
+    <data name="out_log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="outdir/prokka.log" />
   </outputs>
-
   <help>
 **What it does**
 
-Prokka_ is a software tool to annotate bacterial, archaeal and viral genomes very rapidly, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ.
+Prokka_ is a software tool to rapidly annotate bacterial, archaeal and viral genomes, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ.
 
 .. _Prokka: http://www.vicbioinformatics.com/software.prokka.shtml
 
@@ -79,7 +159,7 @@
 Prokka creates several output files:
 
 gff
-    This is the master annotation in GFF3 format, containing both sequences and annotations
+    This is the master annotation in GFF format, containing both sequences and annotations
 gbk
     This is a standard GenBank file derived from the master .gff . If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence
 fna
@@ -103,7 +183,7 @@
 
 **License and citation**
 
-This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
+This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and Lionel Guy, and is released under the `MIT license`_.
 
 .. _CRS4 Srl.: http://www.crs4.it/
 .. _MIT license: http://opensource.org/licenses/MIT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Fri Oct 25 08:59:51 2013 -0400
@@ -0,0 +1,35 @@
+Prokka wrapper
+==============
+
+Warning
+-------
+
+Prokka includes custom databases and is thus about a 2.0 GB download!
+
+Dependencies of Prokka which need to be installed separately
+-------------------------------------------------------------
+
+- Perl core modules: File\::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds;
+- Perl modules: Bio::SeqIO from BioPerl_ >= 1.6.900, `XML::Simple`_;
+- `GNU Parallel`_ >= 20130422 is required, but is shipped with Prokka and thus is not managed by the tool dependency system;
+- tbl2asn_ >= 21.0 is required. This dependency is not managed here since new versions are released very frequently;
+- SignalP_ >= 3.0 is an optional dependency to find signal peptides. For licensing reasons, it is not used in the tool wrapper.
+
+.. _BioPerl: http://search.cpan.org/dist/BioPerl/
+.. _XML::Simple: http://search.cpan.org/dist/XML-Simple/
+.. _GNU Parallel: http://www.gnu.org/software/parallel/
+.. _tbl2asn: http://www.ncbi.nlm.nih.gov/genbank/tbl2asn2/
+.. _SignalP: http://www.cbs.dtu.dk/services/SignalP/
+
+Configuration
+-------------
+
+Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).
+
+Version history
+---------------
+
+- v0.1 (LG): initial release in the toolshed, supports Prokka 1.6.
+- v0.2 (LG): added this readme file, support for Prokka 1.7, and dependency management.
+- v1.1.0: merge the wrappers by CRS4 and Lionel Guy, add COPYING file with MIT license, make all params optional, add gffver param, correctly quote text params in command, use float type for 'evalue' param, describe output files in help, upgrade BLAST+ dependency to version 2.2.28, depend on package_aragorn_1_2_36 instead of trna_prediction, depend on package_prodigal_2_60 instead of prodigal, depend on package_barrnap_0_2 instead of barrnap, add PROKKA_SITE_OPTIONS to env.sh and remove 'cpus' param.
+
--- a/tool_dependencies.xml	Thu Sep 26 12:39:52 2013 -0400
+++ b/tool_dependencies.xml	Fri Oct 25 08:59:51 2013 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
-  <package name="blast+" version="2.2.26+">
-    <repository changeset_revision="40c69b76b46e" name="package_blast_plus_2_2_26" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  <package name="blast+" version="2.2.28">
+    <repository changeset_revision="ed85ca8e4295" name="package_blast_plus_2_2_28" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
   </package>
   <package name="hmmer" version="3.1b1">
     <repository changeset_revision="007c736bf7e8" name="package_hmmer_3_1" owner="lionelguy" toolshed="http://toolshed.g2.bx.psu.edu" />
@@ -9,13 +9,19 @@
   <package name="aragorn" version="1.2.36">
     <repository changeset_revision="f09e2902e6ed" name="package_aragorn_1_2_36" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
   </package>
+  <package name="prodigal" version="2.60">
+    <repository changeset_revision="acf0e8b718c9" name="package_prodigal_2_60" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
+  <package name="barrnap" version="0.2">
+    <repository changeset_revision="8ab7a17861ea" name="package_barrnap_0_2" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+  </package>
   <package name="infernal" version="1.1rc4">
     <repository changeset_revision="b9cc978bc83b" name="package_infernal_1_1rc4" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
   </package>
   <package name="prokka" version="1.7">
     <install version="1.0">
       <actions>
-        <action target_filename="" type="download_by_url">http://www.vicbioinformatics.com/prokka-1.7.tar.gz</action>
+        <action type="download_by_url">http://www.vicbioinformatics.com/prokka-1.7.tar.gz</action>
         <action type="move_directory_files">
           <source_directory>.</source_directory>
           <destination_directory>$INSTALL_DIR</destination_directory>
@@ -29,6 +35,8 @@
       </actions>
     </install>
     <readme>
+Warning: Prokka includes custom databases and is thus about a 2.0 GB download!
+
 Dependencies of Prokka which needs to be installed separately:
 - Perl core modules: File::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds;
 - Perl modules: Bio::SeqIO from BioPerl ( http://search.cpan.org/dist/BioPerl/ ) &gt;= 1.6.900, XML::Simple ( http://search.cpan.org/dist/XML-Simple/ );
@@ -37,9 +45,7 @@
 - GNU Parallel ( http://www.gnu.org/software/parallel/ ) &gt;= 20130422 ;
 - Barrnap ( http://www.vicbioinformatics.com/software.barrnap.shtml ) &gt;= 0.1 .
 
-Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).
-
-Note: Prokka is about a 2.0 GB download due to included custom databases.
+Configuration: Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).
     </readme>
   </package>
 </tool_dependency>