Mercurial > repos > crs4 > prokka

--- a/prokka.py	Tue Sep 10 13:11:26 2013 -0400
+++ b/prokka.py	Thu Sep 26 12:39:52 2013 -0400
@@ -21,15 +21,16 @@
     parser.add_option('--mincontig', dest='mincontig', type='int', help='Minimun contig size')
     parser.add_option('--rfam', action="store_true", dest="rfam", help="Enable searching for ncRNAs")
     parser.add_option('--centre', dest="centre", default="CRS4", help="Sequencing centre")
-    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations. It can be viewed directly in Artemis or IGV")
-    parser.add_option('--gbk', dest="gbk", help="This is a standard Genbank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-Genbank, with one record for each sequence")
+    parser.add_option('--gff', dest="gff", help="This is the master annotation in GFF3 format, containing both sequences and annotations")
+    parser.add_option('--gbk', dest="gbk", help="This is a standard GenBank file derived from the master .gff. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence")
     parser.add_option('--fna', dest="fna", help="Nucleotide FASTA file of the input contig sequences")
     parser.add_option('--faa', dest="faa", help="Protein FASTA file of the translated CDS sequences")
     parser.add_option('--ffn', dest="ffn", help="Nucleotide FASTA file of all the annotated sequences, not just CDS")
-    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to Genbank. It needs to be edited to set the correct taxonomy, authors, related publication etc")
+    parser.add_option('--sqn', dest="sqn", help="An ASN1 format Sequin file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc.")
     parser.add_option('--fsa', dest="fsa", help="Nucleotide FASTA file of the input contig sequences, used by tbl2asn to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines")
     parser.add_option('--tbl', dest="tbl", help="Feature Table file, used by tbl2asn to create the .sqn file")
     parser.add_option('--err', dest="err", help="Unacceptable annotations - the NCBI discrepancy report")
+    parser.add_option('--txt', dest='txt', help='Statistics relating to the annotated features found')
     parser.add_option('--log', dest="log", help="Contains all the output that Prokka produced during its run")
     (options, args) = parser.parse_args()
     if len(args) > 0:
@@ -39,9 +40,9 @@
     cpus = "--cpus %d" % (options.cpus) if options.cpus is not None else ''
     rfam = '--rfam' if options.rfam else ''
     mincontig = "--mincontig %d" % options.mincontig if options.mincontig is not None else ''
-
+
     cl = "prokka --force --outdir . --prefix prokka --kingdom %s %s --centre %s %s %s %s" % (options.kingdom, mincontig, options.centre, rfam, cpus, options.fasta)
-    print '\nProkka command to be executed: \n %s' % cl
+    print '\nProkka command to be executed:\n %s' % cl

     # Run command
     log = open(options.log, 'w') if options.log else sys.stdout
@@ -50,11 +51,11 @@
     finally:
         if log != sys.stdout:
             log.close()
-
+
     # Rename output files
-    suffix = ['gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'gff']
+    suffix = ['gff', 'gbk', 'fna', 'faa', 'ffn', 'sqn', 'fsa', 'tbl', 'err', 'txt']
     for s in suffix:
-        shutil.move( 'prokka.' + s, getattr(options, s))
+        shutil.move('prokka.' + s, getattr(options, s))

 if __name__ == "__main__":
     __main__()
--- a/prokka.xml	Tue Sep 10 13:11:26 2013 -0400
+++ b/prokka.xml	Thu Sep 26 12:39:52 2013 -0400
@@ -1,4 +1,4 @@
-<tool id="prokka" name="Prokka" version="1.0.0">
+<tool id="prokka" name="Prokka" version="1.0.1">
   <description>Prokaryotic Annotation</description>
   <requirements>
     <requirement type="package" version="2.2.26+">blast+</requirement>
@@ -34,6 +34,7 @@
     --fsa=$out_fsa
     --tbl=$out_tbl
     --err=$out_err
+    --txt=$out_txt
     --log=$out_log
   </command>
   <inputs>
@@ -53,45 +54,52 @@
     <param name="rfam" type="boolean" checked="false" label="Enable searching for ncRNAs with Infernal-Rfam - SLOW (--rfam)" />
   </inputs>
   <outputs>
-    <data format="gff" name="out_gff" label="${tool.name} on ${on_string}: gff"/>
-    <data format="txt" name="out_gbk" label="${tool.name} on ${on_string}: gbk"/>
-    <data format="fasta" name="out_fna" label="${tool.name} on ${on_string}: fna"/>
-    <data format="fasta" name="out_faa" label="${tool.name} on ${on_string}: faa"/>
-    <data format="fasta" name="out_ffn" label="${tool.name} on ${on_string}: ffn"/>
-    <data format="asn1" name="out_sqn" label="${tool.name} on ${on_string}: sqn"/>
-    <data format="fasta" name="out_fsa" label="${tool.name} on ${on_string}: fsa"/>
-    <data format="txt" name="out_tbl" label="${tool.name} on ${on_string}: tbl"/>
-    <data format="txt" name="out_err" label="${tool.name} on ${on_string}: err"/>
-    <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log"/>
+    <data format="gff" name="out_gff" label="${tool.name} on ${on_string}: gff" />
+    <data format="txt" name="out_gbk" label="${tool.name} on ${on_string}: gbk" />
+    <data format="fasta" name="out_fna" label="${tool.name} on ${on_string}: fna" />
+    <data format="fasta" name="out_faa" label="${tool.name} on ${on_string}: faa" />
+    <data format="fasta" name="out_ffn" label="${tool.name} on ${on_string}: ffn" />
+    <data format="asn1" name="out_sqn" label="${tool.name} on ${on_string}: sqn" />
+    <data format="fasta" name="out_fsa" label="${tool.name} on ${on_string}: fsa" />
+    <data format="txt" name="out_tbl" label="${tool.name} on ${on_string}: tbl" />
+    <data format="txt" name="out_err" label="${tool.name} on ${on_string}: err" />
+    <data format="txt" name="out_txt" label="${tool.name} on ${on_string}: txt" />
+    <data format="txt" name="out_log" label="${tool.name} on ${on_string}: log" />
   </outputs>

   <help>
 **What it does**

-Prokka_ is a software tool to annotate bacterial, archaeal and viral genomes very rapidly, and produce output files that require only minor tweaking to submit to Genbank/ENA/DDBJ.
+Prokka_ is a software tool to annotate bacterial, archaeal and viral genomes very rapidly, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ.

 .. _Prokka: http://www.vicbioinformatics.com/software.prokka.shtml

 **Output files**

-Prokka creates several output files::
+Prokka creates several output files:

- gff		This is the master annotation in GFF3 format, containing both sequences and annotations.
-		It can be viewed directly in Artemis or IGV
- gbk		This is a standard Genbank file derived from the master .gff
-		If the input to prokka was a multi-FASTA, then this will be a multi-Genbank,
-		with one record for each sequence
- fna		Nucleotide FASTA file of the input contig sequences
- faa		Protein FASTA file of the translated CDS sequences
- ffn		Nucleotide FASTA file of all the annotated sequences, not just CDS
- sqn		An ASN1 format "Sequin" file for submission to Genbank.
-		It needs to be edited to set the correct taxonomy, authors, related publication etc.
- fsa		Nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file.
-		It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines
- tbl		Feature Table file, used by "tbl2asn" to create the .sqn file.
- err		Unacceptable annotations - the NCBI discrepancy report.
- log		Contains all the output that Prokka produced during its run.
-		This is a record of what settings you used.
+gff
+    This is the master annotation in GFF3 format, containing both sequences and annotations
+gbk
+    This is a standard GenBank file derived from the master .gff file. If the input to prokka was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence
+fna
+    Nucleotide FASTA file of the input contig sequences
+faa
+    Protein FASTA file of the translated CDS sequences
+ffn
+    Nucleotide FASTA file of all the annotated sequences, not just CDS
+sqn
+    An ASN1 format "Sequin" file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc.
+fsa
+    Nucleotide FASTA file of the input contig sequences, used by "tbl2asn" to create the .sqn file. It is mostly the same as the .fna file, but with extra Sequin tags in the sequence description lines
+tbl
+    Feature Table file, used by "tbl2asn" to create the .sqn file
+err
+    Unacceptable annotations - the NCBI discrepancy report
+log
+    Contains all the output that Prokka produced during its run
+txt
+    Statistics relating to the annotated features found

 **License and citation**
--- a/tool_dependencies.xml	Tue Sep 10 13:11:26 2013 -0400
+++ b/tool_dependencies.xml	Thu Sep 26 12:39:52 2013 -0400
@@ -31,10 +31,10 @@
     <readme>
 Dependencies of Prokka which need to be installed separately:
 - Perl core modules: File::Copy, FindBin, Getopt::Long, List::Util, Scalar::Util, Time::Piece, Time::Seconds;
-- Perl modules: Bio::SeqIO from BioPerl ( http://search.cpan.org/dist/BioPerl/ ) &gt;= 1.6.1, XML::Simple ( http://search.cpan.org/dist/XML-Simple/ );
+- Perl modules: Bio::SeqIO from BioPerl ( http://search.cpan.org/dist/BioPerl/ ) &gt;= 1.6.900, XML::Simple ( http://search.cpan.org/dist/XML-Simple/ );
 - Prodigal ( http://prodigal.ornl.gov/ ) &gt;= 2.60 ;
 - tbl2asn ( http://www.ncbi.nlm.nih.gov/genbank/tbl2asn2/ ) &gt;= 21.0 ;
-- GNU Parallel ( http://www.gnu.org/software/parallel/ ) &gt;= 20120322 ;
+- GNU Parallel ( http://www.gnu.org/software/parallel/ ) &gt;= 20130422 ;
 - Barrnap ( http://www.vicbioinformatics.com/software.barrnap.shtml ) &gt;= 0.1 .

 Change the PROKKA_SITE_OPTIONS variable in the installed env.sh file to adjust the number of CPUs to use (--cpus).