changeset 2:9fbf29a8c12b draft

v0.0.6 use format_source; v0.0.5 more explicit error msg, citation info
author peterjc
date Wed, 05 Aug 2015 10:52:56 -0400
parents ec202446408a
children de803005027f
files tools/align_back_trans/README.rst tools/align_back_trans/align_back_trans.py tools/align_back_trans/align_back_trans.xml tools/align_back_trans/tool_dependencies.xml
diffstat 4 files changed, 62 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/tools/align_back_trans/README.rst	Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/README.rst	Wed Aug 05 10:52:56 2015 -0400
@@ -1,7 +1,7 @@
 Galaxy tool to back-translate a protein alignment to nucleotides
 ================================================================
 
-This tool is copyright 2012-2014 by Peter Cock, The James Hutton Institute
+This tool is copyright 2012-2015 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below (MIT licence).
 
@@ -43,11 +43,12 @@
 
     <tool file="align_back_trans/align_back_trans.xml" />
 
-You will also need to install Biopython 1.62 or later. If you want to run
-the unit tests, include this line in ``tools_conf.xml.sample`` and the sample
-FASTA files under the ``test-data`` directory. Then::
+You will also need to install Biopython 1.62 or later.
 
-    ./run_functional_tests.sh -id align_back_trans
+If you wish to run the unit tests, also move/copy the ``test-data/`` files
+under Galaxy's ``test-data/`` folder. Then::
+
+    ./run_tests.sh -id align_back_trans
 
 That's it.
 
@@ -63,6 +64,11 @@
 v0.0.3  - First official release
 v0.0.4  - Simplified XML to apply input format to output data.
         - Fixed error message when sequence length not a multiple of three.
+v0.0.5  - More explicit error messages when sequence lengths do not match.
+        - Tool definition now embeds citation information.
+v0.0.6  - Reorder XML elements (internal change only).
+        - Use ``format_source=...`` tag.
+        - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
 ======= ======================================================================
 
 
@@ -75,22 +81,31 @@
 With the addition of a Galaxy wrapper, developement moved here:
 https://github.com/peterjc/pico_galaxy/tree/master/tools/align_back_trans
 
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which require that you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+    $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/align_back_trans/
+    ...
+
+or::
 
-    $ tar -czf align_back_trans.tar.gz tools/align_back_trans/README.rst tools/align_back_trans/align_back_trans.py tools/align_back_trans/align_back_trans.xml tools/align_back_trans/tool_dependencies.xml test-data/demo_nucs.fasta test-data/demo_nucs_trailing_stop.fasta test-data/demo_prot_align.fasta test-data/demo_nuc_align.fasta
+    $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/align_back_trans/
+    ...
+
+To just build and check the tar ball, use::
 
-Check this worked::
-
-    $ tar -tzf align_back_trans.tar.gz
+    $ planemo shed_upload --tar_only  ~/repositories/pico_galaxy/tools/align_back_trans/
+    ...
+    $ tar -tzf shed_upload.tar.gz 
+    test-data/demo_nucs.fasta
+    test-data/demo_nucs_trailing_stop.fasta
+    test-data/demo_prot_align.fasta
+    test-data/demo_nuc_align.fasta
     tools/align_back_trans/README.rst
     tools/align_back_trans/align_back_trans.py
     tools/align_back_trans/align_back_trans.xml
     tools/align_back_trans/tool_dependencies.xml
-    test-data/demo_nucs.fasta
-    test-data/demo_nucs_trailing_stop.fasta
-    test-data/demo_prot_align.fasta
-    test-data/demo_nuc_align.fasta
 
 
 Licence (MIT)
--- a/tools/align_back_trans/align_back_trans.py	Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/align_back_trans.py	Wed Aug 05 10:52:56 2015 -0400
@@ -15,8 +15,6 @@
 * http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
 
 See accompanying text file for licence details (MIT licence).
-
-This is version 0.0.3 of the script.
 """
 
 import sys
@@ -28,10 +26,10 @@
 from Bio.Data.CodonTable import ambiguous_generic_by_id
 
 if "-v" in sys.argv or "--version" in sys.argv:
-    print "v0.0.4"
+    print "v0.0.5"
     sys.exit(0)
 
-def stop_err(msg, error_level=1):
+def sys_exit(msg, error_level=1):
     """Print error message to stdout and quit with given error level."""
     sys.stderr.write("%s\n" % msg)
     sys.exit(error_level)
@@ -39,7 +37,7 @@
 def check_trans(identifier, nuc, prot, table):
     """Returns nucleotide sequence if works (can remove trailing stop)"""
     if len(nuc) % 3:
-        stop_err("Nucleotide sequence for %s is length %i (not a multiple of three)"
+        sys_exit("Nucleotide sequence for %s is length %i (not a multiple of three)"
                  % (identifier, len(nuc)))
 
     p = str(prot).upper().replace("*", "X")
@@ -49,19 +47,20 @@
             #Allow this...
             t = t[:-1]
             nuc  = nuc[:-3] #edit return value
-    if len(t) != len(p) and p in t:
-        stop_err("%s translation matched but only as subset of nucleotides, "
-                 "wrong start codon?" % identifier)
-    if len(t) != len(p) and p[1:] in t:
-        stop_err("%s translation matched (ignoring first base) but only "
-                 "as subset of nucleotides, wrong start codon?" % identifier)
     if len(t) != len(p):
-        stop_err("Inconsistent lengths for %s, ungapped protein %i, "
-                 "tripled %i vs ungapped nucleotide %i" %
-                 (identifier,
-                  len(p),
-                  len(p) * 3,
-                  len(nuc)))
+        err = ("Inconsistent lengths for %s, ungapped protein %i, "
+               "tripled %i vs ungapped nucleotide %i." %
+               (identifier, len(p), len(p) * 3, len(nuc)))
+        if t.endswith(p):
+            err += "\nThere are %i extra nucleotides at the start." % (len(t) - len(p))
+        elif t.startswith(p):
+            err += "\nThere are %i extra nucleotides at the end." % (len(t) - len(p))
+        elif p in t:
+            #TODO - Calculate and report the number to trim at start and end?
+            err += "\nHowever, protein sequence found within translated nucleotides."
+        elif p[1:] in t:
+            err += "\nHowever, ignoring first amino acid, protein sequence found within translated nucleotides."
+        sys_exit(err)
 
 
     if t == p:
@@ -71,7 +70,7 @@
         if str(nuc[0:3]).upper() in ambiguous_generic_by_id[table].start_codons:
             return nuc
         else:
-            stop_err("Translation check failed for %s\n"
+            sys_exit("Translation check failed for %s\n"
                      "Would match if %s was a start codon (check correct table used)\n"
                      % (identifier, nuc[0:3].upper()))
     else:
@@ -86,7 +85,7 @@
                 sys.stderr.write("Protein:     %s\n" % p[offset:offset+60])
                 sys.stderr.write("             %s\n" % m[offset:offset+60])
                 sys.stderr.write("Translation: %s\n\n" % t[offset:offset+60])
-        stop_err("Translation check failed for %s\n" % identifier)
+        sys_exit("Translation check failed for %s\n" % identifier)
 
 def sequence_back_translate(aligned_protein_record, unaligned_nucleotide_record, gap, table=0):
     #TODO - Separate arguments for protein gap and nucleotide gap?
@@ -107,7 +106,7 @@
     if table:
         ungapped_nucleotide = check_trans(aligned_protein_record.id, ungapped_nucleotide, ungapped_protein, table)
     elif len(ungapped_protein) * 3 != len(ungapped_nucleotide):
-        stop_err("Inconsistent lengths for %s, ungapped protein %i, "
+        sys_exit("Inconsistent lengths for %s, ungapped protein %i, "
                  "tripled %i vs ungapped nucleotide %i" %
                  (aligned_protein_record.id,
                   len(ungapped_protein),
@@ -160,7 +159,7 @@
 elif len(sys.argv) == 6:
     align_format, prot_align_file, nuc_fasta_file, nuc_align_file, table = sys.argv[1:]
 else:
-    stop_err("""This is a Python script for 'back-translating' a protein alignment,
+    sys_exit("""This is a Python script for 'back-translating' a protein alignment,
 
 It requires three, four or five arguments:
 - alignment format (e.g. fasta, clustal),
@@ -185,7 +184,7 @@
 try:
     table = int(table)
 except:
-    stop_err("Bad table argument %r" % table)
+    sys_exit("Bad table argument %r" % table)
 
 prot_align = AlignIO.read(prot_align_file, align_format, alphabet=generic_protein)
 nuc_dict = SeqIO.index(nuc_fasta_file, "fasta")
--- a/tools/align_back_trans/align_back_trans.xml	Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/align_back_trans.xml	Wed Aug 05 10:52:56 2015 -0400
@@ -1,18 +1,18 @@
-<tool id="align_back_trans" name="Thread nucleotides onto a protein alignment (back-translation)" version="0.0.4">
+<tool id="align_back_trans" name="Thread nucleotides onto a protein alignment (back-translation)" version="0.0.6">
     <description>Gives a codon aware alignment</description>
     <requirements>
         <requirement type="package" version="1.63">biopython</requirement>
         <requirement type="python-module">Bio</requirement>
     </requirements>
-    <version_command interpreter="python">align_back_trans.py --version</version_command>
-    <command interpreter="python">
-align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
-    </command>
     <stdio>
         <!-- Anything other than zero is an error -->
         <exit_code range="1:" />
         <exit_code range=":-1" />
     </stdio>
+    <version_command interpreter="python">align_back_trans.py --version</version_command>
+    <command interpreter="python">
+align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
+    </command>
     <inputs>
         <param name="prot_align" type="data" format="fasta,muscle,clustal" label="Aligned protein file" help="Mutliple sequence file in FASTA, ClustalW or PHYLIP format." />
         <param name="table" type="select" label="Genetic code" help="Tables from the NCBI, these determine the start and stop codons">
@@ -38,7 +38,7 @@
         <param name="nuc_file" type="data" format="fasta" label="Unaligned nucleotide sequences" help="FASTA format, using same identifiers as your protein alignment" />
     </inputs>
     <outputs>
-        <data name="out_nuc_align" format="input" metadata_source="prot_align" label="${prot_align.name} (back-translated)"/>
+        <data name="out_nuc_align" format_source="prot_align" metadata_source="prot_align" label="${prot_align.name} (back-translated)"/>
     </outputs>
     <tests>
         <test>
@@ -122,4 +122,8 @@
 This tool is available to install into other Galaxy Instances via the Galaxy
 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
     </help>
+    <citations>
+        <citation type="doi">10.7717/peerj.167</citation>
+        <citation type="doi">10.1093/bioinformatics/btp163</citation>
+    </citations>
 </tool>
--- a/tools/align_back_trans/tool_dependencies.xml	Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/tool_dependencies.xml	Wed Aug 05 10:52:56 2015 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="biopython" version="1.63">
-        <repository changeset_revision="a5c49b83e983" name="package_biopython_1_63" owner="biopython" toolshed="http://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="a5c49b83e983" name="package_biopython_1_63" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>