# HG changeset patch
# User peterjc
# Date 1438786376 14400
# Node ID 9fbf29a8c12b128ac2af92ed26e8aec645e76b65
# Parent ec202446408a48b6d7c9843642aa4212acc1692c
v0.0.6 use format_source; v0.0.5 more explicit error msg, citation info
diff -r ec202446408a -r 9fbf29a8c12b tools/align_back_trans/README.rst
--- a/tools/align_back_trans/README.rst Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/README.rst Wed Aug 05 10:52:56 2015 -0400
@@ -1,7 +1,7 @@
Galaxy tool to back-translate a protein alignment to nucleotides
================================================================
-This tool is copyright 2012-2014 by Peter Cock, The James Hutton Institute
+This tool is copyright 2012-2015 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
See the licence text below (MIT licence).
@@ -43,11 +43,12 @@
-You will also need to install Biopython 1.62 or later. If you want to run
-the unit tests, include this line in ``tools_conf.xml.sample`` and the sample
-FASTA files under the ``test-data`` directory. Then::
+You will also need to install Biopython 1.62 or later.
- ./run_functional_tests.sh -id align_back_trans
+If you wish to run the unit tests, also move/copy the ``test-data/`` files
+under Galaxy's ``test-data/`` folder. Then::
+
+ ./run_tests.sh -id align_back_trans
That's it.
@@ -63,6 +64,11 @@
v0.0.3 - First official release
v0.0.4 - Simplified XML to apply input format to output data.
- Fixed error message when sequence length not a multiple of three.
+v0.0.5 - More explicit error messages when sequence lengths do not match.
+ - Tool definition now embeds citation information.
+v0.0.6 - Reorder XML elements (internal change only).
+ - Use ``format_source=...`` tag.
+ - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
======= ======================================================================
@@ -75,22 +81,31 @@
With the addition of a Galaxy wrapper, developement moved here:
https://github.com/peterjc/pico_galaxy/tree/master/tools/align_back_trans
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which require that you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+ $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/align_back_trans/
+ ...
+
+or::
- $ tar -czf align_back_trans.tar.gz tools/align_back_trans/README.rst tools/align_back_trans/align_back_trans.py tools/align_back_trans/align_back_trans.xml tools/align_back_trans/tool_dependencies.xml test-data/demo_nucs.fasta test-data/demo_nucs_trailing_stop.fasta test-data/demo_prot_align.fasta test-data/demo_nuc_align.fasta
+ $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/align_back_trans/
+ ...
+
+To just build and check the tar ball, use::
-Check this worked::
-
- $ tar -tzf align_back_trans.tar.gz
+ $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/align_back_trans/
+ ...
+ $ tar -tzf shed_upload.tar.gz
+ test-data/demo_nucs.fasta
+ test-data/demo_nucs_trailing_stop.fasta
+ test-data/demo_prot_align.fasta
+ test-data/demo_nuc_align.fasta
tools/align_back_trans/README.rst
tools/align_back_trans/align_back_trans.py
tools/align_back_trans/align_back_trans.xml
tools/align_back_trans/tool_dependencies.xml
- test-data/demo_nucs.fasta
- test-data/demo_nucs_trailing_stop.fasta
- test-data/demo_prot_align.fasta
- test-data/demo_nuc_align.fasta
Licence (MIT)
diff -r ec202446408a -r 9fbf29a8c12b tools/align_back_trans/align_back_trans.py
--- a/tools/align_back_trans/align_back_trans.py Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/align_back_trans.py Wed Aug 05 10:52:56 2015 -0400
@@ -15,8 +15,6 @@
* http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
See accompanying text file for licence details (MIT licence).
-
-This is version 0.0.3 of the script.
"""
import sys
@@ -28,10 +26,10 @@
from Bio.Data.CodonTable import ambiguous_generic_by_id
if "-v" in sys.argv or "--version" in sys.argv:
- print "v0.0.4"
+ print "v0.0.5"
sys.exit(0)
-def stop_err(msg, error_level=1):
+def sys_exit(msg, error_level=1):
"""Print error message to stdout and quit with given error level."""
sys.stderr.write("%s\n" % msg)
sys.exit(error_level)
@@ -39,7 +37,7 @@
def check_trans(identifier, nuc, prot, table):
"""Returns nucleotide sequence if works (can remove trailing stop)"""
if len(nuc) % 3:
- stop_err("Nucleotide sequence for %s is length %i (not a multiple of three)"
+ sys_exit("Nucleotide sequence for %s is length %i (not a multiple of three)"
% (identifier, len(nuc)))
p = str(prot).upper().replace("*", "X")
@@ -49,19 +47,20 @@
#Allow this...
t = t[:-1]
nuc = nuc[:-3] #edit return value
- if len(t) != len(p) and p in t:
- stop_err("%s translation matched but only as subset of nucleotides, "
- "wrong start codon?" % identifier)
- if len(t) != len(p) and p[1:] in t:
- stop_err("%s translation matched (ignoring first base) but only "
- "as subset of nucleotides, wrong start codon?" % identifier)
if len(t) != len(p):
- stop_err("Inconsistent lengths for %s, ungapped protein %i, "
- "tripled %i vs ungapped nucleotide %i" %
- (identifier,
- len(p),
- len(p) * 3,
- len(nuc)))
+ err = ("Inconsistent lengths for %s, ungapped protein %i, "
+ "tripled %i vs ungapped nucleotide %i." %
+ (identifier, len(p), len(p) * 3, len(nuc)))
+ if t.endswith(p):
+ err += "\nThere are %i extra nucleotides at the start." % (len(t) - len(p))
+ elif t.startswith(p):
+ err += "\nThere are %i extra nucleotides at the end." % (len(t) - len(p))
+ elif p in t:
+ #TODO - Calculate and report the number to trim at start and end?
+ err += "\nHowever, protein sequence found within translated nucleotides."
+ elif p[1:] in t:
+ err += "\nHowever, ignoring first amino acid, protein sequence found within translated nucleotides."
+ sys_exit(err)
if t == p:
@@ -71,7 +70,7 @@
if str(nuc[0:3]).upper() in ambiguous_generic_by_id[table].start_codons:
return nuc
else:
- stop_err("Translation check failed for %s\n"
+ sys_exit("Translation check failed for %s\n"
"Would match if %s was a start codon (check correct table used)\n"
% (identifier, nuc[0:3].upper()))
else:
@@ -86,7 +85,7 @@
sys.stderr.write("Protein: %s\n" % p[offset:offset+60])
sys.stderr.write(" %s\n" % m[offset:offset+60])
sys.stderr.write("Translation: %s\n\n" % t[offset:offset+60])
- stop_err("Translation check failed for %s\n" % identifier)
+ sys_exit("Translation check failed for %s\n" % identifier)
def sequence_back_translate(aligned_protein_record, unaligned_nucleotide_record, gap, table=0):
#TODO - Separate arguments for protein gap and nucleotide gap?
@@ -107,7 +106,7 @@
if table:
ungapped_nucleotide = check_trans(aligned_protein_record.id, ungapped_nucleotide, ungapped_protein, table)
elif len(ungapped_protein) * 3 != len(ungapped_nucleotide):
- stop_err("Inconsistent lengths for %s, ungapped protein %i, "
+ sys_exit("Inconsistent lengths for %s, ungapped protein %i, "
"tripled %i vs ungapped nucleotide %i" %
(aligned_protein_record.id,
len(ungapped_protein),
@@ -160,7 +159,7 @@
elif len(sys.argv) == 6:
align_format, prot_align_file, nuc_fasta_file, nuc_align_file, table = sys.argv[1:]
else:
- stop_err("""This is a Python script for 'back-translating' a protein alignment,
+ sys_exit("""This is a Python script for 'back-translating' a protein alignment,
It requires three, four or five arguments:
- alignment format (e.g. fasta, clustal),
@@ -185,7 +184,7 @@
try:
table = int(table)
except:
- stop_err("Bad table argument %r" % table)
+ sys_exit("Bad table argument %r" % table)
prot_align = AlignIO.read(prot_align_file, align_format, alphabet=generic_protein)
nuc_dict = SeqIO.index(nuc_fasta_file, "fasta")
diff -r ec202446408a -r 9fbf29a8c12b tools/align_back_trans/align_back_trans.xml
--- a/tools/align_back_trans/align_back_trans.xml Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/align_back_trans.xml Wed Aug 05 10:52:56 2015 -0400
@@ -1,18 +1,18 @@
-
+
Gives a codon aware alignment
biopython
Bio
- align_back_trans.py --version
-
-align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
-
+ align_back_trans.py --version
+
+align_back_trans.py $prot_align.ext "$prot_align" "$nuc_file" "$out_nuc_align" "$table"
+
@@ -38,7 +38,7 @@
-
+
@@ -122,4 +122,8 @@
This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/align_back_trans
+
+ 10.7717/peerj.167
+ 10.1093/bioinformatics/btp163
+
diff -r ec202446408a -r 9fbf29a8c12b tools/align_back_trans/tool_dependencies.xml
--- a/tools/align_back_trans/tool_dependencies.xml Wed Jun 04 08:42:23 2014 -0400
+++ b/tools/align_back_trans/tool_dependencies.xml Wed Aug 05 10:52:56 2015 -0400
@@ -1,6 +1,6 @@
-
+