changeset 2:6901298ac16c

Migrated tool version 0.0.5 from old tool shed archive to new tool shed repository
author peterjc
date Tue, 07 Jun 2011 18:04:39 -0400
parents 3ff1dcbb9440
children f3b373a41f81
files tools/protein_analysis/README tools/protein_analysis/suite_config.xml tools/protein_analysis/tmhmm2.py tools/protein_analysis/tmhmm2.xml
diffstat 4 files changed, 14 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/tools/protein_analysis/README	Tue Jun 07 18:04:05 2011 -0400
+++ b/tools/protein_analysis/README	Tue Jun 07 18:04:39 2011 -0400
@@ -70,6 +70,8 @@
        - Renamed test output file to use Galaxy convention of *.tabular
 v0.0.3 - Check for tmhmm2 silent failures (no output)
        - Additional unit tests
+v0.0.4 - Ignore comment lines in tmhmm2 output.
+v0.0.5 - Explicitly request tmhmm short output (may not be the default)
 
 Developers
 ==========
--- a/tools/protein_analysis/suite_config.xml	Tue Jun 07 18:04:05 2011 -0400
+++ b/tools/protein_analysis/suite_config.xml	Tue Jun 07 18:04:39 2011 -0400
@@ -1,6 +1,6 @@
-    <suite id="tmhmm_and_signalp" name="TMHMM and SignalP" version="0.0.3">
+    <suite id="tmhmm_and_signalp" name="TMHMM and SignalP" version="0.0.5">
         <description>Wrappers for TMHMM and SignalP</description>
-        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.3">
+        <tool id="tmhmm2" name="TMHMM 2.0" version="0.0.5">
             <description>Find transmembrane domains in protein sequences</description>
         </tool>
         <tool id="signalp3" name="SignalP 3.0" version="0.0.3">
--- a/tools/protein_analysis/tmhmm2.py	Tue Jun 07 18:04:05 2011 -0400
+++ b/tools/protein_analysis/tmhmm2.py	Tue Jun 07 18:04:39 2011 -0400
@@ -6,14 +6,17 @@
 v2.0 program (not the webservice) requesting the short output (one line per
 protein).
 
-First major feature is cleaning up the tabular output. The raw output from
-TMHMM v2.0 looks like this (six columns tab separated):
+The first major feature is cleaning up the tabular output. The short form raw
+output from TMHMM v2.0 looks like this (six columns tab separated):
 
  gi|2781234|pdb|1JLY|B	len=304 ExpAA=0.01	First60=0.00	PredHel=0	Topology=o
  gi|4959044|gb|AAD34209.1|AF069992_1	len=600	ExpAA=0.00	First60=0.00	PredHel=0	Topology=o
  gi|671626|emb|CAA85685.1|	len=473 ExpAA=0.19	First60=0.00 PredHel=0	Topology=o
  gi|3298468|dbj|BAA31520.1|	len=107	ExpAA=59.37	First60=31.17	PredHel=3	Topology=o23-45i52-74o89-106i
 
+If there are any additional 'comment' lines starting with the hash (#)
+character, these are ignored by this script.
+
 In order to make it easier to use in Galaxy, this wrapper script simplifies
 this to remove the redundant tags, and instead adds a comment line at the
 top with the column names:
@@ -55,7 +58,8 @@
     """Clean up tabular TMHMM output, returns output line count."""
     count = 0
     for line in raw_handle:
-        if not line:
+        if not line.strip() or line.startswith("#"):
+            #Ignore any blank lines or comment lines
             continue
         parts = line.rstrip("\r\n").split("\t")
         try:
@@ -82,7 +86,7 @@
 #split_fasta returns an empty list (i.e. zero temp files).
 fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK)
 temp_files = [f+".out" for f in fasta_files]
-jobs = ["tmhmm %s > %s" % (fasta, temp)
+jobs = ["tmhmm -short %s > %s" % (fasta, temp)
         for fasta, temp in zip(fasta_files, temp_files)]
 
 def clean_up(file_list):
--- a/tools/protein_analysis/tmhmm2.xml	Tue Jun 07 18:04:05 2011 -0400
+++ b/tools/protein_analysis/tmhmm2.xml	Tue Jun 07 18:04:39 2011 -0400
@@ -1,4 +1,4 @@
-<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.3">
+<tool id="tmhmm2" name="TMHMM 2.0" version="0.0.5">
     <description>Find transmembrane domains in protein sequences</description>
     <command interpreter="python">
       tmhmm2.py 8 $fasta_file $tabular_file
@@ -52,7 +52,7 @@
 
 **Notes**
 
-The raw output from TMHMM v2.0 looks like this (six columns tab separated):
+The short format output from TMHMM v2.0 looks like this (six columns tab separated, shown here as a table):
 
 =================================== ======= =========== ============= ========= =============================
 gi|2781234|pdb|1JLY|B               len=304 ExpAA=0.01  First60=0.00  PredHel=0 Topology=o