Galaxy |

Changeset 3:dd268de3a107 (2017-03-03)

Previous changeset 2:7ea4df039a53 (2017-02-22) Next changeset 4:66170848da6c (2017-03-15)

Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 988b1fc1cb8739e45648465adbf099f3fdaf87f8

modified:
fasta_header_converter.py
fasta_header_converter.xml

diff -r 7ea4df039a53 -r dd268de3a107 fasta_header_converter.py
--- a/fasta_header_converter.py Wed Feb 22 05:48:02 2017 -0500
+++ b/fasta_header_converter.py Fri Mar 03 07:22:53 2017 -0500

[

@@ -1,7 +1,28 @@
from __future__ import print_function

+import collections
import json
import optparse
+import sys
+
+Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
+
+
+def FASTAReader_gen(fasta_filename):
+    with open(fasta_filename) as fasta_file:
+        line = fasta_file.readline()
+        while True:
+            if not line:
+                return
+            assert line.startswith('>'), "FASTA headers must start with >"
+            header = line.rstrip()
+            sequence_parts = []
+            line = fasta_file.readline()
+            while line and line[0] != '>':
+                sequence_parts.append(line.rstrip())
+                line = fasta_file.readline()
+            sequence = "\n".join(sequence_parts)
+            yield Sequence(header, sequence)

def read_gene_info(gene_info):
@@ -17,23 +38,25 @@
                   help='Gene feature information in JSON format')
parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                   help='Sequences in FASTA format')
+parser.add_option('-o', '--output', dest="output_fasta_filename",
+                  help='Output FASTA file name')
options, args = parser.parse_args()

if options.input_gene_filename is None:
     raise Exception('-j option must be specified')
-
if options.input_fasta_filename is None:
     raise Exception('-f option must be specified')
+if options.output_fasta_filename is None:
+    raise Exception('-o option must be specified')

with open(options.input_gene_filename) as json_fh:
     gene_info = json.load(json_fh)
transcript_species_dict = read_gene_info(gene_info)

-with open(options.input_fasta_filename) as fasta_fh:
-    for line in fasta_fh:
-        line = line.rstrip()
-        if line.startswith(">"):
-            name = line[1:].lstrip()
-            print(">" + name + "_" + transcript_species_dict[name])
-        else:
-            print(line)
+with open(options.output_fasta_filename, 'w') as output_fasta_file:
+    for entry in FASTAReader_gen(options.input_fasta_filename):
+        name = entry.header[1:].lstrip()
+        if name not in transcript_species_dict:
+            print("Transcript '%s' not found in the gene feature information" % name, file=sys.stderr)
+            continue
+        output_fasta_file.write(">%s_%s\n%s\n" % (name, transcript_species_dict[name], entry.sequence))

diff -r 7ea4df039a53 -r dd268de3a107 fasta_header_converter.xml
--- a/fasta_header_converter.xml Wed Feb 22 05:48:02 2017 -0500
+++ b/fasta_header_converter.xml Fri Mar 03 07:22:53 2017 -0500

[

@@ -1,11 +1,11 @@
<tool id="fasta_header_converter" name="FASTA header converter" version="0.1.1">
     <description>to append species information</description>
-    <command>
+    <command detect_errors="exit_code">
<![CDATA[
python '$__tool_directory__/fasta_header_converter.py'
-f '$fastaFile'
-j '$genesFile'
-> '$outputFile'
+-o '$outputFile'
]]>
     </command>
     <inputs>