Mercurial > repos > earlhaminst > treebest_best
changeset 3:dd268de3a107 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 988b1fc1cb8739e45648465adbf099f3fdaf87f8
author | earlhaminst |
---|---|
date | Fri, 03 Mar 2017 07:22:53 -0500 |
parents | 7ea4df039a53 |
children | 66170848da6c |
files | fasta_header_converter.py fasta_header_converter.xml |
diffstat | 2 files changed, 34 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/fasta_header_converter.py Wed Feb 22 05:48:02 2017 -0500
+++ b/fasta_header_converter.py Fri Mar 03 07:22:53 2017 -0500
@@ -1,7 +1,28 @@
 from __future__ import print_function

+import collections
 import json
 import optparse
+import sys
+
+Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
+
+
+def FASTAReader_gen(fasta_filename):
+    with open(fasta_filename) as fasta_file:
+        line = fasta_file.readline()
+        while True:
+            if not line:
+                return
+            assert line.startswith('>'), "FASTA headers must start with >"
+            header = line.rstrip()
+            sequence_parts = []
+            line = fasta_file.readline()
+            while line and line[0] != '>':
+                sequence_parts.append(line.rstrip())
+                line = fasta_file.readline()
+            sequence = "\n".join(sequence_parts)
+            yield Sequence(header, sequence)


 def read_gene_info(gene_info):
@@ -17,23 +38,25 @@
                   help='Gene feature information in JSON format')
 parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                   help='Sequences in FASTA format')
+parser.add_option('-o', '--output', dest="output_fasta_filename",
+                  help='Output FASTA file name')
 options, args = parser.parse_args()

 if options.input_gene_filename is None:
     raise Exception('-j option must be specified')
-
 if options.input_fasta_filename is None:
     raise Exception('-f option must be specified')
+if options.output_fasta_filename is None:
+    raise Exception('-o option must be specified')

 with open(options.input_gene_filename) as json_fh:
     gene_info = json.load(json_fh)
 transcript_species_dict = read_gene_info(gene_info)

-with open(options.input_fasta_filename) as fasta_fh:
-    for line in fasta_fh:
-        line = line.rstrip()
-        if line.startswith(">"):
-            name = line[1:].lstrip()
-            print(">" + name + "_" + transcript_species_dict[name])
-        else:
-            print(line)
+with open(options.output_fasta_filename, 'w') as output_fasta_file:
+    for entry in FASTAReader_gen(options.input_fasta_filename):
+        name = entry.header[1:].lstrip()
+        if name not in transcript_species_dict:
+            print("Transcript '%s' not found in the gene feature information" % name, file=sys.stderr)
+            continue
+        output_fasta_file.write(">%s_%s\n%s\n" % (name, transcript_species_dict[name], entry.sequence))
```
```diff
--- a/fasta_header_converter.xml Wed Feb 22 05:48:02 2017 -0500
+++ b/fasta_header_converter.xml Fri Mar 03 07:22:53 2017 -0500
@@ -1,11 +1,11 @@
 <tool id="fasta_header_converter" name="FASTA header converter" version="0.1.1">
     <description>to append species information</description>
-    <command>
+    <command detect_errors="exit_code">
 <![CDATA[
 python '$__tool_directory__/fasta_header_converter.py'
 -f '$fastaFile'
 -j '$genesFile'
-> '$outputFile'
+-o '$outputFile'
 ]]>
     </command>
     <inputs>
```