Mercurial > repos > earlhaminst > treebest_best
view fasta_header_converter.py @ 2:7ea4df039a53 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 978a8efb9969646b97873d69348971860f2793f5
author | earlhaminst |
---|---|
date | Wed, 22 Feb 2017 05:48:02 -0500 |
parents | 4f9e5110914b |
children | dd268de3a107 |
line wrap: on
line source
from __future__ import print_function

import json
import optparse


def read_gene_info(gene_info):
    """Build a transcript id -> species lookup from gene feature information.

    :param gene_info: mapping whose values are gene dictionaries; each gene
        dictionary has a 'Transcript' list of dictionaries carrying at least
        the 'id' and 'species' keys.
    :return: dict mapping each transcript id to its species name with all
        underscores removed.
    """
    transcript_species_dict = dict()
    for gene_dict in gene_info.values():
        for transcript in gene_dict['Transcript']:
            # Underscores are dropped from the species so that the "_" added
            # to the FASTA header is the only one contributed by the species.
            transcript_species_dict[transcript['id']] = transcript['species'].replace("_", "")
    return transcript_species_dict


def convert_fasta_headers(fasta_lines, transcript_species_dict):
    """Yield FASTA lines with the species appended to every header.

    Trailing whitespace is stripped from every line. A header line
    (starting with ">") is rewritten as ">" + name + "_" + species, where
    name is the header text after ">" with leading whitespace removed.
    All other lines are yielded unchanged (apart from the stripping).

    :param fasta_lines: iterable of lines, e.g. an open FASTA file handle.
    :param transcript_species_dict: mapping from header name to species, as
        returned by read_gene_info().
    :raises KeyError: if a header name is not in transcript_species_dict.
    """
    for line in fasta_lines:
        line = line.rstrip()
        if line.startswith(">"):
            name = line[1:].lstrip()
            if name not in transcript_species_dict:
                # Same exception type as a plain dict lookup, but with a
                # message that tells the user which header is the problem.
                raise KeyError(
                    "FASTA header '%s' has no matching transcript id in the "
                    "gene feature information" % name)
            yield ">" + name + "_" + transcript_species_dict[name]
        else:
            yield line


def main(argv=None):
    """Parse the command line and print the converted FASTA to stdout.

    :param argv: list of command-line arguments; when None, optparse falls
        back to sys.argv[1:].
    :raises Exception: if the -j or the -f option is missing.
    """
    parser = optparse.OptionParser()
    parser.add_option('-j', '--json', dest="input_gene_filename",
                      help='Gene feature information in JSON format')
    parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                      help='Sequences in FASTA format')
    options, args = parser.parse_args(argv)

    if options.input_gene_filename is None:
        raise Exception('-j option must be specified')

    if options.input_fasta_filename is None:
        raise Exception('-f option must be specified')

    with open(options.input_gene_filename) as json_fh:
        gene_info = json.load(json_fh)
    transcript_species_dict = read_gene_info(gene_info)

    with open(options.input_fasta_filename) as fasta_fh:
        for line in convert_fasta_headers(fasta_fh, transcript_species_dict):
            print(line)


if __name__ == '__main__':
    main()