comparison fasta_header_converter.py @ 0:4f9e5110914b draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
author earlhaminst
date Tue, 20 Dec 2016 16:32:25 -0500
parents
children dd268de3a107
comparison
equal deleted inserted replaced
-1:000000000000 0:4f9e5110914b
1 from __future__ import print_function
2
3 import json
4 import optparse
5
6
def read_gene_info(gene_info):
    """Build a transcript-id -> species lookup from gene feature data.

    ``gene_info`` is a dict whose values are gene dicts.  Each gene dict
    holds a list of transcript dicts under its 'Transcript' key, and every
    transcript dict provides an 'id' and a 'species' entry.

    Returns a dict mapping each transcript id to its species name with all
    underscores removed.  If the same transcript id appears more than once,
    the species seen last is kept.
    """
    species_by_transcript = {}
    for gene in gene_info.values():
        for entry in gene['Transcript']:
            # Underscores are stripped from the species name before storing.
            species = entry['species'].replace("_", "")
            species_by_transcript[entry['id']] = species
    return species_by_transcript
13
14
# Script body: read the gene JSON and the FASTA file named on the command
# line, then print the FASTA to stdout with "_<species>" appended to every
# header line.  Sequence lines are printed with trailing whitespace removed.
parser = optparse.OptionParser()
parser.add_option('-j', '--json', dest="input_gene_filename",
                  help='Gene feature information in JSON format')
parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                  help='Sequences in FASTA format')
options, args = parser.parse_args()

# Both inputs are required.  parser.error() prints the usage text plus the
# message to stderr and exits with status 2, so a missing option is reported
# as a usage error rather than as an uncaught-exception traceback.
if options.input_gene_filename is None:
    parser.error('-j option must be specified')

if options.input_fasta_filename is None:
    parser.error('-f option must be specified')

with open(options.input_gene_filename) as json_fh:
    gene_info = json.load(json_fh)
transcript_species_dict = read_gene_info(gene_info)

with open(options.input_fasta_filename) as fasta_fh:
    for line in fasta_fh:
        line = line.rstrip()
        if line.startswith(">"):
            # The whole header text after '>' is used as the transcript id.
            name = line[1:].lstrip()
            if name not in transcript_species_dict:
                # Still a KeyError, but one that says which header failed
                # and which inputs disagree.
                raise KeyError(
                    "FASTA header '%s' in %s has no matching transcript id in %s"
                    % (name, options.input_fasta_filename,
                       options.input_gene_filename))
            print(">" + name + "_" + transcript_species_dict[name])
        else:
            print(line)