annotate fasta_header_converter.py @ 1:fd85bf67c4bb draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 6a1dfb73f2172182096724ce3bee400287fd9c2f-dirty
author earlhaminst
date Tue, 20 Dec 2016 17:17:56 -0500
parents 4f9e5110914b
children dd268de3a107
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
1 from __future__ import print_function
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
2
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
3 import json
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
4 import optparse
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
5
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
6
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
def read_gene_info(gene_info):
    """Build a transcript id -> species lookup from gene feature information.

    :param gene_info: dict whose values are gene dicts; each gene dict must
        have a 'Transcript' key holding an iterable of transcript dicts, and
        each transcript dict must have 'id' and 'species' keys.
    :return: dict mapping every transcript 'id' to its 'species' string with
        all underscores removed (e.g. "homo_sapiens" -> "homosapiens").
    :raises KeyError: if a gene lacks 'Transcript' or a transcript lacks
        'id' or 'species'.
    """
    # If the same transcript id appears more than once, the last one wins.
    return {
        transcript['id']: transcript['species'].replace("_", "")
        for gene_dict in gene_info.values()
        for transcript in gene_dict['Transcript']
    }
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
13
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
14
4f9e5110914b planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff changeset
def convert_fasta_headers(fasta_lines, transcript_species_dict):
    """Yield FASTA lines with the species appended to every header line.

    Trailing whitespace is stripped from every line. A header line (one
    starting with ">") is rewritten as ">" + id + "_" + species, where id is
    the rest of the header with leading whitespace removed. All other lines
    are yielded unchanged apart from the stripping.

    :param fasta_lines: iterable of text lines (e.g. an open file handle).
    :param transcript_species_dict: dict mapping sequence ids to species.
    :raises KeyError: if a header id is not a key of transcript_species_dict.
    """
    for line in fasta_lines:
        line = line.rstrip()
        if not line.startswith(">"):
            yield line
            continue
        # The whole header after ">" is used as the lookup key.
        name = line[1:].lstrip()
        try:
            species = transcript_species_dict[name]
        except KeyError:
            # Still a KeyError, but one that says which sequence is at fault.
            raise KeyError("FASTA sequence id '%s' not found in the gene feature information" % name)
        yield ">" + name + "_" + species


def main():
    """Parse the command line and print the converted FASTA to stdout.

    Both -j/--json and -f/--fasta are required; if either is missing the
    option parser reports the error with usage information and exits.
    """
    parser = optparse.OptionParser()
    parser.add_option('-j', '--json', dest="input_gene_filename",
                      help='Gene feature information in JSON format')
    parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                      help='Sequences in FASTA format')
    options, _args = parser.parse_args()

    if options.input_gene_filename is None:
        parser.error('-j option must be specified')

    if options.input_fasta_filename is None:
        parser.error('-f option must be specified')

    with open(options.input_gene_filename) as json_fh:
        gene_info = json.load(json_fh)
    transcript_species_dict = read_gene_info(gene_info)

    with open(options.input_fasta_filename) as fasta_fh:
        # Lines are streamed one at a time, so large FASTA files are not
        # held in memory.
        for out_line in convert_fasta_headers(fasta_fh, transcript_species_dict):
            print(out_line)


if __name__ == '__main__':
    main()