Mercurial > repos > earlhaminst > treebest_best
annotate fasta_header_converter.py @ 0:4f9e5110914b draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
| author | earlhaminst |
|---|---|
| date | Tue, 20 Dec 2016 16:32:25 -0500 |
| parents | |
| children | dd268de3a107 |
| rev | line source |
|---|---|
|
0
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
1 from __future__ import print_function |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
2 |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
3 import json |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
4 import optparse |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
5 |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
6 |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
def read_gene_info(gene_info):
    """Map each transcript id to its species name with underscores removed.

    Args:
        gene_info: dict whose values are per-gene dicts. Each gene dict may
            hold a 'Transcript' list of dicts carrying 'id' and 'species'
            keys.

    Returns:
        dict mapping transcript['id'] -> transcript['species'] with every
        "_" stripped out.

    Raises:
        KeyError: if a transcript entry lacks 'id' or 'species'.
    """
    transcript_species_dict = dict()
    for gene_dict in gene_info.values():
        # A gene entry without a 'Transcript' key (or with an empty/null one)
        # contributes nothing instead of aborting the whole run with KeyError.
        for transcript in gene_dict.get('Transcript') or ():
            transcript_species_dict[transcript['id']] = transcript['species'].replace("_", "")
    return transcript_species_dict
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
13 |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
14 |
|
4f9e5110914b
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
earlhaminst
parents:
diff
changeset
|
# Command-line entry point: read gene information from a JSON file, then
# echo a FASTA file to stdout with every header rewritten as
# ">" + <header text> + "_" + <species looked up for that header>.
parser = optparse.OptionParser()
parser.add_option('-j', '--json', dest="input_gene_filename",
                  help='Gene feature information in JSON format')
parser.add_option('-f', '--fasta', dest="input_fasta_filename",
                  help='Sequences in FASTA format')
options, args = parser.parse_args()

# Both inputs are mandatory. parser.error() prints the usage message to
# stderr and exits with status 2 instead of dumping a traceback.
if options.input_gene_filename is None:
    parser.error('-j option must be specified')

if options.input_fasta_filename is None:
    parser.error('-f option must be specified')

with open(options.input_gene_filename) as json_fh:
    gene_info = json.load(json_fh)
transcript_species_dict = read_gene_info(gene_info)

with open(options.input_fasta_filename) as fasta_fh:
    for line in fasta_fh:
        line = line.rstrip()
        if line.startswith(">"):
            # The whole header text after ">" is used as the lookup key.
            name = line[1:].lstrip()
            species = transcript_species_dict.get(name)
            if species is None:
                # Name the offending header and the JSON file rather than
                # surfacing a bare KeyError.
                raise KeyError(
                    "FASTA header '%s' has no matching transcript id in %s"
                    % (name, options.input_gene_filename))
            print(">" + name + "_" + species)
        else:
            # Sequence (or blank) lines pass through, right-stripped.
            print(line)
