Mercurial > repos > iuc > snpeff
annotate gbk2fa.py @ 30:c7275bd8b4d6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 280d5247becaa9a70db9c5c4a2f1fc50d302838a
author | iuc |
---|---|
date | Mon, 18 Nov 2024 22:15:34 +0000 |
parents | ca2b512e8d7c |
children |
rev | line source |
---|---|
10
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
1 import argparse |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
2 import bz2 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
3 import gzip |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
4 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
5 from Bio import SeqIO |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
6 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
7 |
25
5c7b70713fb5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
iuc
parents:
24
diff
changeset
|
def get_opener(gbk_filename):
    """Return the callable that should be used to open *gbk_filename*.

    The file is probed by trying to read one decompressed byte from it,
    first as bzip2 and then as gzip. The first probe that succeeds decides
    the result; if both probes fail with ``OSError`` the builtin ``open``
    is returned.

    Args:
        gbk_filename: Path of the (possibly compressed) input file.

    Returns:
        ``bz2.open``, ``gzip.open`` or the builtin ``open``.

    Note:
        Only ``OSError`` is treated as "not this format". Any other
        exception raised while probing propagates to the caller.
    """
    # bzip2 is tried before gzip, so it wins if both probes would succeed.
    for candidate in (bz2.open, gzip.open):
        try:
            # The context manager closes the probe handle immediately, so
            # no file descriptor is left open after format detection.
            with candidate(gbk_filename) as probe:
                probe.read(1)
        except OSError:
            continue
        return candidate
    return open
10
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
22 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
23 |
29
ca2b512e8d7c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
iuc
parents:
25
diff
changeset
|
def main():
    """Command-line entry point: convert a GenBank file to FASTA.

    Parses two positional arguments (the GenBank input path and the FASTA
    output path) and the optional ``--remove_version`` flag. The input is
    opened with whatever opener ``get_opener`` selects, and one FASTA
    record is written per GenBank record: a ``>`` header line holding the
    record id, followed by the full sequence on a single line. A progress
    message is printed to stdout for every record written.
    """
    cli = argparse.ArgumentParser(
        description=(
            "Convert GenBank files to FASTA format. "
            "Supports gzip and bzip2 compressed files."
        )
    )
    cli.add_argument(
        "genbank_file",
        help="GenBank input file. Can be compressed with gzip or bzip2",
    )
    cli.add_argument("fasta_file", help="FASTA output dataset")
    cli.add_argument(
        "--remove_version",
        action="store_true",
        help=(
            "Remove version number from NCBI formatted accession numbers. "
            "For example, this converts 'B000657.2' to 'B000657'."
        ),
    )
    options = cli.parse_args()

    opener = get_opener(options.genbank_file)
    with opener(options.genbank_file, 'rt') as gbk_in, \
            open(options.fasta_file, 'w') as fasta_out:
        for record in SeqIO.parse(gbk_in, 'genbank'):
            # With --remove_version, keep only the text before the first dot.
            name = record.id.split('.')[0] if options.remove_version else record.id
            print(f'Writing FASTA record: {name}')
            fasta_out.write(f'>{name}\n')
            fasta_out.write(f'{record.seq}\n')
10
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
55 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
56 |
29
ca2b512e8d7c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
iuc
parents:
25
diff
changeset
|
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()