annotate get_hrun.py @ 0:84f70ce0b830 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
author iuc
date Tue, 02 Mar 2021 21:35:40 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
2 import argparse
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
3
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
4 import vcf
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
5 from pyfaidx import Fasta
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
6 from vcf.parser import _Info as VcfInfo
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
7
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
8
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
9 parser = argparse.ArgumentParser(description='Generate report tables')
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
10 parser.add_argument("--reference",
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
11 required=True,
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
12 help="Filepath to reference FASTA file")
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
13 parser.add_argument("--in-vcf",
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
14 required=True,
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
15 help="Filepath to vcf file to be analyzed")
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
16 parser.add_argument("--out-vcf",
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
17 required=True,
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
18 help="Filepath to vcf file to be output")
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
19
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
20 args = parser.parse_args()
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
21 ref_path = args.reference
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
22 reference = Fasta(ref_path, sequence_always_upper=True, read_ahead=1000)
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
23 in_vcf_path = args.in_vcf
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
24 in_vcf_handle = open(in_vcf_path)
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
25 in_vcf = vcf.Reader(in_vcf_handle)
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
26 in_vcf.infos['HRUN'] = VcfInfo(
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
27 'HRUN', 1, 'Integer',
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
28 'Homopolymer length to the right of report indel position',
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
29 "get_hrun", "1.0")
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
30 out_vcf_path = args.out_vcf
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
31 out_vcf_handle = open(out_vcf_path, 'w')
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
32 out_vcf = vcf.Writer(out_vcf_handle, in_vcf)
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
33 for record in in_vcf:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
34 chrom = record.CHROM
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
35 pos = record.POS - 1
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
36 ref = record.REF
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
37 calc_hrun = False
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
38 for alt in record.ALT:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
39 if len(ref) != len(alt):
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
40 calc_hrun = True
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
41 if calc_hrun:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
42 window = 50
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
43 hrun = 1
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
44 start = pos + 2
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
45 end = start + window
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
46 base = reference[chrom][pos + 1]
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
47 seq_len = len(reference[chrom])
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
48 for i in range(start, len(reference)):
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
49 base2 = reference[chrom][i]
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
50 if base == base2:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
51 hrun += 1
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
52 else:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
53 break
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
54 # Extend to left in case not left aligned
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
55 for i in range(pos, -1, -1):
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
56 if reference[chrom][i] == base:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
57 hrun += 1
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
58 else:
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
59 break
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
60 record.add_info('HRUN', [hrun])
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
61 out_vcf.write_record(record)
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
62 in_vcf_handle.close()
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
63 out_vcf.close()
84f70ce0b830 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff changeset
64 out_vcf_handle.close()