Mercurial > repos > iuc > get_hrun
annotate get_hrun.py @ 0:84f70ce0b830 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
author | iuc |
---|---|
date | Tue, 02 Mar 2021 21:35:40 +0000 |
parents | |
children |
rev | line source |
---|---|
0
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
2 import argparse |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
3 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
4 import vcf |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
5 from pyfaidx import Fasta |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
6 from vcf.parser import _Info as VcfInfo |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
7 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
8 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
9 parser = argparse.ArgumentParser(description='Generate report tables') |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
10 parser.add_argument("--reference", |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
11 required=True, |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
12 help="Filepath to reference FASTA file") |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
13 parser.add_argument("--in-vcf", |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
14 required=True, |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
15 help="Filepath to vcf file to be analyzed") |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
16 parser.add_argument("--out-vcf", |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
17 required=True, |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
18 help="Filepath to vcf file to be output") |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
19 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
20 args = parser.parse_args() |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
21 ref_path = args.reference |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
22 reference = Fasta(ref_path, sequence_always_upper=True, read_ahead=1000) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
23 in_vcf_path = args.in_vcf |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
24 in_vcf_handle = open(in_vcf_path) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
25 in_vcf = vcf.Reader(in_vcf_handle) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
26 in_vcf.infos['HRUN'] = VcfInfo( |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
27 'HRUN', 1, 'Integer', |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
28 'Homopolymer length to the right of report indel position', |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
29 "get_hrun", "1.0") |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
30 out_vcf_path = args.out_vcf |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
31 out_vcf_handle = open(out_vcf_path, 'w') |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
32 out_vcf = vcf.Writer(out_vcf_handle, in_vcf) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
33 for record in in_vcf: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
34 chrom = record.CHROM |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
35 pos = record.POS - 1 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
36 ref = record.REF |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
37 calc_hrun = False |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
38 for alt in record.ALT: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
39 if len(ref) != len(alt): |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
40 calc_hrun = True |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
41 if calc_hrun: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
42 window = 50 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
43 hrun = 1 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
44 start = pos + 2 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
45 end = start + window |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
46 base = reference[chrom][pos + 1] |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
47 seq_len = len(reference[chrom]) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
48 for i in range(start, len(reference)): |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
49 base2 = reference[chrom][i] |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
50 if base == base2: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
51 hrun += 1 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
52 else: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
53 break |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
54 # Extend to left in case not left aligned |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
55 for i in range(pos, -1, -1): |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
56 if reference[chrom][i] == base: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
57 hrun += 1 |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
58 else: |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
59 break |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
60 record.add_info('HRUN', [hrun]) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
61 out_vcf.write_record(record) |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
62 in_vcf_handle.close() |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
63 out_vcf.close() |
84f70ce0b830
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/get_hrun commit 5810d89666698dbe49ef17c334799fce76823621"
iuc
parents:
diff
changeset
|
64 out_vcf_handle.close() |