comparison mut2sscs.py @ 2:3f1dbd2c59bf draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
author iuc
date Tue, 10 Nov 2020 12:55:29 +0000
parents 3556001ff2db
children
comparison
equal deleted inserted replaced
1:3556001ff2db 2:3f1dbd2c59bf
9 and extracts all tags of reads that carry the mutation. 9 and extracts all tags of reads that carry the mutation.
10 Calculates statistics about number of ab/ba/duplex per mutation. 10 Calculates statistics about number of ab/ba/duplex per mutation.
11 11
12 ======= ========== ================= ================================ 12 ======= ========== ================= ================================
13 Version Date Author Description 13 Version Date Author Description
14 0.2.1 2019-10-27 Gundula Povysil - 14 2.0.0 2020-10-30 Gundula Povysil -
15 ======= ========== ================= ================================ 15 ======= ========== ================= ================================
16 16
17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json 17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json
18 18
19 """ 19 """
23 import argparse 23 import argparse
24 import json 24 import json
25 import os 25 import os
26 import sys 26 import sys
27 27
28 import numpy as np
29 import pysam 28 import pysam
29 from cyvcf2 import VCF
30 30
31 31
32 def make_argparser(): 32 def make_argparser():
33 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') 33 parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
34 parser.add_argument('--mutFile', 34 parser.add_argument('--mutFile',
35 help='TABULAR file with DCS mutations.') 35 help='VCR file with DCS mutations.')
36 parser.add_argument('--bamFile', 36 parser.add_argument('--bamFile',
37 help='BAM file with aligned SSCS reads.') 37 help='BAM file with aligned SSCS reads.')
38 parser.add_argument('--outputJson', 38 parser.add_argument('--outputJson',
39 help='Output JSON file to store SSCS counts.') 39 help='Output JSON file to store SSCS counts.')
40 return parser 40 return parser
52 sys.exit("Error: Could not find '{}'".format(file1)) 52 sys.exit("Error: Could not find '{}'".format(file1))
53 53
54 if os.path.isfile(file2) is False: 54 if os.path.isfile(file2) is False:
55 sys.exit("Error: Could not find '{}'".format(file2)) 55 sys.exit("Error: Could not find '{}'".format(file2))
56 56
57 # 1. read mut file 57 # read SSCS bam file
58 with open(file1, 'r') as mut:
59 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
60
61 # 2 read SSCS bam file
62 # pysam.index(file2)
63 bam = pysam.AlignmentFile(file2, "rb") 58 bam = pysam.AlignmentFile(file2, "rb")
64 59
65 # get tags 60 # get tags
66 mut_pos_dict = {} 61 mut_pos_dict = {}
67 ref_pos_dict = {} 62 ref_pos_dict = {}
68 if len(mut_array) == 13:
69 mut_array = mut_array.reshape((1, len(mut_array)))
70 63
71 for m in range(0, len(mut_array[:, 0])): 64 for variant in VCF(file1):
72 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) 65 chrom = variant.CHROM
73 chrom = mut_array[m, 1] 66 stop_pos = variant.start
74 stop_pos = mut_array[m, 2].astype(int)
75 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) 67 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
76 ref = mut_array[m, 9] 68 ref = variant.REF
77 alt = mut_array[m, 10] 69 alt = variant.ALT[0]
78 70
79 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): 71 if len(ref) == len(alt):
80 if pileupcolumn.reference_pos == stop_pos - 1: 72 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
81 count_alt = 0 73 if pileupcolumn.reference_pos == stop_pos:
82 count_ref = 0 74 count_alt = 0
83 count_indel = 0 75 count_ref = 0
84 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), 76 count_indel = 0
85 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) 77 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
86 for pileupread in pileupcolumn.pileups: 78 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
87 if not pileupread.is_del and not pileupread.is_refskip: 79 for pileupread in pileupcolumn.pileups:
88 tag = pileupread.alignment.query_name 80 if not pileupread.is_del and not pileupread.is_refskip:
89 abba = tag[-2:] 81 tag = pileupread.alignment.query_name
90 # query position is None if is_del or is_refskip is set. 82 abba = tag[-2:]
91 if pileupread.alignment.query_sequence[pileupread.query_position] == alt: 83 # query position is None if is_del or is_refskip is set.
92 count_alt += 1 84 if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
93 if chrom_stop_pos in mut_pos_dict: 85 count_alt += 1
94 if abba in mut_pos_dict[chrom_stop_pos]: 86 if chrom_stop_pos in mut_pos_dict:
95 mut_pos_dict[chrom_stop_pos][abba] += 1 87 if abba in mut_pos_dict[chrom_stop_pos]:
88 mut_pos_dict[chrom_stop_pos][abba] += 1
89 else:
90 mut_pos_dict[chrom_stop_pos][abba] = 1
96 else: 91 else:
92 mut_pos_dict[chrom_stop_pos] = {}
97 mut_pos_dict[chrom_stop_pos][abba] = 1 93 mut_pos_dict[chrom_stop_pos][abba] = 1
98 else: 94 if chrom_stop_pos not in ref_pos_dict:
99 mut_pos_dict[chrom_stop_pos] = {} 95 ref_pos_dict[chrom_stop_pos] = {}
100 mut_pos_dict[chrom_stop_pos][abba] = 1 96 ref_pos_dict[chrom_stop_pos][abba] = 0
101 elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: 97
102 count_ref += 1 98 elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
103 if chrom_stop_pos in ref_pos_dict: 99 count_ref += 1
104 if abba in ref_pos_dict[chrom_stop_pos]: 100 if chrom_stop_pos in ref_pos_dict:
105 ref_pos_dict[chrom_stop_pos][abba] += 1 101 if abba in ref_pos_dict[chrom_stop_pos]:
102 ref_pos_dict[chrom_stop_pos][abba] += 1
103 else:
104 ref_pos_dict[chrom_stop_pos][abba] = 1
106 else: 105 else:
106 ref_pos_dict[chrom_stop_pos] = {}
107 ref_pos_dict[chrom_stop_pos][abba] = 1 107 ref_pos_dict[chrom_stop_pos][abba] = 1
108 else: 108 else:
109 ref_pos_dict[chrom_stop_pos] = {} 109 count_indel += 1
110 ref_pos_dict[chrom_stop_pos][abba] = 1
111 else:
112 count_indel += 1
113 110
114 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % 111 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
115 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) 112 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
116 113
117 # if mutation is in DCS file but not in SSCS, then set counts to NA 114 # if mutation is in DCS file but not in SSCS, then set counts to NA
118 if chrom_stop_pos not in mut_pos_dict.keys(): 115 if chrom_stop_pos not in mut_pos_dict.keys():
119 mut_pos_dict[chrom_stop_pos] = {} 116 mut_pos_dict[chrom_stop_pos] = {}
120 mut_pos_dict[chrom_stop_pos]["ab"] = 0 117 mut_pos_dict[chrom_stop_pos]["ab"] = 0
121 mut_pos_dict[chrom_stop_pos]["ba"] = 0 118 mut_pos_dict[chrom_stop_pos]["ba"] = 0
122 ref_pos_dict[chrom_stop_pos] = {} 119 ref_pos_dict[chrom_stop_pos] = {}
123 ref_pos_dict[chrom_stop_pos]["ab"] = 0 120 ref_pos_dict[chrom_stop_pos]["ab"] = 0
124 ref_pos_dict[chrom_stop_pos]["ba"] = 0 121 ref_pos_dict[chrom_stop_pos]["ba"] = 0
122 else:
123 print("indels are currently not evaluated")
125 bam.close() 124 bam.close()
126 125
127 # save counts 126 # save counts
128 with open(sscs_counts_json, "w") as f: 127 with open(sscs_counts_json, "w") as f:
129 json.dump((mut_pos_dict, ref_pos_dict), f) 128 json.dump((mut_pos_dict, ref_pos_dict), f)