comparison mut2sscs.py @ 11:84a1a3f70407 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 15 Feb 2021 21:53:24 +0000
parents e18c5293aac7
children d21960b45a6b
comparison
equal deleted inserted replaced
10:e18c5293aac7 11:84a1a3f70407
9 and extracts all tags of reads that carry the mutation. 9 and extracts all tags of reads that carry the mutation.
10 Calculates statistics about number of ab/ba/duplex per mutation. 10 Calculates statistics about number of ab/ba/duplex per mutation.
11 11
12 ======= ========== ================= ================================ 12 ======= ========== ================= ================================
13 Version Date Author Description 13 Version Date Author Description
14 0.2.1 2019-10-27 Gundula Povysil - 14 2.0.0 2020-10-30 Gundula Povysil -
15 ======= ========== ================= ================================ 15 ======= ========== ================= ================================
16 16
17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json 17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json
18 18
19 """ 19 """
23 import argparse 23 import argparse
24 import json 24 import json
25 import os 25 import os
26 import sys 26 import sys
27 27
28 import numpy as np
29 import pysam 28 import pysam
30 from cyvcf2 import VCF 29 from cyvcf2 import VCF
31 30
32 31
33 def make_argparser(): 32 def make_argparser():
54 53
55 if os.path.isfile(file2) is False: 54 if os.path.isfile(file2) is False:
56 sys.exit("Error: Could not find '{}'".format(file2)) 55 sys.exit("Error: Could not find '{}'".format(file2))
57 56
58 # read SSCS bam file 57 # read SSCS bam file
59 # pysam.index(file2)
60 bam = pysam.AlignmentFile(file2, "rb") 58 bam = pysam.AlignmentFile(file2, "rb")
61 59
62 # get tags 60 # get tags
63 mut_pos_dict = {} 61 mut_pos_dict = {}
64 ref_pos_dict = {} 62 ref_pos_dict = {}
66 for variant in VCF(file1): 64 for variant in VCF(file1):
67 chrom = variant.CHROM 65 chrom = variant.CHROM
68 stop_pos = variant.start 66 stop_pos = variant.start
69 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos) 67 #chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
70 ref = variant.REF 68 ref = variant.REF
71 if len(variant.ALT) == 0: 69 alt = variant.ALT[0]
72 continue
73 else:
74 alt = variant.ALT[0]
75 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt 70 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) + "#" + ref + "#" + alt
76 71
77 if len(ref) == len(alt): 72 if len(ref) == len(alt):
78
79 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000): 73 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
80 if pileupcolumn.reference_pos == stop_pos: 74 if pileupcolumn.reference_pos == stop_pos:
81 count_alt = 0 75 count_alt = 0
82 count_ref = 0 76 count_ref = 0
83 count_indel = 0 77 count_indel = 0
135 json.dump((mut_pos_dict, ref_pos_dict), f) 129 json.dump((mut_pos_dict, ref_pos_dict), f)
136 130
137 131
138 if __name__ == '__main__': 132 if __name__ == '__main__':
139 sys.exit(mut2sscs(sys.argv)) 133 sys.exit(mut2sscs(sys.argv))
140