variant_analyzer2: mut2sscs.py comparison

comparison mut2sscs.py @ 0:e5953c54cfb5 draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8

author	mheinzl
date	Sun, 04 Oct 2020 17:19:39 +0000
parents
children	11a2a34f8a2b

comparison

equal deleted inserted replaced

--1:000000000000
+:e5953c54cfb5
+#!/usr/bin/env python
+"""mut2sscs.py
+Author -- Gundula Povysil
+Contact -- povysil@bioinf.jku.at
+Takes a tabular file with mutations from DCS and a BAM file of SSCS as input
+and extracts all tags of reads that carry the mutation.
+Calculates statistics about number of ab/ba/duplex per mutation.
+=======  ==========  =================  ================================
+Version  Date        Author             Description
+0.2.1    2019-10-27  Gundula Povysil    -
+=======  ==========  =================  ================================
+USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json
+"""
+from __future__ import division
+import argparse
+import json
+import os
+import sys
+import numpy as np
+import pysam
+def make_argparser():
+parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
+parser.add_argument('--mutFile',
+help='TABULAR file with DCS mutations.')
+parser.add_argument('--bamFile',
+help='BAM file with aligned SSCS reads.')
+parser.add_argument('--outputJson',
+help='Output JSON file to store SSCS counts.')
+return parser
+def mut2sscs(argv):
+parser = make_argparser()
+args = parser.parse_args(argv[1:])
+file1 = args.mutFile
+file2 = args.bamFile
+sscs_counts_json = args.outputJson
+if os.path.isfile(file1) is False:
+sys.exit("Error: Could not find '{}'".format(file1))
+if os.path.isfile(file2) is False:
+sys.exit("Error: Could not find '{}'".format(file2))
+# 1. read mut file
+with open(file1, 'r') as mut:
+mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
+# 2 read SSCS bam file
+# pysam.index(file2)
+bam = pysam.AlignmentFile(file2, "rb")
+# get tags
+mut_pos_dict = {}
+ref_pos_dict = {}
+if mut_array.shape == (1,13):
+mut_array = mut_array.reshape((1, len(mut_array)))
+for m in range(0, len(mut_array[:, 0])):
+print(str(m + 1) + " of " + str(len(mut_array[:, 0])))
+chrom = mut_array[m, 1]
+stop_pos = mut_array[m, 2].astype(int)
+chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
+ref = mut_array[m, 9]
+alt = mut_array[m, 10]
+for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=1000000000):
+if pileupcolumn.reference_pos == stop_pos - 1:
+count_alt = 0
+count_ref = 0
+count_indel = 0
+print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
+"difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
+for pileupread in pileupcolumn.pileups:
+if not pileupread.is_del and not pileupread.is_refskip:
+tag = pileupread.alignment.query_name
+abba = tag[-2:]
+# query position is None if is_del or is_refskip is set.
+if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
+count_alt += 1
+if chrom_stop_pos in mut_pos_dict:
+if abba in mut_pos_dict[chrom_stop_pos]:
+mut_pos_dict[chrom_stop_pos][abba] += 1
+else:
+mut_pos_dict[chrom_stop_pos][abba] = 1
+else:
+mut_pos_dict[chrom_stop_pos] = {}
+mut_pos_dict[chrom_stop_pos][abba] = 1
+elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
+count_ref += 1
+if chrom_stop_pos in ref_pos_dict:
+if abba in ref_pos_dict[chrom_stop_pos]:
+ref_pos_dict[chrom_stop_pos][abba] += 1
+else:
+ref_pos_dict[chrom_stop_pos][abba] = 1
+else:
+ref_pos_dict[chrom_stop_pos] = {}
+ref_pos_dict[chrom_stop_pos][abba] = 1
+else:
+count_indel += 1
+print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
+(pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
+# if mutation is in DCS file but not in SSCS, then set counts to NA
+if chrom_stop_pos not in mut_pos_dict.keys():
+mut_pos_dict[chrom_stop_pos] = {}
+mut_pos_dict[chrom_stop_pos]["ab"] = 0
+mut_pos_dict[chrom_stop_pos]["ba"] = 0
+ref_pos_dict[chrom_stop_pos] = {}
+ref_pos_dict[chrom_stop_pos]["ab"] = 0
+ref_pos_dict[chrom_stop_pos]["ba"] = 0
+bam.close()
+# save counts
+with open(sscs_counts_json, "w") as f:
+json.dump((mut_pos_dict, ref_pos_dict), f)
+if __name__ == '__main__':
+sys.exit(mut2sscs(sys.argv))

Mercurial > repos > mheinzl > variant_analyzer2

comparison mut2sscs.py @ 0:e5953c54cfb5 draft