variant_analyzer: mut2sscs.py comparison

comparison mut2sscs.py @ 2:3f1dbd2c59bf draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"

author	iuc
date	Tue, 10 Nov 2020 12:55:29 +0000
parents	3556001ff2db
children

comparison

equal deleted inserted replaced

-:3556001ff2db
+:3f1dbd2c59bf
 and extracts all tags of reads that carry the mutation.
 Calculates statistics about number of ab/ba/duplex per mutation.
 =======  ==========  =================  ================================
 Version  Date        Author             Description
-0.2.1    2019-10-27  Gundula Povysil    -
+2.0.0    2020-10-30  Gundula Povysil    -
 =======  ==========  =================  ================================
 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json
 """
 import argparse
 import json
 import os
 import sys
-import numpy as np
 import pysam
+from cyvcf2 import VCF
 def make_argparser():
-parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
+parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
 parser.add_argument('--mutFile',
-help='TABULAR file with DCS mutations.')
+help='VCR file with DCS mutations.')
 parser.add_argument('--bamFile',
 help='BAM file with aligned SSCS reads.')
 parser.add_argument('--outputJson',
 help='Output JSON file to store SSCS counts.')
 return parser
 sys.exit("Error: Could not find '{}'".format(file1))
 if os.path.isfile(file2) is False:
 sys.exit("Error: Could not find '{}'".format(file2))
-# 1. read mut file
+# read SSCS bam file
-with open(file1, 'r') as mut:
-mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
-# 2 read SSCS bam file
-# pysam.index(file2)
 bam = pysam.AlignmentFile(file2, "rb")
 # get tags
 mut_pos_dict = {}
 ref_pos_dict = {}
-if len(mut_array) == 13:
-mut_array = mut_array.reshape((1, len(mut_array)))
-for m in range(0, len(mut_array[:, 0])):
+for variant in VCF(file1):
-print(str(m + 1) + " of " + str(len(mut_array[:, 0])))
+chrom = variant.CHROM
-chrom = mut_array[m, 1]
+stop_pos = variant.start
-stop_pos = mut_array[m, 2].astype(int)
 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
-ref = mut_array[m, 9]
+ref = variant.REF
-alt = mut_array[m, 10]
+alt = variant.ALT[0]
-for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000):
+if len(ref) == len(alt):
-if pileupcolumn.reference_pos == stop_pos - 1:
+for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
-count_alt = 0
+if pileupcolumn.reference_pos == stop_pos:
-count_ref = 0
+count_alt = 0
-count_indel = 0
+count_ref = 0
-print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
+count_indel = 0
-"difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
+print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
-for pileupread in pileupcolumn.pileups:
+"difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
-if not pileupread.is_del and not pileupread.is_refskip:
+for pileupread in pileupcolumn.pileups:
-tag = pileupread.alignment.query_name
+if not pileupread.is_del and not pileupread.is_refskip:
-abba = tag[-2:]
+tag = pileupread.alignment.query_name
-# query position is None if is_del or is_refskip is set.
+abba = tag[-2:]
-if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
+# query position is None if is_del or is_refskip is set.
-count_alt += 1
+if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
-if chrom_stop_pos in mut_pos_dict:
+count_alt += 1
-if abba in mut_pos_dict[chrom_stop_pos]:
+if chrom_stop_pos in mut_pos_dict:
-mut_pos_dict[chrom_stop_pos][abba] += 1
+if abba in mut_pos_dict[chrom_stop_pos]:
+mut_pos_dict[chrom_stop_pos][abba] += 1
+else:
+mut_pos_dict[chrom_stop_pos][abba] = 1
 else:
+mut_pos_dict[chrom_stop_pos] = {}
 mut_pos_dict[chrom_stop_pos][abba] = 1
-else:
+if chrom_stop_pos not in ref_pos_dict:
-mut_pos_dict[chrom_stop_pos] = {}
+ref_pos_dict[chrom_stop_pos] = {}
-mut_pos_dict[chrom_stop_pos][abba] = 1
+ref_pos_dict[chrom_stop_pos][abba] = 0
-elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
-count_ref += 1
+elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
-if chrom_stop_pos in ref_pos_dict:
+count_ref += 1
-if abba in ref_pos_dict[chrom_stop_pos]:
+if chrom_stop_pos in ref_pos_dict:
-ref_pos_dict[chrom_stop_pos][abba] += 1
+if abba in ref_pos_dict[chrom_stop_pos]:
+ref_pos_dict[chrom_stop_pos][abba] += 1
+else:
+ref_pos_dict[chrom_stop_pos][abba] = 1
 else:
+ref_pos_dict[chrom_stop_pos] = {}
 ref_pos_dict[chrom_stop_pos][abba] = 1
 else:
-ref_pos_dict[chrom_stop_pos] = {}
+count_indel += 1
-ref_pos_dict[chrom_stop_pos][abba] = 1
-else:
-count_indel += 1
 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
 # if mutation is in DCS file but not in SSCS, then set counts to NA
 if chrom_stop_pos not in mut_pos_dict.keys():
 mut_pos_dict[chrom_stop_pos] = {}
 mut_pos_dict[chrom_stop_pos]["ab"] = 0
 mut_pos_dict[chrom_stop_pos]["ba"] = 0
 ref_pos_dict[chrom_stop_pos] = {}
 ref_pos_dict[chrom_stop_pos]["ab"] = 0
 ref_pos_dict[chrom_stop_pos]["ba"] = 0
+else:
+print("indels are currently not evaluated")
 bam.close()
 # save counts
 with open(sscs_counts_json, "w") as f:
 json.dump((mut_pos_dict, ref_pos_dict), f)

Mercurial > repos > iuc > variant_analyzer

comparison mut2sscs.py @ 2:3f1dbd2c59bf draft default tip