variant_analyzer2: mut2sscs.py comparison

comparison mut2sscs.py @ 6:11a2a34f8a2b draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8

author	mheinzl
date	Mon, 18 Jan 2021 09:49:15 +0000
parents	e5953c54cfb5
children	ded0dc6a20d3

comparison

equal deleted inserted replaced

-:d9cbf833624e
+:11a2a34f8a2b
 import os
 import sys
 import numpy as np
 import pysam
+from cyvcf2 import VCF
 def make_argparser():
-parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
+parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.')
 parser.add_argument('--mutFile',
-help='TABULAR file with DCS mutations.')
+help='VCR file with DCS mutations.')
 parser.add_argument('--bamFile',
 help='BAM file with aligned SSCS reads.')
 parser.add_argument('--outputJson',
 help='Output JSON file to store SSCS counts.')
 return parser
 sys.exit("Error: Could not find '{}'".format(file1))
 if os.path.isfile(file2) is False:
 sys.exit("Error: Could not find '{}'".format(file2))
-# 1. read mut file
+# read SSCS bam file
-with open(file1, 'r') as mut:
+#    pysam.index(file2)
-mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
-# 2 read SSCS bam file
-# pysam.index(file2)
 bam = pysam.AlignmentFile(file2, "rb")
 # get tags
 mut_pos_dict = {}
 ref_pos_dict = {}
-if mut_array.shape == (1,13):
-mut_array = mut_array.reshape((1, len(mut_array)))
-for m in range(0, len(mut_array[:, 0])):
+for variant in VCF(file1):
-print(str(m + 1) + " of " + str(len(mut_array[:, 0])))
+chrom = variant.CHROM
-chrom = mut_array[m, 1]
+stop_pos = variant.start
-stop_pos = mut_array[m, 2].astype(int)
 chrom_stop_pos = str(chrom) + "#" + str(stop_pos)
-ref = mut_array[m, 9]
+ref = variant.REF
-alt = mut_array[m, 10]
+alt = variant.ALT[0]
+#        nc = variant.format('NC')
+ad = variant.format('AD')
-for pileupcolumn in bam.pileup(chrom.tostring(), stop_pos - 2, stop_pos, max_depth=1000000000):
+if len(ref) == len(alt):
-if pileupcolumn.reference_pos == stop_pos - 1:
-count_alt = 0
+for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000):
-count_ref = 0
+if pileupcolumn.reference_pos == stop_pos:
-count_indel = 0
+count_alt = 0
-print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
+count_ref = 0
-"difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
+count_indel = 0
-for pileupread in pileupcolumn.pileups:
+print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups),
-if not pileupread.is_del and not pileupread.is_refskip:
+"difference= ", len(pileupcolumn.pileups) - pileupcolumn.n)
-tag = pileupread.alignment.query_name
+for pileupread in pileupcolumn.pileups:
-abba = tag[-2:]
+if not pileupread.is_del and not pileupread.is_refskip:
-# query position is None if is_del or is_refskip is set.
+tag = pileupread.alignment.query_name
-if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
+abba = tag[-2:]
-count_alt += 1
+# query position is None if is_del or is_refskip is set.
-if chrom_stop_pos in mut_pos_dict:
+if pileupread.alignment.query_sequence[pileupread.query_position] == alt:
-if abba in mut_pos_dict[chrom_stop_pos]:
+count_alt += 1
-mut_pos_dict[chrom_stop_pos][abba] += 1
+if chrom_stop_pos in mut_pos_dict:
+if abba in mut_pos_dict[chrom_stop_pos]:
+mut_pos_dict[chrom_stop_pos][abba] += 1
+else:
+mut_pos_dict[chrom_stop_pos][abba] = 1
 else:
+mut_pos_dict[chrom_stop_pos] = {}
 mut_pos_dict[chrom_stop_pos][abba] = 1
-else:
+if chrom_stop_pos not in ref_pos_dict:
-mut_pos_dict[chrom_stop_pos] = {}
+ref_pos_dict[chrom_stop_pos] = {}
-mut_pos_dict[chrom_stop_pos][abba] = 1
+ref_pos_dict[chrom_stop_pos][abba] = 0
-elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
-count_ref += 1
+elif pileupread.alignment.query_sequence[pileupread.query_position] == ref:
-if chrom_stop_pos in ref_pos_dict:
+count_ref += 1
-if abba in ref_pos_dict[chrom_stop_pos]:
+if chrom_stop_pos in ref_pos_dict:
-ref_pos_dict[chrom_stop_pos][abba] += 1
+if abba in ref_pos_dict[chrom_stop_pos]:
+ref_pos_dict[chrom_stop_pos][abba] += 1
+else:
+ref_pos_dict[chrom_stop_pos][abba] = 1
 else:
+ref_pos_dict[chrom_stop_pos] = {}
 ref_pos_dict[chrom_stop_pos][abba] = 1
 else:
-ref_pos_dict[chrom_stop_pos] = {}
+count_indel += 1
-ref_pos_dict[chrom_stop_pos][abba] = 1
-else:
-count_indel += 1
 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" %
 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel))
 # if mutation is in DCS file but not in SSCS, then set counts to 0
 if chrom_stop_pos not in mut_pos_dict.keys():
 mut_pos_dict[chrom_stop_pos] = {}
 mut_pos_dict[chrom_stop_pos]["ab"] = 0
 mut_pos_dict[chrom_stop_pos]["ba"] = 0
 ref_pos_dict[chrom_stop_pos] = {}
 ref_pos_dict[chrom_stop_pos]["ab"] = 0
 ref_pos_dict[chrom_stop_pos]["ba"] = 0
+else:
+print("indels are currently not evaluated")
 bam.close()
 # save counts
 with open(sscs_counts_json, "w") as f:
 json.dump((mut_pos_dict, ref_pos_dict), f)
 if __name__ == '__main__':
 sys.exit(mut2sscs(sys.argv))

Mercurial > repos > mheinzl > variant_analyzer2

comparison mut2sscs.py @ 6:11a2a34f8a2b draft