Mercurial > repos > iuc > variant_analyzer
comparison mut2sscs.py @ 2:3f1dbd2c59bf draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
| author | iuc |
|---|---|
| date | Tue, 10 Nov 2020 12:55:29 +0000 |
| parents | 3556001ff2db |
| children |
comparison
equal
deleted
inserted
replaced
| 1:3556001ff2db | 2:3f1dbd2c59bf |
|---|---|
| 9 and extracts all tags of reads that carry the mutation. | 9 and extracts all tags of reads that carry the mutation. |
| 10 Calculates statistics about number of ab/ba/duplex per mutation. | 10 Calculates statistics about number of ab/ba/duplex per mutation. |
| 11 | 11 |
| 12 ======= ========== ================= ================================ | 12 ======= ========== ================= ================================ |
| 13 Version Date Author Description | 13 Version Date Author Description |
| 14 0.2.1 2019-10-27 Gundula Povysil - | 14 2.0.0 2020-10-30 Gundula Povysil - |
| 15 ======= ========== ================= ================================ | 15 ======= ========== ================= ================================ |
| 16 | 16 |
| 17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json | 17 USAGE: python mut2sscs.py DCS_Mutations.tabular SSCS.bam SSCS_counts.json |
| 18 | 18 |
| 19 """ | 19 """ |
| 23 import argparse | 23 import argparse |
| 24 import json | 24 import json |
| 25 import os | 25 import os |
| 26 import sys | 26 import sys |
| 27 | 27 |
| 28 import numpy as np | |
| 29 import pysam | 28 import pysam |
| 29 from cyvcf2 import VCF | |
| 30 | 30 |
| 31 | 31 |
| 32 def make_argparser(): | 32 def make_argparser(): |
| 33 parser = argparse.ArgumentParser(description='Takes a tabular file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') | 33 parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file.') |
| 34 parser.add_argument('--mutFile', | 34 parser.add_argument('--mutFile', |
| 35 help='TABULAR file with DCS mutations.') | 35 help='VCR file with DCS mutations.') |
| 36 parser.add_argument('--bamFile', | 36 parser.add_argument('--bamFile', |
| 37 help='BAM file with aligned SSCS reads.') | 37 help='BAM file with aligned SSCS reads.') |
| 38 parser.add_argument('--outputJson', | 38 parser.add_argument('--outputJson', |
| 39 help='Output JSON file to store SSCS counts.') | 39 help='Output JSON file to store SSCS counts.') |
| 40 return parser | 40 return parser |
| 52 sys.exit("Error: Could not find '{}'".format(file1)) | 52 sys.exit("Error: Could not find '{}'".format(file1)) |
| 53 | 53 |
| 54 if os.path.isfile(file2) is False: | 54 if os.path.isfile(file2) is False: |
| 55 sys.exit("Error: Could not find '{}'".format(file2)) | 55 sys.exit("Error: Could not find '{}'".format(file2)) |
| 56 | 56 |
| 57 # 1. read mut file | 57 # read SSCS bam file |
| 58 with open(file1, 'r') as mut: | |
| 59 mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) | |
| 60 | |
| 61 # 2 read SSCS bam file | |
| 62 # pysam.index(file2) | |
| 63 bam = pysam.AlignmentFile(file2, "rb") | 58 bam = pysam.AlignmentFile(file2, "rb") |
| 64 | 59 |
| 65 # get tags | 60 # get tags |
| 66 mut_pos_dict = {} | 61 mut_pos_dict = {} |
| 67 ref_pos_dict = {} | 62 ref_pos_dict = {} |
| 68 if len(mut_array) == 13: | |
| 69 mut_array = mut_array.reshape((1, len(mut_array))) | |
| 70 | 63 |
| 71 for m in range(0, len(mut_array[:, 0])): | 64 for variant in VCF(file1): |
| 72 print(str(m + 1) + " of " + str(len(mut_array[:, 0]))) | 65 chrom = variant.CHROM |
| 73 chrom = mut_array[m, 1] | 66 stop_pos = variant.start |
| 74 stop_pos = mut_array[m, 2].astype(int) | |
| 75 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) | 67 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
| 76 ref = mut_array[m, 9] | 68 ref = variant.REF |
| 77 alt = mut_array[m, 10] | 69 alt = variant.ALT[0] |
| 78 | 70 |
| 79 for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): | 71 if len(ref) == len(alt): |
| 80 if pileupcolumn.reference_pos == stop_pos - 1: | 72 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=1000000000): |
| 81 count_alt = 0 | 73 if pileupcolumn.reference_pos == stop_pos: |
| 82 count_ref = 0 | 74 count_alt = 0 |
| 83 count_indel = 0 | 75 count_ref = 0 |
| 84 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), | 76 count_indel = 0 |
| 85 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) | 77 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
| 86 for pileupread in pileupcolumn.pileups: | 78 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
| 87 if not pileupread.is_del and not pileupread.is_refskip: | 79 for pileupread in pileupcolumn.pileups: |
| 88 tag = pileupread.alignment.query_name | 80 if not pileupread.is_del and not pileupread.is_refskip: |
| 89 abba = tag[-2:] | 81 tag = pileupread.alignment.query_name |
| 90 # query position is None if is_del or is_refskip is set. | 82 abba = tag[-2:] |
| 91 if pileupread.alignment.query_sequence[pileupread.query_position] == alt: | 83 # query position is None if is_del or is_refskip is set. |
| 92 count_alt += 1 | 84 if pileupread.alignment.query_sequence[pileupread.query_position] == alt: |
| 93 if chrom_stop_pos in mut_pos_dict: | 85 count_alt += 1 |
| 94 if abba in mut_pos_dict[chrom_stop_pos]: | 86 if chrom_stop_pos in mut_pos_dict: |
| 95 mut_pos_dict[chrom_stop_pos][abba] += 1 | 87 if abba in mut_pos_dict[chrom_stop_pos]: |
| 88 mut_pos_dict[chrom_stop_pos][abba] += 1 | |
| 89 else: | |
| 90 mut_pos_dict[chrom_stop_pos][abba] = 1 | |
| 96 else: | 91 else: |
| 92 mut_pos_dict[chrom_stop_pos] = {} | |
| 97 mut_pos_dict[chrom_stop_pos][abba] = 1 | 93 mut_pos_dict[chrom_stop_pos][abba] = 1 |
| 98 else: | 94 if chrom_stop_pos not in ref_pos_dict: |
| 99 mut_pos_dict[chrom_stop_pos] = {} | 95 ref_pos_dict[chrom_stop_pos] = {} |
| 100 mut_pos_dict[chrom_stop_pos][abba] = 1 | 96 ref_pos_dict[chrom_stop_pos][abba] = 0 |
| 101 elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: | 97 |
| 102 count_ref += 1 | 98 elif pileupread.alignment.query_sequence[pileupread.query_position] == ref: |
| 103 if chrom_stop_pos in ref_pos_dict: | 99 count_ref += 1 |
| 104 if abba in ref_pos_dict[chrom_stop_pos]: | 100 if chrom_stop_pos in ref_pos_dict: |
| 105 ref_pos_dict[chrom_stop_pos][abba] += 1 | 101 if abba in ref_pos_dict[chrom_stop_pos]: |
| 102 ref_pos_dict[chrom_stop_pos][abba] += 1 | |
| 103 else: | |
| 104 ref_pos_dict[chrom_stop_pos][abba] = 1 | |
| 106 else: | 105 else: |
| 106 ref_pos_dict[chrom_stop_pos] = {} | |
| 107 ref_pos_dict[chrom_stop_pos][abba] = 1 | 107 ref_pos_dict[chrom_stop_pos][abba] = 1 |
| 108 else: | 108 else: |
| 109 ref_pos_dict[chrom_stop_pos] = {} | 109 count_indel += 1 |
| 110 ref_pos_dict[chrom_stop_pos][abba] = 1 | |
| 111 else: | |
| 112 count_indel += 1 | |
| 113 | 110 |
| 114 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % | 111 print("coverage at pos %s = %s, ref = %s, alt = %s, indel = %s,\n" % |
| 115 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) | 112 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_indel)) |
| 116 | 113 |
| 117 # if mutation is in DCS file but not in SSCS, then set counts to NA | 114 # if mutation is in DCS file but not in SSCS, then set counts to NA |
| 118 if chrom_stop_pos not in mut_pos_dict.keys(): | 115 if chrom_stop_pos not in mut_pos_dict.keys(): |
| 119 mut_pos_dict[chrom_stop_pos] = {} | 116 mut_pos_dict[chrom_stop_pos] = {} |
| 120 mut_pos_dict[chrom_stop_pos]["ab"] = 0 | 117 mut_pos_dict[chrom_stop_pos]["ab"] = 0 |
| 121 mut_pos_dict[chrom_stop_pos]["ba"] = 0 | 118 mut_pos_dict[chrom_stop_pos]["ba"] = 0 |
| 122 ref_pos_dict[chrom_stop_pos] = {} | 119 ref_pos_dict[chrom_stop_pos] = {} |
| 123 ref_pos_dict[chrom_stop_pos]["ab"] = 0 | 120 ref_pos_dict[chrom_stop_pos]["ab"] = 0 |
| 124 ref_pos_dict[chrom_stop_pos]["ba"] = 0 | 121 ref_pos_dict[chrom_stop_pos]["ba"] = 0 |
| 122 else: | |
| 123 print("indels are currently not evaluated") | |
| 125 bam.close() | 124 bam.close() |
| 126 | 125 |
| 127 # save counts | 126 # save counts |
| 128 with open(sscs_counts_json, "w") as f: | 127 with open(sscs_counts_json, "w") as f: |
| 129 json.dump((mut_pos_dict, ref_pos_dict), f) | 128 json.dump((mut_pos_dict, ref_pos_dict), f) |
