Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 13:02bf6425fc25 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Mon, 22 Feb 2021 14:18:57 +0000 |
parents | 7a418148319d |
children | bcdb63df70ce |
comparison legend: equal, deleted, inserted, replaced
12:7a418148319d | 13:02bf6425fc25 |
---|---|
21 """ | 21 """ |
22 | 22 |
23 from __future__ import division | 23 from __future__ import division |
24 | 24 |
25 import argparse | 25 import argparse |
| | 26 import csv |
26 import json | 27 import json |
27 import operator | 28 import operator |
28 import os | 29 import os |
29 import re | 30 import re |
30 import sys | 31 import sys |
| | 32 |
31 | 33 |
32 import numpy as np | 34 import numpy as np |
33 import pysam | 35 import pysam |
34 import xlsxwriter | 36 import xlsxwriter |
35 from cyvcf2 import VCF | 37 from cyvcf2 import VCF |
45 help='JSON file with data collected by mut2read.py.') | 47 help='JSON file with data collected by mut2read.py.') |
46 parser.add_argument('--sscsJson', | 48 parser.add_argument('--sscsJson', |
47 help='JSON file with SSCS counts collected by mut2sscs.py.') | 49 help='JSON file with SSCS counts collected by mut2sscs.py.') |
48 parser.add_argument('--outputFile', | 50 parser.add_argument('--outputFile', |
49 help='Output xlsx file with summary of mutations.') | 51 help='Output xlsx file with summary of mutations.') |
| | 52 parser.add_argument('--outputFile_csv', |
| | 53 help='Output csv file with summary of mutations.') |
50 parser.add_argument('--outputFile2', | 54 parser.add_argument('--outputFile2', |
51 help='Output xlsx file with allele frequencies of mutations.') | 55 help='Output xlsx file with allele frequencies of mutations.') |
52 parser.add_argument('--outputFile3', | 56 parser.add_argument('--outputFile3', |
53 help='Output xlsx file with examples of the tier classification.') | 57 help='Output xlsx file with examples of the tier classification.') |
54 parser.add_argument('--thresh', type=int, default=0, | 58 parser.add_argument('--thresh', type=int, default=0, |
57 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 61 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
58 parser.add_argument('--trim', type=int, default=10, | 62 parser.add_argument('--trim', type=int, default=10, |
59 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | 63 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
60 parser.add_argument('--chimera_correction', action="store_true", | 64 parser.add_argument('--chimera_correction', action="store_true", |
61 help='Count chimeric variants and correct the variant frequencies') | 65 help='Count chimeric variants and correct the variant frequencies') |
| | 66 |
| | 67 |
62 return parser | 68 return parser |
63 | 69 |
64 | 70 |
65 def safe_div(x, y): | 71 def safe_div(x, y): |
66 if y == 0: | 72 if y == 0: |
76 json_file = args.inputJson | 82 json_file = args.inputJson |
77 sscs_json = args.sscsJson | 83 sscs_json = args.sscsJson |
78 outfile = args.outputFile | 84 outfile = args.outputFile |
79 outfile2 = args.outputFile2 | 85 outfile2 = args.outputFile2 |
80 outfile3 = args.outputFile3 | 86 outfile3 = args.outputFile3 |
| | 87 outputFile_csv = args.outputFile_csv |
81 thresh = args.thresh | 88 thresh = args.thresh |
82 phred_score = args.phred | 89 phred_score = args.phred |
83 trim = args.trim | 90 trim = args.trim |
84 chimera_correction = args.chimera_correction | 91 chimera_correction = args.chimera_correction |
85 | 92 |
225 if len(value) < thresh: | 232 if len(value) < thresh: |
226 pure_tags_dict_short[key] = value | 233 pure_tags_dict_short[key] = value |
227 else: | 234 else: |
228 pure_tags_dict_short = pure_tags_dict | 235 pure_tags_dict_short = pure_tags_dict |
229 | 236 |
| | 237 csv_data = open(outputFile_csv, "w") |
| | 238 csv_writer = csv.writer(csv_data) |
| | 239 |
230 # output summary with threshold | 240 # output summary with threshold |
231 workbook = xlsxwriter.Workbook(outfile) | 241 workbook = xlsxwriter.Workbook(outfile) |
232 workbook2 = xlsxwriter.Workbook(outfile2) | 242 workbook2 = xlsxwriter.Workbook(outfile2) |
233 workbook3 = xlsxwriter.Workbook(outfile3) | 243 workbook3 = xlsxwriter.Workbook(outfile3) |
234 ws1 = workbook.add_worksheet("Results") | 244 ws1 = workbook.add_worksheet("Results") |
253 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 263 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
254 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 264 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
255 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 265 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
256 'in phase', 'chimeric tag') | 266 'in phase', 'chimeric tag') |
257 ws1.write_row(0, 0, header_line) | 267 ws1.write_row(0, 0, header_line) |
| | 268 csv_writer.writerow(header_line) |
258 counter_tier11 = 0 | 269 counter_tier11 = 0 |
259 counter_tier12 = 0 | 270 counter_tier12 = 0 |
260 counter_tier21 = 0 | 271 counter_tier21 = 0 |
261 counter_tier22 = 0 | 272 counter_tier22 = 0 |
262 counter_tier23 = 0 | 273 counter_tier23 = 0 |
680 read_pos2 = read_len_median2 = None | 691 read_pos2 = read_len_median2 = None |
681 if (read_pos3 == -1): | 692 if (read_pos3 == -1): |
682 read_pos3 = read_len_median3 = None | 693 read_pos3 = read_len_median3 = None |
683 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 694 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
684 ws1.write_row(row, 0, line) | 695 ws1.write_row(row, 0, line) |
| | 696 csv_writer.writerow(line) |
685 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 697 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
686 ws1.write_row(row + 1, 0, line) | 698 ws1.write_row(row + 1, 0, line) |
| | 699 csv_writer.writerow(line) |
687 | 700 |
688 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 701 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
689 {'type': 'formula', | 702 {'type': 'formula', |
690 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 703 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
691 'format': format1, | 704 'format': format1, |
902 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) | 915 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
903 row += 3 | 916 row += 3 |
904 workbook.close() | 917 workbook.close() |
905 workbook2.close() | 918 workbook2.close() |
906 workbook3.close() | 919 workbook3.close() |
| | 920 csv_data.close() |
907 | 921 |
908 | 922 |
909 if __name__ == '__main__': | 923 if __name__ == '__main__': |
910 sys.exit(read2mut(sys.argv)) | 924 sys.exit(read2mut(sys.argv)) |