comparison read2mut.py @ 13:02bf6425fc25 draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 22 Feb 2021 14:18:57 +0000
parents 7a418148319d
children bcdb63df70ce
comparison
equal deleted inserted replaced
12:7a418148319d 13:02bf6425fc25
21 """ 21 """
22 22
23 from __future__ import division 23 from __future__ import division
24 24
25 import argparse 25 import argparse
26 import csv
26 import json 27 import json
27 import operator 28 import operator
28 import os 29 import os
29 import re 30 import re
30 import sys 31 import sys
32
31 33
32 import numpy as np 34 import numpy as np
33 import pysam 35 import pysam
34 import xlsxwriter 36 import xlsxwriter
35 from cyvcf2 import VCF 37 from cyvcf2 import VCF
45 help='JSON file with data collected by mut2read.py.') 47 help='JSON file with data collected by mut2read.py.')
46 parser.add_argument('--sscsJson', 48 parser.add_argument('--sscsJson',
47 help='JSON file with SSCS counts collected by mut2sscs.py.') 49 help='JSON file with SSCS counts collected by mut2sscs.py.')
48 parser.add_argument('--outputFile', 50 parser.add_argument('--outputFile',
49 help='Output xlsx file with summary of mutations.') 51 help='Output xlsx file with summary of mutations.')
52 parser.add_argument('--outputFile_csv',
53 help='Output csv file with summary of mutations.')
50 parser.add_argument('--outputFile2', 54 parser.add_argument('--outputFile2',
51 help='Output xlsx file with allele frequencies of mutations.') 55 help='Output xlsx file with allele frequencies of mutations.')
52 parser.add_argument('--outputFile3', 56 parser.add_argument('--outputFile3',
53 help='Output xlsx file with examples of the tier classification.') 57 help='Output xlsx file with examples of the tier classification.')
54 parser.add_argument('--thresh', type=int, default=0, 58 parser.add_argument('--thresh', type=int, default=0,
57 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') 61 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.')
58 parser.add_argument('--trim', type=int, default=10, 62 parser.add_argument('--trim', type=int, default=10,
59 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') 63 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.')
60 parser.add_argument('--chimera_correction', action="store_true", 64 parser.add_argument('--chimera_correction', action="store_true",
61 help='Count chimeric variants and correct the variant frequencies') 65 help='Count chimeric variants and correct the variant frequencies')
66
67
62 return parser 68 return parser
63 69
64 70
65 def safe_div(x, y): 71 def safe_div(x, y):
66 if y == 0: 72 if y == 0:
76 json_file = args.inputJson 82 json_file = args.inputJson
77 sscs_json = args.sscsJson 83 sscs_json = args.sscsJson
78 outfile = args.outputFile 84 outfile = args.outputFile
79 outfile2 = args.outputFile2 85 outfile2 = args.outputFile2
80 outfile3 = args.outputFile3 86 outfile3 = args.outputFile3
87 outputFile_csv = args.outputFile_csv
81 thresh = args.thresh 88 thresh = args.thresh
82 phred_score = args.phred 89 phred_score = args.phred
83 trim = args.trim 90 trim = args.trim
84 chimera_correction = args.chimera_correction 91 chimera_correction = args.chimera_correction
85 92
225 if len(value) < thresh: 232 if len(value) < thresh:
226 pure_tags_dict_short[key] = value 233 pure_tags_dict_short[key] = value
227 else: 234 else:
228 pure_tags_dict_short = pure_tags_dict 235 pure_tags_dict_short = pure_tags_dict
229 236
237 csv_data = open(outputFile_csv, "w")
238 csv_writer = csv.writer(csv_data)
239
230 # output summary with threshold 240 # output summary with threshold
231 workbook = xlsxwriter.Workbook(outfile) 241 workbook = xlsxwriter.Workbook(outfile)
232 workbook2 = xlsxwriter.Workbook(outfile2) 242 workbook2 = xlsxwriter.Workbook(outfile2)
233 workbook3 = xlsxwriter.Workbook(outfile3) 243 workbook3 = xlsxwriter.Workbook(outfile3)
234 ws1 = workbook.add_worksheet("Results") 244 ws1 = workbook.add_worksheet("Results")
253 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', 263 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba',
254 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', 264 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba',
255 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 265 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba',
256 'in phase', 'chimeric tag') 266 'in phase', 'chimeric tag')
257 ws1.write_row(0, 0, header_line) 267 ws1.write_row(0, 0, header_line)
268 csv_writer.writerow(header_line)
258 counter_tier11 = 0 269 counter_tier11 = 0
259 counter_tier12 = 0 270 counter_tier12 = 0
260 counter_tier21 = 0 271 counter_tier21 = 0
261 counter_tier22 = 0 272 counter_tier22 = 0
262 counter_tier23 = 0 273 counter_tier23 = 0
680 read_pos2 = read_len_median2 = None 691 read_pos2 = read_len_median2 = None
681 if (read_pos3 == -1): 692 if (read_pos3 == -1):
682 read_pos3 = read_len_median3 = None 693 read_pos3 = read_len_median3 = None
683 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) 694 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera)
684 ws1.write_row(row, 0, line) 695 ws1.write_row(row, 0, line)
696 csv_writer.writerow(line)
685 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) 697 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera)
686 ws1.write_row(row + 1, 0, line) 698 ws1.write_row(row + 1, 0, line)
699 csv_writer.writerow(line)
687 700
688 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), 701 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2),
689 {'type': 'formula', 702 {'type': 'formula',
690 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), 703 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1),
691 'format': format1, 704 'format': format1,
902 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) 915 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)})
903 row += 3 916 row += 3
904 workbook.close() 917 workbook.close()
905 workbook2.close() 918 workbook2.close()
906 workbook3.close() 919 workbook3.close()
920 csv_data.close()
907 921
908 922
909 if __name__ == '__main__': 923 if __name__ == '__main__':
910 sys.exit(read2mut(sys.argv)) 924 sys.exit(read2mut(sys.argv))