# HG changeset patch # User mheinzl # Date 1614003537 0 # Node ID 02bf6425fc2548aa1a3119f644e9a5b6229186c4 # Parent 7a418148319da152cd27e645c56da2ef9992a1af planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8 diff -r 7a418148319d -r 02bf6425fc25 read2mut.py --- a/read2mut.py Tue Feb 16 07:08:25 2021 +0000 +++ b/read2mut.py Mon Feb 22 14:18:57 2021 +0000 @@ -23,12 +23,14 @@ from __future__ import division import argparse +import csv import json import operator import os import re import sys + import numpy as np import pysam import xlsxwriter @@ -47,6 +49,8 @@ help='JSON file with SSCS counts collected by mut2sscs.py.') parser.add_argument('--outputFile', help='Output xlsx file with summary of mutations.') + parser.add_argument('--outputFile_csv', + help='Output csv file with summary of mutations.') parser.add_argument('--outputFile2', help='Output xlsx file with allele frequencies of mutations.') parser.add_argument('--outputFile3', @@ -59,6 +63,8 @@ help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') parser.add_argument('--chimera_correction', action="store_true", help='Count chimeric variants and correct the variant frequencies') + + return parser @@ -78,6 +84,7 @@ outfile = args.outputFile outfile2 = args.outputFile2 outfile3 = args.outputFile3 + outputFile_csv = args.outputFile_csv thresh = args.thresh phred_score = args.phred trim = args.trim @@ -227,6 +234,9 @@ else: pure_tags_dict_short = pure_tags_dict + csv_data = open(outputFile_csv, "w") + csv_writer = csv.writer(csv_data) + # output summary with threshold workbook = xlsxwriter.Workbook(outfile) workbook2 = xlsxwriter.Workbook(outfile2) @@ -255,6 +265,7 @@ 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 'in phase', 'chimeric tag') ws1.write_row(0, 0, header_line) + csv_writer.writerow(header_line) counter_tier11 = 0 counter_tier12 = 0 counter_tier21 = 0 @@ -682,8 +693,10 @@ read_pos3 = read_len_median3 = None line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) ws1.write_row(row, 0, line) + csv_writer.writerow(line) line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) ws1.write_row(row + 1, 0, line) + csv_writer.writerow(line) ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), {'type': 'formula', @@ -904,6 +917,7 @@ workbook.close() workbook2.close() workbook3.close() + csv_data.close() if __name__ == '__main__': diff -r 7a418148319d -r 02bf6425fc25 read2mut.xml --- a/read2mut.xml Tue Feb 16 07:08:25 2021 +0000 +++ b/read2mut.xml Mon Feb 22 14:18:57 2021 +0000 @@ -1,5 +1,5 @@ - + Looks for reads with mutation at known positions and calculates frequencies and stats. va_macros.xml @@ -20,6 +20,7 @@ --trim '$trim' $chimera_correction --outputFile '$output_xlsx' + --outputFile_csv '$outputFile_csv' --outputFile2 '$output_xlsx2' --outputFile3 '$output_xlsx3' ]]> @@ -36,6 +37,7 @@ + @@ -50,6 +52,7 @@ +