comparison vsnp_statistics.py @ 23:b34843f09f9f draft

Uploaded
author greg
date Fri, 27 Aug 2021 19:56:20 +0000
parents 377c1a96aae9
children b908bb18008a
comparison
equal deleted inserted replaced
22:61239720da38 23:b34843f09f9f
98 98
99 99
100 def output_statistics(read1_stats, read2_stats, idxstats_file, metrics_file, output_file): 100 def output_statistics(read1_stats, read2_stats, idxstats_file, metrics_file, output_file):
101 paired_reads = read2_stats is not None 101 paired_reads = read2_stats is not None
102 if paired_reads: 102 if paired_reads:
103 columns = ['Reference', 'Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 103 columns = ['Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality',
104 'Reads Passing Q30', 'Read2 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 104 'Reads Passing Q30', 'Read2 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality',
105 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 105 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads',
106 'Unmapped Reads Percentage of Total', 'Reference with Coverage', 'Average Depth of Coverage', 106 'Unmapped Reads Percentage of Total', 'Reference with Coverage', 'Average Depth of Coverage',
107 'Good SNP Count'] 107 'Good SNP Count', 'Reference']
108 else: 108 else:
109 columns = ['Reference', 'FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 109 columns = ['FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30',
110 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total', 110 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total',
111 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count'] 111 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count', 'Reference']
112 with open(output_file, "w") as outfh: 112 with open(output_file, "w") as outfh:
113 # Make sure the header starts with a # so 113 # Make sure the header starts with a # so
114 # MultiQC can properly handle the output. 114 # MultiQC can properly handle the output.
115 outfh.write("# %s\n" % "\t".join(columns)) 115 outfh.write("%s\n" % "\t".join(columns))
116 line_items = [] 116 line_items = []
117 # Get the current stats and associated files. 117 # Get the current stats and associated files.
118 # Get and output the statistics. 118 # Get and output the statistics.
119 line_items.append(read1_stats.reference)
120 line_items.append(read1_stats.fastq_file) 119 line_items.append(read1_stats.fastq_file)
121 line_items.append(read1_stats.file_size) 120 line_items.append(read1_stats.file_size)
122 if paired_reads: 121 if paired_reads:
123 line_items.append(read1_stats.total_reads) 122 line_items.append(read1_stats.total_reads)
124 line_items.append(read1_stats.mean_read_length) 123 line_items.append(read1_stats.mean_read_length)
150 # Reference with Coverage 149 # Reference with Coverage
151 ref_with_coverage, avg_depth_of_coverage, good_snp_count = process_metrics_file(metrics_file) 150 ref_with_coverage, avg_depth_of_coverage, good_snp_count = process_metrics_file(metrics_file)
152 line_items.append(ref_with_coverage) 151 line_items.append(ref_with_coverage)
153 line_items.append(avg_depth_of_coverage) 152 line_items.append(avg_depth_of_coverage)
154 line_items.append(good_snp_count) 153 line_items.append(good_snp_count)
154 line_items.append(read1_stats.reference)
155 outfh.write('%s\n' % '\t'.join(str(x) for x in line_items)) 155 outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))
156 156
157 157
158 def process_idxstats_file(idxstats_file): 158 def process_idxstats_file(idxstats_file):
159 all_mapped_reads = 0 159 all_mapped_reads = 0