Mercurial > repos > greg > vsnp_statistics
changeset 23:b34843f09f9f draft
Uploaded
author | greg |
---|---|
date | Fri, 27 Aug 2021 19:56:20 +0000 |
parents | 61239720da38 |
children | 39ab5405b509 |
files | .shed.yml test-data/vsnp_statistics1.tabular test-data/vsnp_statistics2.tabular test-data/vsnp_statistics4.tabular vsnp_statistics.py |
diffstat | 5 files changed, 25 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Fri Aug 27 19:56:20 2021 +0000 @@ -0,0 +1,13 @@ +name: vsnp_statistics +owner: greg +description: | + Contains a tool that produces an Excel spreadsheet containing statistics for samples and associated metrics files. +homepage_url: https://github.com/USDA-VS/vSNP +long_description: | + Contains a tool that accepts a single fastqsanger sample, a set of paired read samples, or a collection of samples + along with associated SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them + to produce an Excel spreadsheet containing statistics for each sample. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_statistics +type: unrestricted +categories: + - Sequence Analysis
--- a/test-data/vsnp_statistics1.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics1.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference FASTQ File Size Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB 121.0 29.7 0.53 4317 17063 223 0.05 8.27% 0.439436 36 +FASTQ File Size Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB 121.0 29.7 0.53 4317 17063 223 0.05 8.27% 0.439436 36 89
--- a/test-data/vsnp_statistics2.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics2.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 25 100.0 65.7 1.00 13-1941-6_S4_L001_R2_600000_fastq_gz 8.5 KB 25 100.0 66.3 1.00 50 45 5 0.10 98.74% 10.338671 611 +Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 25 100.0 65.7 1.00 13-1941-6_S4_L001_R2_600000_fastq_gz 8.5 KB 25 100.0 66.3 1.00 50 45 5 0.10 98.74% 10.338671 611 89
--- a/test-data/vsnp_statistics4.tabular Tue Aug 24 12:44:52 2021 +0000 +++ b/test-data/vsnp_statistics4.tabular Fri Aug 27 19:56:20 2021 +0000 @@ -1,2 +1,2 @@ -# Reference Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count -89 Unnamed Collection_R1 8.7 KB 25 100.0 65.7 1.00 Unnamed Collection_R2 8.5 KB 25 100.0 66.3 1.00 50 46 4 0.08 0.16% 0.002146 0 +Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference +Unnamed Collection_R1 8.7 KB 25 100.0 65.7 1.00 Unnamed Collection_R2 8.5 KB 25 100.0 66.3 1.00 50 46 4 0.08 0.16% 0.002146 0 89
--- a/vsnp_statistics.py Tue Aug 24 12:44:52 2021 +0000 +++ b/vsnp_statistics.py Fri Aug 27 19:56:20 2021 +0000 @@ -100,23 +100,22 @@ def output_statistics(read1_stats, read2_stats, idxstats_file, metrics_file, output_file): paired_reads = read2_stats is not None if paired_reads: - columns = ['Reference', 'Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', + columns = ['Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Read2 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total', 'Reference with Coverage', 'Average Depth of Coverage', - 'Good SNP Count'] + 'Good SNP Count', 'Reference'] else: - columns = ['Reference', 'FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', + columns = ['FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total', - 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count'] + 'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count', 'Reference'] with open(output_file, "w") as outfh: # Make sure the header starts with a # so # MultiQC can properly handle the output. - outfh.write("# %s\n" % "\t".join(columns)) + outfh.write("%s\n" % "\t".join(columns)) line_items = [] # Get the current stats and associated files. # Get and output the statistics. - line_items.append(read1_stats.reference) line_items.append(read1_stats.fastq_file) line_items.append(read1_stats.file_size) if paired_reads: @@ -152,6 +151,7 @@ line_items.append(ref_with_coverage) line_items.append(avg_depth_of_coverage) line_items.append(good_snp_count) + line_items.append(read1_stats.reference) outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))