Mercurial > repos > greg > vsnp_statistics

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Fri Aug 27 19:56:20 2021 +0000
@@ -0,0 +1,13 @@
+name: vsnp_statistics
+owner: greg
+description: |
+  Contains a tool that produces an Excel spreadsheet containing statistics for samples and associated metrics files.
+homepage_url: https://github.com/USDA-VS/vSNP
+long_description: |
+  Contains a tool that accepts a single fastqsanger sample, a set of paired read samples, or a collection of samples
+  along with associated SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them
+  to produce an Excel spreadsheet containing statistics for each sample.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_statistics
+type: unrestricted
+categories:
+  - Sequence Analysis
--- a/test-data/vsnp_statistics1.tabular	Tue Aug 24 12:44:52 2021 +0000
+++ b/test-data/vsnp_statistics1.tabular	Fri Aug 27 19:56:20 2021 +0000
@@ -1,2 +1,2 @@
-# Reference	FASTQ	File Size	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count
-89	Mcap_Deer_DE_SRR650221_fastq_gz	1.6 MB	121.0	29.7	      0.53	4317	17063	223	      0.05	8.27%	0.439436	36
+FASTQ	File Size	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count	Reference
+Mcap_Deer_DE_SRR650221_fastq_gz	1.6 MB	121.0	29.7	      0.53	4317	17063	223	      0.05	8.27%	0.439436	36	89
--- a/test-data/vsnp_statistics2.tabular	Tue Aug 24 12:44:52 2021 +0000
+++ b/test-data/vsnp_statistics2.tabular	Fri Aug 27 19:56:20 2021 +0000
@@ -1,2 +1,2 @@
-# Reference	Read1 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Read2 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count
-89	13-1941-6_S4_L001_R1_600000_fastq_gz	8.7 KB	25	100.0	65.7	      1.00	13-1941-6_S4_L001_R2_600000_fastq_gz	8.5 KB	25	100.0	66.3	      1.00	50	45	5	      0.10	98.74%	10.338671	611
+Read1 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Read2 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count	Reference
+13-1941-6_S4_L001_R1_600000_fastq_gz	8.7 KB	25	100.0	65.7	      1.00	13-1941-6_S4_L001_R2_600000_fastq_gz	8.5 KB	25	100.0	66.3	      1.00	50	45	5	      0.10	98.74%	10.338671	611	89
--- a/test-data/vsnp_statistics4.tabular	Tue Aug 24 12:44:52 2021 +0000
+++ b/test-data/vsnp_statistics4.tabular	Fri Aug 27 19:56:20 2021 +0000
@@ -1,2 +1,2 @@
-# Reference	Read1 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Read2 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count
-89	Unnamed Collection_R1	8.7 KB	25	100.0	65.7	      1.00	Unnamed Collection_R2	8.5 KB	25	100.0	66.3	      1.00	50	46	4	      0.08	0.16%	0.002146	0
+Read1 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Read2 FASTQ	File Size	Reads	Mean Read Length	Mean Read Quality	Reads Passing Q30	Total Reads	All Mapped Reads	Unmapped Reads	Unmapped Reads Percentage of Total	Reference with Coverage	Average Depth of Coverage	Good SNP Count	Reference
+Unnamed Collection_R1	8.7 KB	25	100.0	65.7	      1.00	Unnamed Collection_R2	8.5 KB	25	100.0	66.3	      1.00	50	46	4	      0.08	0.16%	0.002146	0	89
--- a/vsnp_statistics.py	Tue Aug 24 12:44:52 2021 +0000
+++ b/vsnp_statistics.py	Fri Aug 27 19:56:20 2021 +0000
@@ -100,23 +100,22 @@
 def output_statistics(read1_stats, read2_stats, idxstats_file, metrics_file, output_file):
     paired_reads = read2_stats is not None
     if paired_reads:
-        columns = ['Reference', 'Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality',
+        columns = ['Read1 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality',
                    'Reads Passing Q30', 'Read2 FASTQ', 'File Size', 'Reads', 'Mean Read Length', 'Mean Read Quality',
                    'Reads Passing Q30', 'Total Reads', 'All Mapped Reads', 'Unmapped Reads',
                    'Unmapped Reads Percentage of Total', 'Reference with Coverage', 'Average Depth of Coverage',
-                   'Good SNP Count']
+                   'Good SNP Count', 'Reference']
     else:
-        columns = ['Reference', 'FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30',
+        columns = ['FASTQ', 'File Size', 'Mean Read Length', 'Mean Read Quality', 'Reads Passing Q30',
                    'Total Reads', 'All Mapped Reads', 'Unmapped Reads', 'Unmapped Reads Percentage of Total',
-                   'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count']
+                   'Reference with Coverage', 'Average Depth of Coverage', 'Good SNP Count', 'Reference']
     with open(output_file, "w") as outfh:
         # Write the tab-separated header line.  NOTE: the header is no longer
         # prefixed with a # (that prefix was previously added for MultiQC).
-        outfh.write("# %s\n" % "\t".join(columns))
+        outfh.write("%s\n" % "\t".join(columns))
         line_items = []
         # Get the current stats and associated files.
         # Get and output the statistics.
-        line_items.append(read1_stats.reference)
         line_items.append(read1_stats.fastq_file)
         line_items.append(read1_stats.file_size)
         if paired_reads:
@@ -152,6 +151,7 @@
         line_items.append(ref_with_coverage)
         line_items.append(avg_depth_of_coverage)
         line_items.append(good_snp_count)
+        line_items.append(read1_stats.reference)
         outfh.write('%s\n' % '\t'.join(str(x) for x in line_items))