Mercurial > repos > greg > vsnp_statistics
changeset 25:b908bb18008a draft
Uploaded
author | greg |
---|---|
date | Thu, 16 Sep 2021 00:56:07 +0000 |
parents | 39ab5405b509 |
children | 4373cc94b777 |
files | .shed.yml test-data/vsnp_statistics1.tabular test-data/vsnp_statistics2.tabular test-data/vsnp_statistics4.tabular vsnp_statistics.py vsnp_statistics.xml |
diffstat | 6 files changed, 24 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Thu Sep 16 00:56:07 2021 +0000 @@ -0,0 +1,13 @@ +name: vsnp_statistics +owner: greg +description: | + Contains a tool that produces an Excel spreadsheet containing statistics for samples and associated metrics files. +homepage_url: https://github.com/USDA-VS/vSNP +long_description: | + Contains a tool that accepts a single fastqsanger sample, a set of paired read samples, or a collection of samples + along with associated SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them + to produce an Excel spreadsheet containing statistics for each sample. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_statistics +type: unrestricted +categories: + - Sequence Analysis
--- a/test-data/vsnp_statistics1.tabular Fri Aug 27 20:04:26 2021 +0000 +++ b/test-data/vsnp_statistics1.tabular Thu Sep 16 00:56:07 2021 +0000 @@ -1,2 +1,2 @@ FASTQ File Size Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference -Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB 121.0 29.7 0.53 4317 17063 223 0.05 8.27% 0.439436 36 89 +Mcap_Deer_DE_SRR650221_fastq_gz 1.6 MB
--- a/test-data/vsnp_statistics2.tabular Fri Aug 27 20:04:26 2021 +0000 +++ b/test-data/vsnp_statistics2.tabular Thu Sep 16 00:56:07 2021 +0000 @@ -1,2 +1,2 @@ Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference -13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 25 100.0 65.7 1.00 13-1941-6_S4_L001_R2_600000_fastq_gz 8.5 KB 25 100.0 66.3 1.00 50 45 5 0.10 98.74% 10.338671 611 89 +13-1941-6_S4_L001_R1_600000_fastq_gz 8.7 KB 100
--- a/test-data/vsnp_statistics4.tabular Fri Aug 27 20:04:26 2021 +0000 +++ b/test-data/vsnp_statistics4.tabular Thu Sep 16 00:56:07 2021 +0000 @@ -1,2 +1,2 @@ Read1 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Read2 FASTQ File Size Reads Mean Read Length Mean Read Quality Reads Passing Q30 Total Reads All Mapped Reads Unmapped Reads Unmapped Reads Percentage of Total Reference with Coverage Average Depth of Coverage Good SNP Count Reference -Unnamed Collection_R1 8.7 KB 25 100.0 65.7 1.00 Unnamed Collection_R2 8.5 KB 25 100.0 66.3 1.00 50 46 4 0.08 0.16% 0.002146 0 89 +Unnamed Collection_R1 8.7 KB 100
--- a/vsnp_statistics.py Fri Aug 27 20:04:26 2021 +0000 +++ b/vsnp_statistics.py Thu Sep 16 00:56:07 2021 +0000 @@ -64,11 +64,14 @@ # Starting at row 3, keep every 4 row # random sample specified number of rows. file_size = nice_size(os.path.getsize(fastq_file)) - total_reads = int(len(fastq_df.index) / 4) + total_reads = len(seqs) # Mean Read Length if sampling_size > total_reads: sampling_size = total_reads - fastq_df = fastq_df.iloc[3::4].sample(sampling_size) + try: + fastq_df = fastq_df.iloc[3::4].sample(sampling_size) + except ValueError: + fastq_df = fastq_df.iloc[3::4].sample(sampling_size, replace=True) dict_mean = {} list_length = [] i = 0
--- a/vsnp_statistics.xml Fri Aug 27 20:04:26 2021 +0000 +++ b/vsnp_statistics.xml Thu Sep 16 00:56:07 2021 +0000 @@ -76,7 +76,7 @@ <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> - <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/> + <output name="output" file="vsnp_statistics1.tabular" ftype="tabular" compare="contains"/> </test> <!-- A set of paired fastq files --> <test expect_num_outputs="1"> @@ -85,7 +85,7 @@ <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> - <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/> + <output name="output" file="vsnp_statistics2.tabular" ftype="tabular" compare="contains"/> </test> <!-- A collection of paired fastq files --> <test expect_num_outputs="1"> @@ -98,7 +98,7 @@ </param> <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/> <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/> - <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/> + <output name="output" file="vsnp_statistics4.tabular" ftype="tabular" compare="contains"/> </test> </tests> <help>