Mercurial > repos > iuc > vsnp_statistics
diff vsnp_statistics.xml @ 4:a2f69b1598e0 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author | iuc |
---|---|
date | Fri, 10 Jun 2022 06:09:36 +0000 |
parents | b960f47c57a1 |
children |
line wrap: on
line diff
--- a/vsnp_statistics.xml Mon Dec 06 18:27:24 2021 +0000 +++ b/vsnp_statistics.xml Fri Jun 10 06:09:36 2022 +0000 @@ -3,13 +3,6 @@ <macros> <import>macros.xml</import> </macros> - <requirements> - <expand macro="biopython_requirement"/> - <expand macro="numpy_requirement"/> - <expand macro="openpyxl_requirement"/> - <expand macro="pandas_requirement"/> - <expand macro="xlrd_requirement"/> - </requirements> <command detect_errors="exit_code"><![CDATA[ #import re @@ -17,10 +10,14 @@ #set read1 = $input_type_cond.read1 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) ln -s '${read1}' '${read1_identifier}' && + #set read1_seqkit_stats = $input_type_cond.read1_seqkit_stats + #set read1_seqkit_fx2tab = $input_type_cond.read1_seqkit_fx2tab #if $input_type_cond.input_type == "pair": #set read2 = $input_type_cond.read2 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) ln -s '${read2}' '${read2_identifier}' && + #set read2_seqkit_stats = $input_type_cond.read2_seqkit_stats + #set read2_seqkit_fx2tab = $input_type_cond.read2_seqkit_fx2tab #end if #else: #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.reads_collection.element_identifier)) @@ -30,19 +27,23 @@ #set read2 = $input_type_cond.reads_collection.reverse #set read2_identifier = $identifier + '_R2' ln -s '${read2}' '${read2_identifier}' && + #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_stats_collection.element_identifier)) + #set read1_seqkit_stats = $input_type_cond.seqkit_stats_collection.forward + #set read2_seqkit_stats = $input_type_cond.seqkit_stats_collection.reverse + #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_fx2tab_collection.element_identifier)) + #set read1_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.forward + #set read2_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.reverse #end if python '$__tool_directory__/vsnp_statistics.py' --read1 
'${read1_identifier}' -#if $input_type_cond.input_type in ["pair", "paired"]: - --read2 '${read2_identifier}' +--read1_seqkit_stats '$read1_seqkit_stats' +--read1_seqkit_fx2tab '$read1_seqkit_fx2tab' +#if $input_type_cond.input_type in ['pair', 'paired']: + --read2 '${read2_identifier}' + --read2_seqkit_stats '$read2_seqkit_stats' + --read2_seqkit_fx2tab '$read2_seqkit_fx2tab' #end if -#if $read1.is_of_type('fastqsanger.gz'): - --gzipped -#end if ---dbkey '$samtools_idxstats.metadata.dbkey' ---samtools_idxstats '$samtools_idxstats' ---vsnp_azc_metrics '$vsnp_azc_metrics' --output '$output' ]]></command> <inputs> @@ -53,18 +54,24 @@ <option value="pair">Paired reads in separate data sets</option> </param> <when value="single"> - <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Fastq file"/> + <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected Fastq file"/> + <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected Fastq file"/> </when> <when value="paired"> <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + <param name="seqkit_stats_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit statistics files"/> + <param name="seqkit_fx2tab_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit fx2tab files"/> </when> <when value="pair"> - <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> - <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Forward read fastq 
file"/> + <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Reverse read fastq file"/> + <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected forward read"/> + <param name="read2_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected reverse read"/> + <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected forward read"/> + <param name="read2_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected reverse read"/> </when> </conditional> - <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/> - <param name="vsnp_azc_metrics" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/> </inputs> <outputs> <data name="output" format="tabular"/> @@ -73,27 +80,21 @@ <!-- A single fastq file --> <test expect_num_outputs="1"> <param name="input_type" value="single"/> - <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> - <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> - <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> - <output name="output" ftype="tabular"> - <assert_contents> - <has_size value="332"/> - </assert_contents> - </output> + <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> + <param name="read1_seqkit_stats" value="r1_seqkit_stats1.tabular" ftype="tabular"/> + <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab1.tabular" ftype="tabular"/> + <output name="output" file="statistics_output1.tabular" ftype="tabular"/> </test> <!-- A set of paired fastq files --> <test expect_num_outputs="1"> <param name="input_type" value="pair"/> - <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> - <param name="read2" 
value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> - <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> - <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> - <output name="output" ftype="tabular"> - <assert_contents> - <has_size value="500"/> - </assert_contents> - </output> + <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> + <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> + <param name="read1_seqkit_stats" value="r1_seqkit_stats2.tabular" ftype="tabular"/> + <param name="read2_seqkit_stats" value="r2_seqkit_stats2.tabular" ftype="tabular"/> + <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab2.tabular" ftype="tabular"/> + <param name="read2_seqkit_fx2tab" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/> + <output name="output" file="statistics_output2.tabular" ftype="tabular"/> </test> <!-- A collection of paired fastq files --> <test expect_num_outputs="1"> @@ -104,23 +105,29 @@ <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> </collection> </param> - <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/> - <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/> - <output name="output" ftype="tabular"> - <assert_contents> - <has_size value="466"/> - </assert_contents> - </output> + <param name="seqkit_stats_collection"> + <collection type="paired"> + <element name="forward" value="r1_seqkit_stats2.tabular" ftype="tabular"/> + <element name="reverse" value="r2_seqkit_stats2.tabular" ftype="tabular"/> + </collection> + </param> + <param name="seqkit_fx2tab" value="seqkit_fx2tab3.tabular" ftype="tabular"/> + <param name="seqkit_fx2tab_collection"> + <collection type="paired"> + <element name="forward" value="r1_seqkit_fx2tab2.tabular" 
ftype="tabular"/> + <element name="reverse" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/> + </collection> + </param> + <output name="output" file="statistics_output3.tabular" ftype="tabular"/> </test> </tests> <help> **What it does** -Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them -to produce a tabular file containing statistics for each sample. The samples can be a single read, a single set of paired reads in -separate datasets or a collection of paired reads. The output statistics include reference, file size, mean read length, mean read quality, -reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth -of coverage and good SNP count. +Accepts fastq samples and SeqKit stats and fx2tab files produced from the samples and extracts information from them to produce a tabular +file containing statistics for each sample. The samples can be a single read, a single set of paired reads in separate datasets or a +collection of paired reads. The output statistics include file size, read count, sum / avg / max read length, Q1, Q2, Q3, sum gap, N50, +reads passing Q20 / Q30, and average read quality. </help> <expand macro="citations"/> </tool>