Mercurial > repos > iuc > vsnp_statistics
diff vsnp_statistics.xml @ 0:50f539302bf4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
author | iuc |
---|---|
date | Fri, 27 Aug 2021 11:46:52 +0000 |
parents | |
children | b960f47c57a1 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vsnp_statistics.xml Fri Aug 27 11:46:52 2021 +0000 @@ -0,0 +1,205 @@ +<tool id="vsnp_statistics" name="vSNP: statistics" version="@WRAPPER_VERSION@.1+galaxy0" profile="@PROFILE@"> + <description></description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="1.79">biopython</requirement> + <requirement type="package" version="1.21.1">numpy</requirement> + <requirement type="package" version="3.0.7">openpyxl</requirement> + <requirement type="package" version="1.3.0">pandas</requirement> + <requirement type="package" version="2.0.1">xlrd</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +#import re +#set input_idxstats_dir = 'input_idxstats' +#set input_metrics_dir = 'input_metrics' +#set input_reads_dir = 'input_reads' +mkdir -p $input_idxstats_dir && +mkdir -p $input_metrics_dir && +mkdir -p $input_reads_dir && + +#if $input_type_cond.input_type == 'single_files': + #set read1 = $input_type_cond.read_type_cond.read1 + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) + ln -s '${read1}' '${read1_identifier}' && + #if $input_type_cond.read_type_cond.read_type == 'pair': + #set read2 = $input_type_cond.read_type_cond.read2 + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) + ln -s '${read2}' '${read2_identifier}' && + #else: + #set read2 = None + #end if +#else: + #if $input_type_cond.collection_type_cond.collection_type == 'single': + #for $i in $input_type_cond.collection_type_cond.reads_collection: + #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) + ln -s '${i.file_name}' '$input_reads_dir/${identifier}' && + #end for + #else: + #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward'] + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name)) + ln -s '${read1}' '$input_reads_dir/${read1_identifier}' && + #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse'] + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name)) + ln -s '${read2}' '$input_reads_dir/${read2_identifier}' && + #end if + #for $i in $input_type_cond.samtools_idxstats: + #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) + ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' && + #end for + #for $i in $input_type_cond.vsnp_azc: + #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) + ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' && + #end for +#end if + +python '$__tool_directory__/vsnp_statistics.py' +#if $input_type_cond.input_type == 'single_files': + --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey' + #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'): + --gzipped + #end if + --read1 '${read1_identifier}' + #if $input_type_cond.read_type_cond.read_type == 'pair': + --read2 '${read2_identifier}' + #end if + --samtools_idxstats '$input_type_cond.samtools_idxstats' + --vsnp_azc '$input_type_cond.vsnp_azc' +#else: + --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey' + #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'): + --gzipped + #end if + #if $input_type_cond.collection_type_cond.collection_type == 'paired': + --list_paired + #end if + --input_idxstats_dir '$input_idxstats_dir' + --input_metrics_dir '$input_metrics_dir' + --input_reads_dir '$input_reads_dir' +#end if +--output '$output' +]]></command> + <inputs> + <conditional name="input_type_cond"> + <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> + <option value="single_files" selected="true">Single files</option> + <option value="collections">Collections of files</option> + </param> + <when value="single_files"> + <conditional name="read_type_cond"> + <param name="read_type" type="select" label="Choose the read type"> + <option value="single" selected="true">Single reads</option> + <option value="pair">Paired reads</option> + </param> + <when value="single"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + </when> + <when value="pair"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> + </when> + </conditional> + <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/> + <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/> + </when> + <when value="collections"> + <conditional name="collection_type_cond"> + <param name="collection_type" type="select" label="Collections of single reads or paired reads?"> + <option value="single" selected="true">Single reads</option> + <option value="paired">Paired reads in separate datasets</option> + </param> + <when value="single"> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/> + </when> + <when value="paired"> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + </when> + </conditional> + <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/> + <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output" format="tabular"/> + </outputs> + <tests> + <!-- A single fastq file --> + <test expect_num_outputs="1"> + <param name="input_type" value="single_files"/> + <param name="read_type" value="single"/> + <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> + <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> + <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> + <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/> + </test> + <!-- A set of paired fastq files --> + <test expect_num_outputs="1"> + <param name="input_type" value="single_files"/> + <param name="read_type" value="pair"/> + <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> + <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> + <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> + <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> + <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/> + </test> + <!-- A collection of SE fastq files --> + <test expect_num_outputs="1"> + <param name="input_type" value="collections"/> + <param name="read_type" value="single"/> + <param name="reads_collection"> + <collection type="list"> + <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/> + <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/> + </collection> + </param> + <param name="samtools_idxstats"> + <collection type="list"> + <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/> + <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/> + </collection> + </param> + <param name="vsnp_azc"> + <collection type="list"> + <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/> + <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/> + </collection> + </param> + <output name="output" file="vsnp_statistics3.tabular" ftype="tabular"/> + </test> + <!-- A collection of PE fastq files --> + <test expect_num_outputs="1"> + <param name="input_type" value="collections"/> + <param name="collection_type" value="paired"/> + <param name="reads_collection"> + <collection type="paired"> + <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> + <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> + </collection> + </param> + <param name="samtools_idxstats"> + <collection type="list"> + <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/> + </collection> + </param> + <param name="vsnp_azc"> + <collection type="list"> + <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/> + </collection> + </param> + <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/> + </test> + </tests> + <help> +**What it does** + +Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them +to produce an Excel spreadsheet containing statistics for each sample. The samples can be single or paired reads, and all associated inputs +can be either single files or collections of files. The output statistics include reference, file size, mean read length, mean read quality, +reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth +of coverage and good SNP count. + </help> + <expand macro="citations"/> +</tool> +