diff vsnp_statistics.xml @ 0:50f539302bf4 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 92f46d4bb55b582f05ac3c4b094307f114cbf98f"
author iuc
date Fri, 27 Aug 2021 11:46:52 +0000
parents
children b960f47c57a1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vsnp_statistics.xml	Fri Aug 27 11:46:52 2021 +0000
@@ -0,0 +1,205 @@
+<tool id="vsnp_statistics" name="vSNP: statistics" version="@WRAPPER_VERSION@.1+galaxy0" profile="@PROFILE@">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.79">biopython</requirement>
+        <requirement type="package" version="1.21.1">numpy</requirement>
+        <requirement type="package" version="3.0.7">openpyxl</requirement>
+        <requirement type="package" version="1.3.0">pandas</requirement>
+        <requirement type="package" version="2.0.1">xlrd</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+#import re
+#set input_idxstats_dir = 'input_idxstats'
+#set input_metrics_dir = 'input_metrics'
+#set input_reads_dir = 'input_reads'
+mkdir -p $input_idxstats_dir &&
+mkdir -p $input_metrics_dir &&
+mkdir -p $input_reads_dir &&
+
+#if $input_type_cond.input_type  == 'single_files':
+    #set read1 = $input_type_cond.read_type_cond.read1
+    #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
+    ln -s '${read1}' '${read1_identifier}' &&
+    #if $input_type_cond.read_type_cond.read_type == 'pair':
+        #set read2 = $input_type_cond.read_type_cond.read2
+        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
+        ln -s '${read2}' '${read2_identifier}' &&
+    #else:
+        #set read2 = None
+    #end if
+#else:
+    #if $input_type_cond.collection_type_cond.collection_type == 'single':
+        #for $i in $input_type_cond.collection_type_cond.reads_collection:
+            #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
+            ln -s '${i.file_name}' '$input_reads_dir/${identifier}' &&
+        #end for
+    #else:
+        #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward']
+        #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name))
+        ln -s '${read1}' '$input_reads_dir/${read1_identifier}' &&
+        #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse']
+        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name))
+        ln -s '${read2}' '$input_reads_dir/${read2_identifier}' &&
+    #end if
+    #for $i in $input_type_cond.samtools_idxstats:
+        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
+        ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' &&
+    #end for
+    #for $i in $input_type_cond.vsnp_azc:
+        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
+        ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' &&
+    #end for
+#end if
+
+python '$__tool_directory__/vsnp_statistics.py'
+#if $input_type_cond.input_type == 'single_files':
+    --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey'
+    #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'):
+        --gzipped
+    #end if
+    --read1 '${read1_identifier}'
+    #if $input_type_cond.read_type_cond.read_type == 'pair':
+      --read2 '${read2_identifier}'
+    #end if
+    --samtools_idxstats '$input_type_cond.samtools_idxstats'
+    --vsnp_azc '$input_type_cond.vsnp_azc'
+#else:
+    --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey'
+    #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'):
+        --gzipped
+    #end if
+    #if $input_type_cond.collection_type_cond.collection_type == 'paired':
+        --list_paired
+    #end if
+    --input_idxstats_dir '$input_idxstats_dir'
+    --input_metrics_dir '$input_metrics_dir'
+    --input_reads_dir '$input_reads_dir'
+#end if
+--output '$output'
+]]></command>
+    <inputs>
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
+                <option value="single_files" selected="true">Single files</option>
+                <option value="collections">Collections of files</option>
+            </param>
+            <when value="single_files">
+                <conditional name="read_type_cond">
+                    <param name="read_type" type="select" label="Choose the read type">
+                        <option value="single" selected="true">Single reads</option>
+                         <option value="pair">Paired reads</option>
+                    </param>
+                     <when value="single">
+                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                    </when>
+                    <when value="pair">
+                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                        <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
+                    </when>
+                </conditional>
+                <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
+                <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
+            </when>
+            <when value="collections">
+                <conditional name="collection_type_cond">
+                    <param name="collection_type" type="select" label="Collections of single reads or paired reads?">
+                        <option value="single" selected="true">Single reads</option>
+                        <option value="paired">Paired reads in separate datasets</option>
+                    </param>
+                    <when value="single">
+                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
+                    </when>
+                    <when value="paired">
+                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+                    </when>
+                </conditional>
+                <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
+                <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- A single fastq file -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="single_files"/>
+            <param name="read_type" value="single"/>
+            <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
+            <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/>
+            <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
+            <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/>
+        </test>
+        <!-- A set of paired fastq files -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="single_files"/>
+            <param name="read_type" value="pair"/>
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
+            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
+            <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/>
+            <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
+            <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/>
+        </test>
+        <!-- A collection of SE fastq files -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="collections"/>
+            <param name="read_type" value="single"/>
+            <param name="reads_collection">
+                <collection type="list">
+                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/>
+                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="samtools_idxstats">
+                <collection type="list">
+                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/>
+                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="vsnp_azc">
+                <collection type="list">
+                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/>
+                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/>
+                </collection>
+            </param>
+            <output name="output" file="vsnp_statistics3.tabular" ftype="tabular"/>
+        </test>
+        <!-- A collection of PE fastq files -->
+        <test expect_num_outputs="1">
+            <param name="input_type" value="collections"/>
+            <param name="collection_type" value="paired"/>
+            <param name="reads_collection">
+                <collection type="paired">
+                    <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+                    <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
+                </collection>
+            </param>
+            <param name="samtools_idxstats">
+                <collection type="list">
+                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="vsnp_azc">
+                <collection type="list">
+                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/>
+                </collection>
+            </param>
+            <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them
+to produce an Excel spreadsheet containing statistics for each sample.  The samples can be single or paired reads, and all associated inputs
+can be either single files or collections of files.  The output statistics include reference, file size, mean read length, mean read quality,
+reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth
+of coverage and good SNP count.
+    </help>
+    <expand macro="citations"/>
+</tool>
+