diff vsnp_statistics.xml @ 4:a2f69b1598e0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author iuc
date Fri, 10 Jun 2022 06:09:36 +0000
parents b960f47c57a1
children
line wrap: on
line diff
--- a/vsnp_statistics.xml	Mon Dec 06 18:27:24 2021 +0000
+++ b/vsnp_statistics.xml	Fri Jun 10 06:09:36 2022 +0000
@@ -3,13 +3,6 @@
     <macros>
         <import>macros.xml</import>
     </macros>
-    <requirements>
-        <expand macro="biopython_requirement"/>
-        <expand macro="numpy_requirement"/>
-        <expand macro="openpyxl_requirement"/>
-        <expand macro="pandas_requirement"/>
-        <expand macro="xlrd_requirement"/>
-    </requirements>
     <command detect_errors="exit_code"><![CDATA[
 #import re
 
@@ -17,10 +10,14 @@
     #set read1 = $input_type_cond.read1
     #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
     ln -s '${read1}' '${read1_identifier}' &&
+    #set read1_seqkit_stats = $input_type_cond.read1_seqkit_stats
+    #set read1_seqkit_fx2tab = $input_type_cond.read1_seqkit_fx2tab
     #if $input_type_cond.input_type == "pair":
         #set read2 = $input_type_cond.read2
         #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
         ln -s '${read2}' '${read2_identifier}' &&
+        #set read2_seqkit_stats = $input_type_cond.read2_seqkit_stats
+        #set read2_seqkit_fx2tab = $input_type_cond.read2_seqkit_fx2tab
     #end if
 #else:
     #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.reads_collection.element_identifier))
@@ -30,19 +27,23 @@
     #set read2 = $input_type_cond.reads_collection.reverse
     #set read2_identifier = $identifier + '_R2'
     ln -s '${read2}' '${read2_identifier}' &&
+    #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_stats_collection.element_identifier))
+    #set read1_seqkit_stats = $input_type_cond.seqkit_stats_collection.forward
+    #set read2_seqkit_stats = $input_type_cond.seqkit_stats_collection.reverse
+    #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_fx2tab_collection.element_identifier))
+    #set read1_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.forward
+    #set read2_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.reverse
 #end if
 
 python '$__tool_directory__/vsnp_statistics.py'
 --read1 '${read1_identifier}'
-#if $input_type_cond.input_type in ["pair", "paired"]:
-  --read2 '${read2_identifier}'
+--read1_seqkit_stats '$read1_seqkit_stats'
+--read1_seqkit_fx2tab '$read1_seqkit_fx2tab'
+#if $input_type_cond.input_type in ['pair', 'paired']:
+    --read2 '${read2_identifier}'
+    --read2_seqkit_stats '$read2_seqkit_stats'
+    --read2_seqkit_fx2tab '$read2_seqkit_fx2tab'
 #end if
-#if $read1.is_of_type('fastqsanger.gz'):
-    --gzipped
-#end if
---dbkey '$samtools_idxstats.metadata.dbkey'
---samtools_idxstats '$samtools_idxstats'
---vsnp_azc_metrics '$vsnp_azc_metrics'
 --output '$output'
 ]]></command>
     <inputs>
@@ -53,18 +54,24 @@
                 <option value="pair">Paired reads in separate data sets</option>
             </param>
             <when value="single">
-                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Fastq file"/>
+                <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected Fastq file"/>
+                <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected Fastq file"/>
             </when>
             <when value="paired">
                 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+                <param name="seqkit_stats_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit statistics files"/>
+                <param name="seqkit_fx2tab_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit fx2tab files"/>
             </when>
             <when value="pair">
-                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
-                <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Forward read fastq file"/>
+                <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Reverse read fastq file"/>
+                <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected forward read"/>
+                <param name="read2_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected reverse read"/>
+                <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected forward read"/>
+                <param name="read2_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected reverse read"/>
             </when>
         </conditional>
-        <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
-        <param name="vsnp_azc_metrics" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
     </inputs>
     <outputs>
         <data name="output" format="tabular"/>
@@ -73,27 +80,21 @@
         <!-- A single fastq file -->
         <test expect_num_outputs="1">
             <param name="input_type" value="single"/>
-            <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
-            <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/>
-            <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
-            <output name="output" ftype="tabular">
-                <assert_contents>
-                    <has_size value="332"/>
-                </assert_contents>
-            </output>
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="read1_seqkit_stats" value="r1_seqkit_stats1.tabular" ftype="tabular"/>
+            <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab1.tabular" ftype="tabular"/>
+            <output name="output" file="statistics_output1.tabular" ftype="tabular"/>
         </test>
         <!-- A set of paired fastq files -->
         <test expect_num_outputs="1">
             <param name="input_type" value="pair"/>
-            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
-            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
-            <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/>
-            <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
-            <output name="output" ftype="tabular">
-                <assert_contents>
-                    <has_size value="500"/>
-                </assert_contents>
-            </output>
+            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
+            <param name="read1_seqkit_stats" value="r1_seqkit_stats2.tabular" ftype="tabular"/>
+            <param name="read2_seqkit_stats" value="r2_seqkit_stats2.tabular" ftype="tabular"/>
+            <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab2.tabular" ftype="tabular"/>
+            <param name="read2_seqkit_fx2tab" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/>
+            <output name="output" file="statistics_output2.tabular" ftype="tabular"/>
         </test>
         <!-- A collection of paired fastq files -->
         <test expect_num_outputs="1">
@@ -104,23 +105,29 @@
                     <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
                 </collection>
             </param>
-            <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/>
-            <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/>
-            <output name="output" ftype="tabular">
-                <assert_contents>
-                    <has_size value="466"/>
-                </assert_contents>
-            </output>
+            <param name="seqkit_stats_collection">
+                <collection type="paired">
+                    <element name="forward" value="r1_seqkit_stats2.tabular" ftype="tabular"/>
+                    <element name="reverse" value="r2_seqkit_stats2.tabular" ftype="tabular"/>
+                </collection>
+            </param>
+            <param name="seqkit_fx2tab" value="seqkit_fx2tab3.tabular" ftype="tabular"/>
+            <param name="seqkit_fx2tab_collection">
+                <collection type="paired">
+                    <element name="forward" value="r1_seqkit_fx2tab2.tabular" ftype="tabular"/>
+                    <element name="reverse" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/>
+                </collection>
+            </param>
+            <output name="output" file="statistics_output3.tabular" ftype="tabular"/>
         </test>
     </tests>
     <help>
 **What it does**
 
-Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them
-to produce a tabular file containing statistics for each sample.  The samples can be a single read, a single set of paired reads in
-separate datasets or a collection of paired reads.  The output statistics include reference, file size, mean read length, mean read quality,
-reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth
-of coverage and good SNP count.
+Accepts fastq samples and SeqKit stats and fx2tab files produced from the samples and extracts information from them to produce a tabular
+file containing statistics for each sample.  The samples can be a single read, a single set of paired reads in separate datasets or a
+collection of paired reads.  The output statistics include file size, read count, sum / avg / max read length, Q1, Q2, Q3, sum gap, N50,
+reads passing Q20 / Q30, and average read quality.
     </help>
     <expand macro="citations"/>
 </tool>