diff purge_dups.xml @ 3:76d4cbefff85 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit 5d56aa02b0f905507e1d98a2d74f0629b7591cd3"
author iuc
date Mon, 14 Jun 2021 18:01:05 +0000
parents 17b378303f2d
children a315c25dc813
line wrap: on
line diff
--- a/purge_dups.xml	Tue Apr 27 20:48:51 2021 +0000
+++ b/purge_dups.xml	Mon Jun 14 18:01:05 2021 +0000
@@ -2,13 +2,72 @@
     <description>and haplotigs in an assembly based on read depth (purge_dups)</description>
     <macros>
         <token name="@TOOL_VERSION@">1.2.5</token>
-        <token name="@VERSION_SUFFIX@">2</token>
+        <token name="@VERSION_SUFFIX@">3</token>
+        <xml name="trimmers">
+            <section name="section_hist" title="Histogram plot options" >
+                <!--<param name="cutoffs_his" type="data" optional="true" format="txt" label="Read depth cutoffs file" />-->
+                <param argument="--ymin" type="integer" optional="true" min="0" label="Specify a minimum for the Y axis"/>
+                <param argument="--ymax" type="integer" optional="true" label="Specify a maximum for the Y axis"/>
+                <param argument="--xmin" type="integer" optional="true" min="0" label="Specify a minimum for the X axis"/>
+                <param argument="--xmax" type="integer" optional="true" label="Specify a maximum for the X axis"/>
+                <param argument="--title" type="text" value="Read depth histogram plot" label="Histogram title"/>
+            </section>
+        </xml>
+        <token name="@HIST_PLOT@"><![CDATA[
+            python '$__tool_directory__/hist_plot.py'
+            --cutoffs cutoffs.tsv
+            #if str($function_select.section_hist.ymin) != ''
+                --ymin $function_select.section_hist.ymin
+            #end if
+            #if str($function_select.section_hist.ymax) != ''
+                --ymax $function_select.section_hist.ymax
+            #end if
+            #if str($function_select.section_hist.xmin) != ''
+                --xmin $function_select.section_hist.xmin
+            #end if
+            #if str($function_select.section_hist.xmax) != ''
+                --xmax $function_select.section_hist.xmax
+            #end if
+            #if $function_select.section_hist.title
+                --title '${function_select.section_hist.title}'
+            #end if
+            depth.stat hist.png
+        ]]></token>
+        <token name="@CALCUTS@"><![CDATA[
+            calcuts
+            #if str($function_select.section_calcuts.min_depth) != '':
+                -f $function_select.section_calcuts.min_depth
+            #end if
+            #if str($function_select.section_calcuts.low_depth) != '':
+                -l $function_select.section_calcuts.low_depth
+            #end if
+            #if str($function_select.section_calcuts.transition) != '':
+                -m $function_select.section_calcuts.transition
+            #end if
+            #if str($function_select.section_calcuts.upper_depth) != '':
+                -u $function_select.section_calcuts.upper_depth
+            #end if
+            $function_select.section_calcuts.ploidy
+        ]]></token>
+        <xml name="calcuts">
+            <section name="section_calcuts" title="Calcuts options">
+                <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth count" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/>
+                <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/>
+                <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/>
+                <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/>
+                <param name="ploidy" argument="-d" type="select" label="Ploidy">
+                    <option value="-d 0" selected="true">Diploid [0]</option>
+                    <option value="-d 1">Haploid [1]</option>
+                </param>
+            </section>
+        </xml>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement>
+        <requirement type="package" version="3.4.2">matplotlib-base</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        #if $function_select.functions == "purge_dups":
+        #if $function_select.functions == 'purge_dups':
             #for $i, $file in enumerate($function_select.input):
                 #if $file.is_of_type("paf"):
                     gzip -c '${file}' > '${i}.gz' &&
@@ -18,10 +77,10 @@
             #end for
             purge_dups
             #if $function_select.coverage:
-                -c '$function_select.coverage'
+                -c '${function_select.coverage}'
             #end if
             #if $function_select.cutoffs:
-                -T '$function_select.cutoffs'
+                -T '${function_select.cutoffs}'
             #end if
             #if $function_select.min_bad:
                 -f $function_select.min_bad
@@ -38,7 +97,7 @@
             #if $function_select.max_gap:
                 -M $function_select.max_gap
             #end if
-            #if $function_select.double_chain.chaining_rounds == "two":
+            #if $function_select.double_chain.chaining_rounds == 'two':
                 -2
                 #if $function_select.double_chain.max_gap_2:
                     -G $function_select.double_chain.max_gap_2
@@ -54,15 +113,15 @@
                 '${i}.gz'
             #end for
             > dups.bed 2> purge_dups.log
-        #else if $function_select.functions == "split_fa":
+        #else if $function_select.functions == 'split_fa':
             split_fa
             #if $function_select.split:
                 -n $function_select.split
             #end if
-            '$function_select.input' > split.fasta
-        #else if $function_select.functions == "pbcstat":
+            '${function_select.input}' > split.fasta
+        #else if $function_select.functions == 'pbcstat':
             #for $i, $file in enumerate($function_select.input):
-                #if $file.is_of_type("paf"):
+                #if $file.is_of_type('paf'):
                     gzip -c '${file}' > '${i}.gz' &&
                 #else
                     ln -s '${file}' '${i}.gz' &&
@@ -82,10 +141,15 @@
                 -l $function_select.flank
             #end if
             $function_select.primary_alignments
+ 
             #for $i, $file in enumerate($function_select.input):
                 '${i}.gz'
-            #end for 
-        #else if $function_select.functions == "ngscstat":
+            #end for
+            && mv PB.stat depth.stat
+            && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log
+            && @HIST_PLOT@
+
+        #else if $function_select.functions == 'ngscstat':
             ngscstat
             #if $function_select.min_align_qual:
                 -q $function_select.min_align_qual
@@ -96,24 +160,15 @@
             #if $function_select.max_insert:
                 -L $function_select.max_insert
             #end if
-            '$function_select.input'
-        #else if $function_select.functions == "calcuts":
-            calcuts
-            #if $function_select.min_depth:
-                -f $function_select.min_depth
-            #end if
-            #if $function_select.low_depth:
-                -l $function_select.low_depth
-            #end if
-            #if $function_select.transition:
-                -m $function_select.transition
-            #end if
-            #if $function_select.upper_depth:
-                -u $function_select.upper_depth
-            #end if
-            $function_select.ploidy
-            '$function_select.input' > cutoffs.tsv 2>calcuts.log
-        #else if $function_select.functions == "get_seqs":
+            '${function_select.input}'
+            && mv TX.stat depth.stat
+            && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log
+            && @HIST_PLOT@ 
+
+        #else if $function_select.functions == 'calcuts':
+            @CALCUTS@ '${function_select.input}' > cutoffs.tsv 2>calcuts.log
+
+        #else if $function_select.functions == 'get_seqs':
             get_seqs
             $function_select.coverage
             $function_select.haplotigs
@@ -128,18 +183,18 @@
             #if $function_select.min_gap:
                 -g $function_select.min_gap
             #end if
-            '$function_select.bed_input' '$function_select.fasta_input'
+            '${function_select.bed_input}' '${function_select.fasta_input}'
         #end if
     ]]></command>
     <inputs>
         <conditional name="function_select">
             <param type="select" name="functions" label="Select the purge_dups function">
-                <option value="purge_dups">purge haplotigs and overlaps for an assembly</option>
-                <option value="split_fa">split FASTA file by 'N's</option>
-                <option value="pbcstat">create read depth histogram and base-level read depth for pacbio data</option>
-                <option value="ngscstat">create read depth histogram and base-level read detph for illumina data</option>
-                <option value="calcuts">calculate coverage cutoffs</option>
-                <option value="get_seqs">obtain seqeuences after purging</option>
+                <option value="purge_dups">Purge haplotigs and overlaps for an assembly (purge_dups)</option>
+                <option value="split_fa">Split FASTA file by 'N's (split_fa)</option>
+                <option value="pbcstat">Calculate coverage cutoff and create read depth histogram and base-level read depth for PacBio data (calcuts+pbcstat)</option>
+                <option value="ngscstat">Calculate coverage cutoff and create read depth histogram and base-level read depth for Illumina data (calcuts+ngscstat)</option>
+                <option value="calcuts">Calculate coverage cutoffs (calcuts)</option>
+                <option value="get_seqs">Obtain sequences after purging (get_seqs)</option>
             </param>
             <when value="purge_dups">
                 <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/>
@@ -174,6 +229,8 @@
                 <param name="min_map_qual" type="integer"  argument="-q" optional="true" label="Minimum mapping quality"/>
                 <param name="flank" type="integer" argument="-l" optional="true" label="Flanking space" />
                 <param name="primary_alignments" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments" />
+                <expand macro="calcuts" />
+                <expand macro="trimmers"/>
             </when>
             <when value="ngscstat">
                 <param name="input" type="data" format="bam" label="BAM input file"/>
@@ -181,18 +238,15 @@
                 <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? -->
                 <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> -->
                 <param name="max_insert" type="integer"  argument="-L" optional="true" label="Maximum insert size"/>
+                <expand macro="calcuts" />
+                <expand macro="trimmers"/>
             </when>
+
             <when value="calcuts">
                 <param name="input" type="data" format="tabular" label="STAT input file"/>
-                <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth coun" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/>
-                <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/>
-                <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/>
-                <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/>
-                <param name="ploidy" argument="-d" type="select" label="Ploidy">
-                    <option value="-d 0" selected="true">Diploid [0]</option>
-                    <option value="-d 1">Haploid [1]</option>
-                </param>
+                <expand macro="calcuts" />
             </when>
+
             <when value="get_seqs">
                 <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/>
                 <param name="bed_input" type="data" format="bed" label="Bed input file"/>
@@ -222,8 +276,8 @@
         <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file">
             <filter>function_select['functions'] == 'ngscstat'</filter>
         </data>
-        <data name="ngscstat_stat" format="tabular" from_work_dir="TX.stat"  label="${tool.name} on ${on_string}: ngscstat stat file">
-            <filter>function_select['functions'] == 'ngscstat'</filter>
+        <data name="stat_file" format="tabular" from_work_dir="depth.stat" label="${tool.name} on ${on_string}: stat file">
+            <filter>function_select['functions'] in ('ngscstat', 'pbcstat')</filter>
         </data>
         <!-- Pbcstat -->
         <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov"  label="${tool.name} on ${on_string}: pbcstat base coverage file">
@@ -232,15 +286,17 @@
         <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file">
             <filter>function_select['functions'] == 'pbcstat'</filter>
         </data>
-        <data name="pbcstat_stat" format="tabular" from_work_dir="PB.stat" label="${tool.name} on ${on_string}: stat file">
-            <filter>function_select['functions'] == 'pbcstat'</filter>
+
+        <data name="hist" format="png" from_work_dir="hist.png" label="${tool.name} on ${on_string}: histogram plot">
+            <filter>function_select['functions'] in ('pbcstat', 'ngscstat')</filter>
         </data>
+
         <!-- Calcuts -->
         <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file">
-            <filter>function_select['functions'] == 'calcuts'</filter>
+            <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter>
         </data>
         <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file">
-            <filter>function_select['functions'] == 'calcuts'</filter>
+            <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter>
         </data>
         <!-- Purge dups -->
         <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file">
@@ -311,7 +367,7 @@
             <output name="split_fasta" value="split_out.fasta"/>
         </test>
         <!-- pbcstat -->
-        <test expect_num_outputs="3">
+        <test expect_num_outputs="6">
             <conditional name="function_select">
                 <param name="functions" value="pbcstat"/>
                 <param name="input" value="test.paf"/>
@@ -320,12 +376,22 @@
                 <param name="min_map_qual" value="1"/>
                 <param name="flank" value="1"/>
                 <param name="primary_alignments" value="-p"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
             </conditional>
+            <output name="calcuts_tab" value="calcuts_out.tsv"/>
             <output name="pbcstat_cov" value="out.cov"/>
             <output name="pbcstat_wig" value="out.wig"/>
+            <output name="stat_file" value="pbcstats.tabular"/>
+            <output name="hist" value="hist.png" ftype="png" compare="sim_size"/>
         </test>
         <!-- pbcstat gzip -->
-        <test expect_num_outputs="3">
+        <test expect_num_outputs="6">
             <conditional name="function_select">
                 <param name="functions" value="pbcstat"/>
                 <param name="input" value="test.paf.gz" ftype="paf.gz"/>
@@ -334,39 +400,68 @@
                 <param name="min_map_qual" value="1"/>
                 <param name="flank" value="1"/>
                 <param name="primary_alignments" value="-p"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
             </conditional>
+            <output name="calcuts_tab" value="calcuts_out.tsv"/>
             <output name="pbcstat_cov" value="out.cov"/>
             <output name="pbcstat_wig" value="out.wig"/>
         </test>
-                <!-- Pbcstat multiple input -->
-        <test expect_num_outputs="3">
+        <!-- Pbcstat multiple input -->
+        <test expect_num_outputs="6">
             <conditional name="function_select">
                 <param name="functions" value="pbcstat"/>
                 <param name="input" value="test.paf,test2.paf.gz"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
             </conditional>
+            <output name="calcuts_tab" value="calcuts_out.tsv"/>
             <output name="pbcstat_cov" value="out2.cov"/>
-            <output name="pbcstat_wig" value="out2.wig"/>        
+            <output name="stat_file" value="pbcstats2.tabular"/>
+            <output name="pbcstat_wig" value="out2.wig"/>
         </test>
         <!-- ngscstat -->
-        <test expect_num_outputs="2">
+        <test expect_num_outputs="5">
             <conditional name="function_select">
                 <param name="functions" value="ngscstat"/>
                 <param name="input" value="test.bam"/>
                 <param name="min_align_qual" value="10"/>
                 <param name="max_insert" value="100"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
             </conditional>
+            <output name="calcuts_tab" value="calcuts_out.tsv"/>
             <output name="ngscstat_cov" value="ngsc_out.cov"/>
+            <output name="stat_file" value="tx_stats.tabular"/> 
+            <output name="hist" value="hist.png" ftype="png" compare="sim_size"/>
         </test>
         <!-- Calcuts -->
         <test expect_num_outputs="2">
             <conditional name="function_select">
                 <param name="functions" value="calcuts"/>
                 <param name="input" value="test.stat"/>
-                <param name="min_depth" value="0.01"/>
-                <param name="low_depth" value="1"/>
-                <param name="transition" value="1"/>
-                <param name="upper_depth" value="100"/>
-                <param name="ploidy" value="-d 0"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
             </conditional>
             <output name="calcuts_tab" value="calcuts_out.tsv"/>
         </test>
@@ -386,6 +481,35 @@
             </conditional>
             <output name="get_seqs_purged" value="purged_out.fa"/>
         </test>
+        <!-- pbcstat histogram options-->
+        <test expect_num_outputs="6">
+            <conditional name="function_select">
+                <param name="functions" value="pbcstat"/>
+                <param name="input" value="test.paf"/>
+                <param name="max_cov" value="1000"/>
+                <param name="min_map_ratio" value="0.01"/>
+                <param name="min_map_qual" value="1"/>
+                <param name="flank" value="1"/>
+                <param name="primary_alignments" value="-p"/>
+                <section name="section_calcuts">
+                    <param name="min_depth" value="0.01"/>
+                    <param name="low_depth" value="1"/>
+                    <param name="transition" value="1"/>
+                    <param name="upper_depth" value="100"/>
+                    <param name="ploidy" value="-d 0"/>
+                </section>
+                <section name="section_hist">
+                    <param name="ymax" value="100"/>
+                    <param name="xmax" value="100"/>
+                    <!-- no cutoffs_his test param: that input is commented out in the "trimmers" macro, so the tool has no such parameter -->
+                </section>
+            </conditional>
+            <output name="calcuts_tab" value="calcuts_out.tsv"/>
+            <output name="pbcstat_cov" value="out_hist_options.cov"/>
+            <output name="pbcstat_wig" value="out_hist_options.wig"/>
+            <output name="stat_file" value="pbcstats_hist_options.tabular"/>
+            <output name="hist" value="hist_options.png" ftype="png" compare="sim_size"/>
+        </test>
     </tests>
     <help><![CDATA[
         .. class:: infomark