Repository 'pbccs'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxy-australia/pbccs

Changeset 0:f159c3da2ffc (2022-03-18)
Commit message:
"planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/pbccs commit 5ba0af2de1e1ce7b7a8ed3eb4042f5fec366c234"
added:
README.md
macros.xml
pbccs.xml
test-data/input.subreads.bam
test-data/log.txt
test-data/output.bam
test-data/report.txt
b
diff -r 000000000000 -r f159c3da2ffc README.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md Fri Mar 18 02:10:52 2022 +0000
[
@@ -0,0 +1,7 @@
+ccs combines multiple subreads of the same SMRTbell molecule using a statistical model to produce one highly accurate consensus sequence, also called a HiFi read, along with base quality values. This tool powers the Circular Consensus Sequencing workflow in SMRT Link.
+
+This Galaxy wrapper uses ccs version 3.4.1 so that it can process the older BAM files generated by the PacBio RS II chemistry.  
+
+The latest version of ccs (pbccs) is 6.2.0 and can be found on [conda](https://anaconda.org/bioconda/pbccs) 
+
+See [doc](https://ccs.how/) for more details.
b
diff -r 000000000000 -r f159c3da2ffc macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Mar 18 02:10:52 2022 +0000
[
@@ -0,0 +1,29 @@
+
+<macros>
+    <!-- ===== Tokens ===== -->
+    <!-- Version of the wrapped package; reused by the requirement declared below. -->
+    <token name="@TOOL_VERSION@">3.4.1</token>
+    <!-- Shell prefix that exports QT_QPA_PLATFORM='offscreen' ahead of the command it is prepended to. -->
+    <token name="@HEADLESS@"><![CDATA[export QT_QPA_PLATFORM='offscreen' &&]]></token>
+    <!-- reStructuredText overview meant to be embedded in a tool help section. -->
+    <token name="@PBCCS_OVERVIEW@">
+
+**Generate Highly Accurate Single-Molecule Consensus Reads (CCS) Overview**
+
+CCS combines multiple subreads of the same SMRTbell molecule using a statistical model to produce one highly accurate consensus sequence, also called a HiFi read, along with base quality values. This tool powers the Circular Consensus Sequencing workflow in SMRT Link.
+
+
+CCS works with PacBio subreads bam (subreads.bam) files. For more information about this file format, see here_
+
+.. _here: https://ccs.how/how-does-ccs-work.html
+    </token>
+
+    <!-- ===== XML macros ===== -->
+    <!-- Package dependency, pinned to @TOOL_VERSION@. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">pbccs</requirement>
+        </requirements>
+    </xml>
+    <!-- Command used to report the version of the ccs executable. -->
+    <xml name="version_command">
+        <version_command>@HEADLESS@ ccs --version</version_command>
+    </xml>
+    <!-- Publication cited by the tool. -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1016/j.gpb.2015.08.002</citation>
+        </citations>
+    </xml>
+</macros>
b
diff -r 000000000000 -r f159c3da2ffc pbccs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pbccs.xml Fri Mar 18 02:10:52 2022 +0000
[
@@ -0,0 +1,143 @@
+<tool id="pbccs" name="CCS" version="0.1.0" python_template_version="3.5">
+    <description>Generate accurate consensus sequences from subreads</description>
+    <!-- Shared tokens and macro definitions are imported from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <!-- Expand the "requirements" and "version_command" macros defined in macros.xml. -->
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"> <![CDATA[
+    ## Stage the input under a fixed name inside a scratch directory and run ccs there.
+    ## A symlink is used instead of a copy so large subread BAMs are not duplicated.
+    mkdir -p ./tmp &&
+    ln -s '$input_file' ./tmp/input.bam &&
+    cd ./tmp/ &&
+    ccs
+    ## Use the number of cores Galaxy allocated to the job; fall back to 4 when unset.
+    -j \${GALAXY_SLOTS:-4}
+    ## Numeric parameters always carry a value, so they are passed unconditionally.
+    ## A Cheetah truth test would silently drop a legitimate 0
+    ## (for example minIdentity 0, which the form documents as disabling the filter).
+    --minLength '$input_filter_options.minLength'
+    --maxLength '$input_filter_options.maxLength'
+    --minPasses '$input_filter_options.minPasses'
+    --minIdentity '$input_filter_options.minIdentity'
+    --minSnr '$input_filter_options.minSnr'
+    #if $input_filter_options.zmws:
+        --zmws '$input_filter_options.zmws'
+    #end if
+    ## Optional model overrides: only emitted when the user supplied them.
+    #if $model_override_options.modelPath:
+        --modelPath '$model_override_options.modelPath'
+    #end if
+    #if $model_override_options.modelSpec:
+        --modelSpec '$model_override_options.modelSpec'
+    #end if
+    --minPredictedAccuracy '$output_filter_options.minPredictedAccuracy'
+    --minReadScore '$output_filter_options.minReadScore'
+    --maxDropFraction '$output_filter_options.maxDropFraction'
+    ## Boolean parameters render as their truevalue (the flag itself) or as an empty
+    ## string, so they are emitted bare; quoting them as a value would repeat the flag.
+    $processing_options.byStrand
+    $processing_options.noPolish
+    $processing_options.richQVs
+    #if $if_log.log_options == "yes":
+        --logFile  '$log_output'
+        --logLevel '$if_log.loglevel'
+    #end if
+    ## NOTE(review): the report is moved unconditionally further down, which presumably
+    ## relies on ccs writing ccs_report.txt by default; confirm against ccs 3.4.1.
+    #if $output_options.reportFile:
+        --reportFile ccs_report.txt
+    #end if
+    input.bam
+    output.bam 
+    && mv output.bam '$bam_output'
+    && mv ccs_report.txt '$report_output' 
+    2>&1
+    
+    ]]></command>
+    <inputs>
+ <param name="input_file" type="data" format="bam" label="Input Subreads" help="Subreads (subreads.bam)"/>
+ <section name="input_filter_options" title="Input Filter Options" expanded="False">
+        <param argument="--minLength" type="integer" value="10" label="Minimum length" help="Minimum draft length before polishing. [10]" />
+        <param argument="--maxLength" type="integer" value="21000"  label="Maximum length" help="Maximum draft length before polishing. [21000]"/>
+        <param argument="--minPasses" type="integer" value="3" label="Minimum number of subreads" help="Minimum number of subreads required to generate CCS. [3]" />
+        <param argument="--minIdentity" type="float" value="0.82" label="top N passes" help="Minimum identity of a subread aligned to the draft consensus to use it for polishing. 0 disables this filter. [0.82]" />
+        <param argument="--minSnr" type="float" value="2.5" label="Minimum SNR of subreads" help="Minimum SNR of subreads to use for generating CCS [2.5]"/>
+        <param argument="--zmws" type="text" value="all" label="Generate CCS for the provided comma-separated hole number ranges only." help="Default=all" />
+       </section>
+       <section name="model_override_options" title="Model Override Options" expanded="False">
+        <param argument="--modelPath" type="data" format="txt" optional="True" label="Model File" help="a model file or directory containing model files." />
+               <param argument="--modelSpec" type="text" value="" optional="True"  label="Chemistry" help="Name of chemistry or model to use, overriding default selection."/>
+       </section>
+ <section name="processing_options" title="Processing Options" expanded="False">
+     <param argument="--byStrand" type="boolean" truevalue="--byStrand" falsevalue="" checked="false" label="Generate a consensus for each strand." />
+     <param argument="--noPolish" type="boolean" truevalue="--noPolish" falsevalue="" checked="false" label="Only output the initial template derived from the POA (faster, less accurate)." />
+     <param argument="--richQVs" type="boolean" truevalue="--richQVs" falsevalue="" checked="false" label="Emit dq, iq, and sq rich quality tracks." />
+        </section>
+ <section name="output_filter_options" title="Output Flter Options" expanded="False">
+        <param argument="--minPredictedAccuracy" type="float" value="0.9" label="Mininum predicted accurary" help="i.e 0.9"/>
+               <param argument="--minReadScore" type="float" value="0.75"  label="Minimum read score of input subreads" help="i.e 0.75"/>
+               <param argument="--maxDropFraction" type="float" value="0.34" label="Maximum fraction of subreads dropped by polishing (not input filters) before skipping ZMW." help="i.e 0.34" />
+ </section>
+ <section name="output_options" title="Output Files Options" expanded="False">
+            <param argument="--reportFile" type="boolean" truevalue="True" falsevalue="False" checked="False" label="write the results report."/>
+        </section>
+ <conditional name="if_log">
+ <param type="select" name="log_options" label="show log option">
+       <option value="yes">Yes</option>
+       <option value="no" selected="true">No</option> 
+         </param>
+ <when value="yes">
+                      <param argument="--logLevel" name="loglevel" type="select" label="Log Level">
+                        <option selected="True" value="WARN">WARN</option>
+                        <option value="TRACE">TRACE</option>
+                        <option value="DEBUG">DEBUG</option>
+                        <option value="INFO">INFO</option>
+ <option value="FATAL">FATAL</option>
+       </param>
+         </when>
+ <when value="no">
+ </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Consensus reads; the command moves output.bam onto this dataset. -->
+        <data name="bam_output" format="bam" label="${tool.name} on ${on_string}: CCS reads in BAM format"/>
+        <!-- Results report; the command moves ccs_report.txt onto this dataset. -->
+        <data name="report_output" format="txt" label="${tool.name} on ${on_string}: CCS reads report"/>
+        <!-- Log file; only created when the log option is set to "yes". -->
+        <data name="log_output" format="txt" label="${tool.name} on ${on_string}: log">
+            <filter>if_log['log_options'] == 'yes' </filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Default settings. The log conditional defaults to "no", so the log_output
+             dataset is filtered out and only the BAM and the report are produced. -->
+        <test expect_num_outputs="2">
+            <param name="input_file" ftype="bam" value="input.subreads.bam"/>
+            <output name="bam_output" ftype="bam" file="output.bam"/>
+            <output name="report_output" ftype="txt" file="report.txt"/>
+        </test>
+    </tests>
+    <!-- The link targets below use names distinct from the "here" target that
+         @PBCCS_OVERVIEW@ defines, because reStructuredText rejects duplicate target names. -->
+    <help><![CDATA[
+@PBCCS_OVERVIEW@
+
+For detailed information on CCS, see the `pbccs repository`_, or view the license_.
+
+.. _pbccs repository: https://github.com/nlhepler/pbccs
+.. _license: https://github.com/nlhepler/pbccs/blob/master/LICENSE
+
+**Command Documentation**
+
+CCS generates consensus sequences from the input subreads.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
+
b
diff -r 000000000000 -r f159c3da2ffc test-data/input.subreads.bam
b
Binary file test-data/input.subreads.bam has changed
b
diff -r 000000000000 -r f159c3da2ffc test-data/log.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/log.txt Fri Mar 18 02:10:52 2022 +0000
b
@@ -0,0 +1,14 @@
+ZMW Yield
+Success (without retry) -- CCS generated,2,100.00%
+Success (with retry)    -- CCS generated,0,0.00%
+Failed -- Below SNR threshold,0,0.00%
+Failed -- No usable subreads,0,0.00%
+Failed -- Insert size too long,0,0.00%
+Failed -- Insert size too small,0,0.00%
+Failed -- Not enough full passes,0,0.00%
+Failed -- Too many unusable subreads,0,0.00%
+Failed -- CCS did not converge,0,0.00%
+Failed -- CCS below minimum predicted accuracy,0,0.00%
+Failed -- Unknown error during processing,0,0.00%
+
+
b
diff -r 000000000000 -r f159c3da2ffc test-data/output.bam
b
Binary file test-data/output.bam has changed
b
diff -r 000000000000 -r f159c3da2ffc test-data/report.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/report.txt Fri Mar 18 02:10:52 2022 +0000
b
@@ -0,0 +1,14 @@
+ZMW Yield
+Success (without retry) -- CCS generated,2,100.00%
+Success (with retry)    -- CCS generated,0,0.00%
+Failed -- Below SNR threshold,0,0.00%
+Failed -- No usable subreads,0,0.00%
+Failed -- Insert size too long,0,0.00%
+Failed -- Insert size too small,0,0.00%
+Failed -- Not enough full passes,0,0.00%
+Failed -- Too many unusable subreads,0,0.00%
+Failed -- CCS did not converge,0,0.00%
+Failed -- CCS below minimum predicted accuracy,0,0.00%
+Failed -- Unknown error during processing,0,0.00%
+
+