Mercurial > repos > iuc > nanopolishcomp_eventaligncollapse

diff eventaligncollapse.xml @ 0:6847a625db4d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ commit e96954b0926211a1da3292c00aada6d366a2b25d"
author: iuc
date: Thu, 30 Apr 2020 05:56:29 -0400
children: ad011fc670d6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/eventaligncollapse.xml	Thu Apr 30 05:56:29 2020 -0400
@@ -0,0 +1,166 @@
+<?xml version="1.0"?>
+<tool id="nanopolishcomp_eventaligncollapse" name="Eventalign Collapse" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
+    <description>by kmers rather than by event</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <stdio></stdio>
+    <version_command>NanopolishComp --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## initialize
+        ## requires a minimum of 3 threads
+        threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
+
+        ## run
+        NanopolishComp
+            Eventalign_collapse
+            -i '$i'
+            -s
+            -r $r
+            -f 
+            #for $current in $f
+                $current 
+            #end for
+            -o 'results'
+            -p 'out'
+            -t \$threads
+            -v
+        ]]></command>
+    <inputs>
+        <param argument="-i" type="data" format="tabular" label="Select nanopolish eventalign file"/>
+        <param argument="-s" type="boolean" truevalue="-s" falsevalue="" label="Should raw samples be written?" help="You need to run nanopolish eventalign with --sample option to make use of this feature."/>
+        <param argument="-r" type="integer" value="0" min="0" label="Set maximum number of reads to parse" help="Use 0 to deactivate this option."/>
+        <param argument="-f" type="select" multiple="true" label="Select statistical fields to compute" help="You need to run nanopolish eventalign with --sample option to make use of this feature.">
+            <option value="mean" selected="true">Mean</option>
+            <option value="std">Std</option>
+            <option value="median" selected="true">Median</option>
+            <option value="mad">Mad</option>
+            <option value="num_signals" selected="true">Number of signals</option>
+        </param>
+        <param name="out" type="select" multiple="true" label="Select output file(s)" help="">
+            <option value="eventalign_collapse" selected="true">Eventalign Collapse</option>
+            <option value="index" selected="true">Index</option>
+            <option value="log">Log</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_eventalign_collapse" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv" label="${tool.name} on ${on_string}: Eventalign Collapse">
+            <filter>'eventalign_collapse' in out</filter>
+        </data>
+        <data name="out_index" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv.idx" label="${tool.name} on ${on_string}: Index">
+            <filter>'index' in out</filter>
+        </data>
+        <data name="out_log" format="txt" from_work_dir="results/out_eventalign_collapse.log" label="${tool.name} on ${on_string}: log">
+            <filter>'log' in out</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1 default -->
+        <test expect_num_outputs="3">
+            <param name="i" value="sample.tsv"/>
+            <param name="out" value="eventalign_collapse,index,log"/>
+            <output name="out_eventalign_collapse">
+                <assert_contents>
+                    <has_n_lines n="236"/>
+                    <has_text_matching expression="ref_pos&#009;ref_kmer"/>
+                    <has_text_matching expression="22102&#009;GGAAA"/>
+                </assert_contents>
+            </output>
+            <output name="out_index">
+                <assert_contents>
+                    <has_n_lines n="60"/>
+                    <has_text_matching expression="ref_id&#009;ref_start"/>
+                    <has_text_matching expression="chr&#009;22102"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_n_lines n="13"/>
+                    <has_line line="General options:"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="2">
+            <param name="i" value="sample.tsv"/>
+            <param name="s" value="true"/>
+            <param name="r" value="10"/>
+            <param name="f" value="mean,std,median,mad,num_signals"/>
+            <param name="out" value="eventalign_collapse,index"/>
+            <output name="out_eventalign_collapse">
+                <assert_contents>
+                    <has_n_lines n="236"/>
+                    <has_text_matching expression="ref_pos&#009;ref_kmer"/>
+                    <has_text_matching expression="22102&#009;GGAAA"/>
+                </assert_contents>
+            </output>
+            <output name="out_index">
+                <assert_contents>
+                    <has_n_lines n="60"/>
+                    <has_text_matching expression="ref_id&#009;ref_start"/>
+                    <has_text_matching expression="chr&#009;22102"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+Eventalign_collapse collapses the raw file generated by nanopolish eventalign by kmers rather than by event.
+
+**Input**
+
+A nanopolish eventalign tabular output file. 
+
+**Output**
+
+Contrary to nanopolish eventalign output text file, in Eventalign_collapse the reads are separated by a hashtag headers containing the read_id and ref_id. This reduces the redundancy and makes it easier to find the start and end of a read.
+
+::
+
+    Example : #7ef1d7b9-5824-4382-b23b-78d82c07ebbd YHR055C.
+
+The main data file contains the following fields:
+
+- ref_pos: Reference sequence ID (contig).
+- ref_kmer: Sequence of the reference kmers.
+- -num_events: Number of events for this kmer before collapsing.
+- dwell_time: dwell time for this kmer in seconds
+- NNNNN_dwell_time: dwell time of events for this kmers with a model sequence "NNNNN" (events ignored by nanopolish HMM).
+- mismatch_dwell_time: dwell time of events for this kmers with a model sequence different from the reference kmer
+- start_idx: Only if nanopolish eventalign called with --signal_idx. Start coordinate on original raw signal in fast5 file
+- end_idx: Only if nanopolish eventalign called with --signal_idx. End coordinate on original raw signal in fast5 file
+- mean: Only if nanopolish eventalign called with --samples. Mean of the normalised signal values provided by Nanopolish eventalign
+- median: Only if nanopolish eventalign called with --samples. Median of the normalised signal values provided by Nanopolish eventalign
+- std: Only if nanopolish eventalign called with --samples. Standard deviation of the normalised signal values provided by Nanopolish eventalign
+- mad: Only if nanopolish eventalign called with --samples. Median absolute deviation of the normalised signal values provided by Nanopolish eventalign
+- num_signals: Only if nanopolish eventalign called with --samples. Number of raw signal points.
+- samples: Only if nanopolish eventalign called with --samples and Eventalign_collapse called with --write_samples. List of normalised signal intensity values for this kmer
+
+In addition Eventalign_collapse also generates an useful index file containing reads level information. It contains the following fields:
+
+- read_id: Name or index of the read
+- ref_id: Name of the reference sequence the read was aligned on (contig)
+- ref_start: Start coordinate of the alignment on the reference sequence
+- ref_end: End coordinate of the alignment on the reference sequence
+- dwell_time: Cumulative dwell time in seconds for the entire resquiggled sequence
+- kmers: Overall number of resquiggled kmers
+- NNNNN_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence was "NNNNN"
+- mismatching_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence diverged from the reference sequence
+- missing_kmers: Number of skipped/missing reference positions in nanopolish output
+- byte_offset: Number of characters before the start of the sequence in the main output file. This can be used in conjunction with file.seek() to directly access the start of a read. An example is provided in the Usage notebook.
+- byte_len: Length of characters after byte_offset to the end of the read, excluding the last newline. This can be used in conjunction with read() to read all the text chunk corresponding to the read.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
author	iuc
date	Thu, 30 Apr 2020 05:56:29 -0400
parents
children	ad011fc670d6