changeset 0:0fbeff566070 draft default tip

"planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/ont_fast5_api commit 9fb27995131a4106053eb85d687857d2cbd2a5c6"
author artbio
date Sat, 22 May 2021 15:11:43 +0000
parents
children
files compress_fast5.xml fast5_subset.xml macros.xml multi_to_single_fast5.xml single_to_multi_fast5.xml test-data/filename_mapping_1.txt test-data/filename_mapping_2.txt test-data/filename_mapping_4.txt test-data/filename_mapping_5.txt test-data/filename_mapping_7.txt test-data/filename_mapping_8.txt test-data/filename_mapping_9.txt test-data/list.txt test-data/multi.fast5.tar test-data/single.fast5.tar
diffstat 15 files changed, 499 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/compress_fast5.xml	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,99 @@
+<?xml version="1.0"?>
+<tool id="ont_fast5_api_compress_fast5" name="@TOOL_NAME@ Compress" version="@TOOL_VERSION@+galaxy0" profile="18.01">
+    <description>multi read file(s)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command><![CDATA[compress_fast5 -v]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+@INITIALIZE@
+
+## run
+compress_fast5
+--recursive
+## required
+@INPUTPATH@
+@SAVEPATH@
+--compression '$compression'
+## optional
+@THREADS@
+
+## create tarball
+@TARBALL@
+    ]]></command>
+    <inputs>
+        <expand macro="input"/>
+        <param argument="--compression" type="select" label="Select output compression type">
+            <option value="gzip" selected="true">GZIP</option>
+            <option value="vbz_legacy_v0">VBZ legacy v0</option>
+            <option value="vbz">VBZ</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_results" format="fast5.tar" from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results">
+            <change_format>
+                <when input="compression" value="gzip" format="fast5.tar.gz"/>
+                <when input="compression" value="vbz_legacy_v0" format="fast5.tar.bz2"/>
+                <when input="compression" value="vbz" format="fast5.tar.bz2"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1 vbz -->
+        <test expect_num_outputs="1">
+            <param name="input_path" value="multi.fast5.tar"/>
+            <param name="compression" value="vbz"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="1">
+            <param name="input_path" value="multi.fast5.tar"/>
+            <param name="compression" value="vbz_legacy_v0"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #3 -->
+        <test expect_num_outputs="1">
+            <param name="input_path" value="multi.fast5.tar"/>
+            <param name="compression" value="gzip"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+*compress_fast5* copies and converts raw data between vbz and gzip compression formats.
+
+**Input**
+
+Multi read file(s) in FAST5 format that are stored in a flat TAR.
+
+**Output**
+
+Multi read FAST5 file(s) containing compressed reads that are stored in a flat TAR. Further detail of HDF5 data management strategies can be found `here <https://support.hdfgroup.org/HDF5/doc/Advanced/FileSpaceManagement/FileSpaceManagement.pdf>`_.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fast5_subset.xml	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,112 @@
+<?xml version="1.0"?>
+<tool id="ont_fast5_api_fast5_subset" name="@TOOL_NAME@ Subset" version="@TOOL_VERSION@+galaxy2" profile="18.01">
+    <description>of multi read file(s)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- no specific version command for subcommand fast5_subset available -->
+    <version_command><![CDATA[compress_fast5 -v]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+mkdir -p './data' &&
+tar -xf '$input' -C './data' &&
+
+## run
+fast5_subset
+--recursive
+## required
+--input './data'
+@SAVEPATH@
+--read_id_list '$read_id_list'
+## optional
+@COMPRESSION@
+@BATCHSIZE@
+@THREADS@
+
+## create tarball
+@TARBALL@
+    ]]></command>
+    <inputs>
+        <expand macro="input" argument="--input"/>
+        <param argument="--read_id_list" type="data" format="txt,tabular" label="Select file with read IDs" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/>
+        <expand macro="batch_size"/>
+        <expand macro="compression"/>
+    </inputs>
+    <outputs>
+        <expand macro="output"/>
+    </outputs>
+    <tests>
+        <!-- #1 default -->
+        <test expect_num_outputs="2">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="30720"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_1.txt" compare="sim_size" delta="1" />
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="2">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="batch_size" value="2"/>
+            <param name="compression" value="gzip"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="51200"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_2.txt" compare="sim_size" delta="1" />
+        </test>
+        <!-- #3 -->
+        <test expect_num_outputs="2">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="compression" value="vbz"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #4 -->
+        <test expect_num_outputs="2">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="compression" value="vbz_legacy_v0"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_4.txt" compare="sim_size" delta="1" />
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+*fast5_subset* extracts reads from multi read FAST5 file(s) based on a list of read IDs.
+
+**Input**
+
+Multi read file(s) in FAST5 format, that are stored in a flat TAR, and a list of read IDs that should be extracted.
+
+**Output**
+
+Multi read file(s) in FAST5 format containing a subset of the input file(s). The results are stored in a flat TAR.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">3.1.3</token>
+    <token name="@TOOL_NAME@">ont_fast5_api:</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">ont-fast5-api</requirement>
+            <requirement type="package" version="1.10.5">hdf5</requirement>
+        </requirements>
+    </xml>
+
+    <!-- command -->
+    <token name="@BATCHSIZE@"><![CDATA[
+--batch_size $batch_size
+    ]]></token>
+    <token name="@COMPRESSION@"><![CDATA[
+#if str($compression) != 'none'
+    --compression '$compression'
+#end if
+    ]]></token>
+    <token name="@INITIALIZE@"><![CDATA[
+mkdir -p './data' &&
+tar -xf '$input_path' -C './data' &&
+    ]]></token>
+    <token name="@INPUTPATH@"><![CDATA[
+--input_path './data'
+    ]]></token>
+    <token name="@SAVEPATH@"><![CDATA[
+--save_path './results'
+    ]]></token>
+    <token name="@TARBALL@"><![CDATA[
+&& find './results' -type f -name '*.fast5' | tar --transform 's/.*\///g' -cvf './results.fast5.tar' --files-from=/dev/stdin
+    ]]></token>
+    <token name="@THREADS@"><![CDATA[
+--threads \${GALAXY_SLOTS:-4}
+    ]]></token>
+
+    <!-- input -->
+    <xml name="input" token_argument="--input_path" token_label="multi">
+        <param argument="@ARGUMENT@" type="data" format="fast5.tar" label="Select @LABEL@ read input file"/>
+    </xml>
+    <xml name="batch_size">
+        <param argument="--batch_size" type="integer" value="4000" min="1" label="Set batch size" help="Number of single reads to include in each multi read file"/>
+    </xml>
+    <xml name="compression">
+        <param argument="--compression" type="select" label="Select output compression type">
+            <option value="none" selected="true">None</option>
+            <option value="gzip">GZIP</option>
+            <option value="vbz_legacy_v0">VBZ legacy v0</option>
+            <option value="vbz">VBZ</option>
+        </param>
+    </xml>
+
+    <!-- output -->
+    <xml name="output">
+        <data name="out_results" format="fast5.tar" from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results">
+            <change_format>
+                <when input="compression" value="gzip" format="fast5.tar.gz"/>
+                <when input="compression" value="vbz_legacy_v0" format="fast5.tar.bz2"/>
+                <when input="compression" value="vbz" format="fast5.tar.bz2"/>
+            </change_format>
+        </data>
+        <data name="summary" format="txt" from_work_dir="./results/*.txt" label="filename_mapping.txt" />
+    </xml>
+
+   <!-- help -->
+    <token name="@WID@"><![CDATA[
+*ont_fast5_api* is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format.
+
+- concrete implementation of the FAST5 file schema using the generic h5py library
+- plain-english-named methods to interact with and reflect the FAST5 file schema
+- tools to convert between multi_read and single_read formats
+- tools to compress/decompress raw data in files
+    ]]></token>
+    <token name="@REFERENCES@"><![CDATA[
+More information is available on `github <https://github.com/nanoporetech/ont_fast5_api>`_.
+    ]]></token>
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @online{ont_fast5_api,
+                author = {Oxford Nanopore Technologies },
+                title = {ont_fast5_api},
+                year = 2020,
+                url = {https://github.com/nanoporetech/ont_fast5_api},
+                urldate = {2020-06-01}
+            }</citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multi_to_single_fast5.xml	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<tool id="ont_fast5_api_multi_to_single_fast5" name="@TOOL_NAME@ Multi to single" version="@TOOL_VERSION@+galaxy0" profile="18.01">
+    <description>read file(s)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command><![CDATA[multi_to_single_fast5 -v]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+@INITIALIZE@
+
+## run
+multi_to_single_fast5
+--recursive
+## required
+@INPUTPATH@
+@SAVEPATH@
+## optional
+@THREADS@
+
+## create tarball
+@TARBALL@
+    ]]></command>
+    <inputs>
+        <expand macro="input"/>
+    </inputs>
+    <outputs>
+        <expand macro="output"/>
+    </outputs>
+    <tests>
+        <!-- #1 default -->
+        <test expect_num_outputs="2">
+            <param name="input_path" value="multi.fast5.tar"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="71680"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_8.txt" />
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+*multi_to_single_fast5* converts multi read FAST5 file(s) into single read FAST5 files.
+
+**Input**
+
+Multi read file(s) in FAST5 format that are stored in a flat TAR.
+
+**Output**
+
+Single read FAST5 files that are stored in a flat TAR.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/single_to_multi_fast5.xml	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,104 @@
+<?xml version="1.0"?>
+<tool id="ont_fast5_api_single_to_multi_fast5" name="@TOOL_NAME@ Single to multi" version="@TOOL_VERSION@+galaxy0" profile="18.01">
+    <description>read file(s)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command><![CDATA[single_to_multi_fast5 -v]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+@INITIALIZE@
+
+## run
+single_to_multi_fast5
+--recursive
+## required
+@INPUTPATH@
+@SAVEPATH@
+## optional
+@COMPRESSION@
+@BATCHSIZE@
+@THREADS@
+
+## create tarball
+@TARBALL@
+    ]]></command>
+    <inputs>
+        <expand macro="input" label="single"/>
+        <expand macro="batch_size"/>
+        <expand macro="compression"/>
+    </inputs>
+    <outputs>
+        <expand macro="output"/>
+    </outputs>
+    <tests>
+        <!-- #1 default -->
+        <test expect_num_outputs="2">
+            <param name="input_path" value="single.fast5.tar"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" />
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="2">
+            <param name="input_path" value="single.fast5.tar"/>
+            <param name="batch_size" value="3"/>
+            <param name="compression" value="vbz"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="51200"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #3 -->
+        <test expect_num_outputs="2">
+            <param name="input_path" value="single.fast5.tar"/>
+            <param name="compression" value="vbz_legacy_v0"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" />
+        </test>
+        <!-- #4 -->
+        <test expect_num_outputs="2">
+            <param name="input_path" value="single.fast5.tar"/>
+            <param name="compression" value="gzip"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
+            </output>
+            <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" />
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+*single_to_multi_fast5* converts single read FAST5 files into multi read FAST5 file(s).
+
+**Input**
+
+Single read files in FAST5 format that are stored in a flat TAR.
+
+**Output**
+
+Multi read file(s) in FAST5 format that are stored in a flat TAR.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_1.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe85b517-62ee-4a33-8767-41cab5d5ab39	batch0.fast5
+fe8a3026-d1f4-46b3-8daa-e610f27acde1	batch0.fast5
+fe849dd3-63bc-4044-8910-14e1686273bb	batch0.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_2.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe849dd3-63bc-4044-8910-14e1686273bb	batch0.fast5
+fe8a3026-d1f4-46b3-8daa-e610f27acde1	batch0.fast5
+fe85b517-62ee-4a33-8767-41cab5d5ab39	batch1.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_4.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe85b517-62ee-4a33-8767-41cab5d5ab39	batch0.fast5
+fe8a3026-d1f4-46b3-8daa-e610f27acde1	batch0.fast5
+fe849dd3-63bc-4044-8910-14e1686273bb	batch0.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_5.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe849dd3-63bc-4044-8910-14e1686273bb	batch0.fast5
+fe85b517-62ee-4a33-8767-41cab5d5ab39	batch0.fast5
+fe8a3026-d1f4-46b3-8daa-e610f27acde1	batch1.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_7.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe849dd3-63bc-4044-8910-14e1686273bb	batch0.fast5
+fe85b517-62ee-4a33-8767-41cab5d5ab39	batch0.fast5
+fe8a3026-d1f4-46b3-8daa-e610f27acde1	batch0.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_8.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,4 @@
+./data/multi.fast5	fe849dd3-63bc-4044-8910-14e1686273bb.fast5
+./data/multi.fast5	fe85b517-62ee-4a33-8767-41cab5d5ab39.fast5
+./data/multi.fast5	fe8a3026-d1f4-46b3-8daa-e610f27acde1.fast5
+./data/multi.fast5	fe9374ee-b86a-4ca4-81dc-ac06e3297728.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filename_mapping_9.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,4 @@
+single1.fast5	./results/batch_0.fast5
+single2.fast5	./results/batch_0.fast5
+single3.fast5	./results/batch_0.fast5
+single4.fast5	./results/batch_0.fast5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list.txt	Sat May 22 15:11:43 2021 +0000
@@ -0,0 +1,3 @@
+fe849dd3-63bc-4044-8910-14e1686273bb
+fe85b517-62ee-4a33-8767-41cab5d5ab39
+fe8a3026-d1f4-46b3-8daa-e610f27acde1
Binary file test-data/multi.fast5.tar has changed
Binary file test-data/single.fast5.tar has changed