Mercurial > repos > artbio > ont_fast5_api
changeset 0:0fbeff566070 draft default tip
"planemo upload for repository https://github.com/artbio/tools-artbio/tree/master/tools/ont_fast5_api commit 9fb27995131a4106053eb85d687857d2cbd2a5c6"
author | artbio |
---|---|
date | Sat, 22 May 2021 15:11:43 +0000 |
parents | |
children | |
files | compress_fast5.xml fast5_subset.xml macros.xml multi_to_single_fast5.xml single_to_multi_fast5.xml test-data/filename_mapping_1.txt test-data/filename_mapping_2.txt test-data/filename_mapping_4.txt test-data/filename_mapping_5.txt test-data/filename_mapping_7.txt test-data/filename_mapping_8.txt test-data/filename_mapping_9.txt test-data/list.txt test-data/multi.fast5.tar test-data/single.fast5.tar |
diffstat | 15 files changed, 499 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/compress_fast5.xml Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,99 @@ +<?xml version="1.0"?> +<tool id="ont_fast5_api_compress_fast5" name="@TOOL_NAME@ Compress" version="@TOOL_VERSION@+galaxy0" profile="18.01"> + <description>multi read file(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <version_command><![CDATA[compress_fast5 -v]]></version_command> + <command detect_errors="exit_code"><![CDATA[ +## initialize +@INITIALIZE@ + +## run +compress_fast5 +--recursive +## required +@INPUTPATH@ +@SAVEPATH@ +--compression '$compression' +## optional +@THREADS@ + +## create tarball +@TARBALL@ + ]]></command> + <inputs> + <expand macro="input"/> + <param argument="--compression" type="select" label="Select output compression type"> + <option value="gzip" selected="true">GZIP</option> + <option value="vbz_legacy_v0">VBZ legacy v0</option> + <option value="vbz">VBZ</option> + </param> + </inputs> + <outputs> + <data name="out_results" format="fast5.tar" from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results"> + <change_format> + <when input="compression" value="gzip" format="fast5.tar.gz"/> + <when input="compression" value="vbz_legacy_v0" format="fast5.tar.bz2"/> + <when input="compression" value="vbz" format="fast5.tar.bz2"/> + </change_format> + </data> + </outputs> + <tests> + <!-- #1 compression: vbz --> + <test expect_num_outputs="1"> + <param name="input_path" value="multi.fast5.tar"/> + <param name="compression" value="vbz"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + </test> + <!-- #2 compression: vbz_legacy_v0 --> + <test expect_num_outputs="1"> + <param name="input_path" value="multi.fast5.tar"/> + <param name="compression" value="vbz_legacy_v0"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + </test> + <!-- #3 compression: gzip (the default option) --> + <test 
expect_num_outputs="1"> + <param name="input_path" value="multi.fast5.tar"/> + <param name="compression" value="gzip"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +@WID@ + +*compress_fast5* copies and converts raw data between vbz and gzip compression formats. + +**Input** + +Multi read file(s) in FAST5 format that are stored in a flat TAR. + +**Output** + +Multi read FAST5 file(s) containing compressed reads that are stored in a flat TAR. Further details on HDF5 data management strategies can be found `here <https://support.hdfgroup.org/HDF5/doc/Advanced/FileSpaceManagement/FileSpaceManagement.pdf>`_. + +.. class:: infomark + +**References** + +@REFERENCES@ + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fast5_subset.xml Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,112 @@ +<?xml version="1.0"?> +<tool id="ont_fast5_api_fast5_subset" name="@TOOL_NAME@ Subset" version="@TOOL_VERSION@+galaxy2" profile="18.01"> + <description>of multi read file(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <!-- no specific version command for subcommand fast5_subset available --> + <version_command><![CDATA[compress_fast5 -v]]></version_command> + <command detect_errors="exit_code"><![CDATA[ +## initialize +mkdir -p './data' && +tar -xf '$input' -C './data' && + +## run +fast5_subset +--recursive +## required +--input './data' +@SAVEPATH@ +--read_id_list '$read_id_list' +## optional +@COMPRESSION@ +@BATCHSIZE@ +@THREADS@ + +## create tarball +@TARBALL@ + ]]></command> + <inputs> + <expand macro="input" argument="--input"/> + <param argument="--read_id_list" type="data" format="txt,tabular" label="Select file with read IDs" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/> + <expand macro="batch_size"/> + <expand macro="compression"/> + </inputs> + <outputs> + <expand macro="output"/> + </outputs> + <tests> + <!-- #1 default parameters --> + <test expect_num_outputs="2"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <output name="out_results"> + <assert_contents> + <has_size value="30720"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_1.txt" compare="sim_size" delta="1" /> + </test> + <!-- #2 batch_size: 2, compression: gzip --> + <test expect_num_outputs="2"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <param name="batch_size" value="2"/> + <param name="compression" value="gzip"/> + <output name="out_results"> + <assert_contents> + <has_size value="51200"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" 
value="filename_mapping_2.txt" compare="sim_size" delta="1" /> + </test> + <!-- #3 compression: vbz --> + <test expect_num_outputs="2"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <param name="compression" value="vbz"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + </test> + <!-- #4 compression: vbz_legacy_v0 --> + <test expect_num_outputs="2"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <param name="compression" value="vbz_legacy_v0"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_4.txt" compare="sim_size" delta="1" /> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +@WID@ + +*fast5_subset* extracts reads from multi read FAST5 file(s) based on a list of read IDs. + +**Input** + +Multi read file(s) in FAST5 format, that are stored in a flat TAR, and a list of read IDs that should be extracted. + +**Output** + +Multi read file(s) in FAST5 format containing a subset of the input file(s). The results are stored in a flat TAR. + +.. class:: infomark + +**References** + +@REFERENCES@ + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,91 @@ +<?xml version="1.0"?> +<macros> + <token name="@TOOL_VERSION@">3.1.3</token> + <token name="@TOOL_NAME@">ont_fast5_api:</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">ont-fast5-api</requirement> + <requirement type="package" version="1.10.5">hdf5</requirement> + </requirements> + </xml> + + <!-- command line fragments shared by the tool wrappers --> + <token name="@BATCHSIZE@"><![CDATA[ +--batch_size $batch_size + ]]></token> + <token name="@COMPRESSION@"><![CDATA[ +#if $compression != 'none' + --compression '$compression' +#end if + ]]></token> + <token name="@INITIALIZE@"><![CDATA[ +mkdir -p './data' && +tar -xf '$input_path' -C './data' && + ]]></token> + <token name="@INPUTPATH@"><![CDATA[ +--input_path './data' + ]]></token> + <token name="@SAVEPATH@"><![CDATA[ +--save_path './results' + ]]></token> + <token name="@TARBALL@"><![CDATA[ +&& find './results' -type f -name '*.fast5' | tar --transform 's/.*\///g' -cvf './results.fast5.tar' --files-from=/dev/stdin + ]]></token> + <token name="@THREADS@"><![CDATA[ +--threads \${GALAXY_SLOTS:-4} + ]]></token> + + <!-- input parameters --> + <xml name="input" token_argument="--input_path" token_label="multi"> + <param argument="@ARGUMENT@" type="data" format="fast5.tar" label="Select @LABEL@ read input file"/> + </xml> + <xml name="batch_size"> + <param argument="--batch_size" type="integer" value="4000" min="1" label="Set batch size" help="Number of single reads to include in each multi read file"/> + </xml> + <xml name="compression"> + <param argument="--compression" type="select" label="Select output compression type"> + <option value="none" selected="true">None</option> + <option value="gzip">GZIP</option> + <option value="vbz_legacy_v0">VBZ legacy v0</option> + <option value="vbz">VBZ</option> + </param> + </xml> + + <!-- outputs: result tarball and filename mapping --> + <xml name="output"> + <data name="out_results" format="fast5.tar" 
from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results"> + <change_format> + <when input="compression" value="gzip" format="fast5.tar.gz"/> + <when input="compression" value="vbz_legacy_v0" format="fast5.tar.bz2"/> + <when input="compression" value="vbz" format="fast5.tar.bz2"/> + </change_format> + </data> + <data name="summary" format="txt" from_work_dir="./results/*.txt" label="filename_mapping.txt" /> + </xml> + + <!-- help text fragments --> + <token name="@WID@"><![CDATA[ +*ont_fast5_api* is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format. + +- concrete implementation of the FAST5 file schema using the generic h5py library +- plain-english-named methods to interact with and reflect the FAST5 file schema +- tools to convert between multi_read and single_read formats +- tools to compress/decompress raw data in files + ]]></token> + <token name="@REFERENCES@"><![CDATA[ +More information is available on `github <https://github.com/nanoporetech/ont_fast5_api>`_. + ]]></token> + + <xml name="citations"> + <citations> + <citation type="bibtex"> + @online{ont_fast5_api, + author = {Oxford Nanopore Technologies }, + title = {ont_fast5_api}, + year = 2020, + url = {https://github.com/nanoporetech/ont_fast5_api}, + urldate = {2020-06-01} + }</citation> + </citations> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multi_to_single_fast5.xml Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,67 @@ +<?xml version="1.0"?> +<tool id="ont_fast5_api_multi_to_single_fast5" name="@TOOL_NAME@ Multi to single" version="@TOOL_VERSION@+galaxy0" profile="18.01"> + <description>read file(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <version_command><![CDATA[multi_to_single_fast5 -v]]></version_command> + <command detect_errors="exit_code"><![CDATA[ +## initialize +@INITIALIZE@ + +## run +multi_to_single_fast5 +--recursive +## required +@INPUTPATH@ +@SAVEPATH@ +## optional +@THREADS@ + +## create tarball +@TARBALL@ + ]]></command> + <inputs> + <expand macro="input"/> + </inputs> + <outputs> + <expand macro="output"/> + </outputs> + <tests> + <!-- #1 default parameters --> + <test expect_num_outputs="2"> + <param name="input_path" value="multi.fast5.tar"/> + <output name="out_results"> + <assert_contents> + <has_size value="71680"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_8.txt" /> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +@WID@ + +*multi_to_single_fast5* converts multi read FAST5 file(s) into single read FAST5 files. + +**Input** + +Multi read file(s) in FAST5 format that are stored in a flat TAR. + +**Output** + +Single read FAST5 files that are stored in a flat TAR. + +.. class:: infomark + +**References** + +@REFERENCES@ + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/single_to_multi_fast5.xml Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,104 @@ +<?xml version="1.0"?> +<tool id="ont_fast5_api_single_to_multi_fast5" name="@TOOL_NAME@ Single to multi" version="@TOOL_VERSION@+galaxy0" profile="18.01"> + <description>read file(s)</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <version_command><![CDATA[single_to_multi_fast5 -v]]></version_command> + <command detect_errors="exit_code"><![CDATA[ +## initialize +@INITIALIZE@ + +## run +single_to_multi_fast5 +--recursive +## required +@INPUTPATH@ +@SAVEPATH@ +## optional +@COMPRESSION@ +@BATCHSIZE@ +@THREADS@ + +## create tarball +@TARBALL@ + ]]></command> + <inputs> + <expand macro="input" label="single"/> + <expand macro="batch_size"/> + <expand macro="compression"/> + </inputs> + <outputs> + <expand macro="output"/> + </outputs> + <tests> + <!-- #1 default parameters --> + <test expect_num_outputs="2"> + <param name="input_path" value="single.fast5.tar"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" /> + </test> + <!-- #2 batch_size: 3, compression: vbz --> + <test expect_num_outputs="2"> + <param name="input_path" value="single.fast5.tar"/> + <param name="batch_size" value="3"/> + <param name="compression" value="vbz"/> + <output name="out_results"> + <assert_contents> + <has_size value="51200"/> + </assert_contents> + </output> + </test> + <!-- #3 compression: vbz_legacy_v0 --> + <test expect_num_outputs="2"> + <param name="input_path" value="single.fast5.tar"/> + <param name="compression" value="vbz_legacy_v0"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" /> + </test> + <!-- #4 compression: gzip --> + <test expect_num_outputs="2"> + <param name="input_path" value="single.fast5.tar"/> 
+ <param name="compression" value="gzip"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> + </output> + <output name="summary" ftype="txt" value="filename_mapping_9.txt" sort="true" /> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +@WID@ + +*single_to_multi_fast5* converts single read FAST5 files into multi read FAST5 file(s). + +**Input** + +Single read files in FAST5 format that are stored in a flat TAR. + +**Output** + +Multi read file(s) in FAST5 format that are stored in a flat TAR. + +.. class:: infomark + +**References** + +@REFERENCES@ + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_1.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,3 @@ +fe85b517-62ee-4a33-8767-41cab5d5ab39 batch0.fast5 +fe8a3026-d1f4-46b3-8daa-e610f27acde1 batch0.fast5 +fe849dd3-63bc-4044-8910-14e1686273bb batch0.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_2.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,3 @@ +fe849dd3-63bc-4044-8910-14e1686273bb batch0.fast5 +fe8a3026-d1f4-46b3-8daa-e610f27acde1 batch0.fast5 +fe85b517-62ee-4a33-8767-41cab5d5ab39 batch1.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_4.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,3 @@ +fe85b517-62ee-4a33-8767-41cab5d5ab39 batch0.fast5 +fe8a3026-d1f4-46b3-8daa-e610f27acde1 batch0.fast5 +fe849dd3-63bc-4044-8910-14e1686273bb batch0.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_5.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,3 @@ +fe849dd3-63bc-4044-8910-14e1686273bb batch0.fast5 +fe85b517-62ee-4a33-8767-41cab5d5ab39 batch0.fast5 +fe8a3026-d1f4-46b3-8daa-e610f27acde1 batch1.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_7.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,3 @@ +fe849dd3-63bc-4044-8910-14e1686273bb batch0.fast5 +fe85b517-62ee-4a33-8767-41cab5d5ab39 batch0.fast5 +fe8a3026-d1f4-46b3-8daa-e610f27acde1 batch0.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_8.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,4 @@ +./data/multi.fast5 fe849dd3-63bc-4044-8910-14e1686273bb.fast5 +./data/multi.fast5 fe85b517-62ee-4a33-8767-41cab5d5ab39.fast5 +./data/multi.fast5 fe8a3026-d1f4-46b3-8daa-e610f27acde1.fast5 +./data/multi.fast5 fe9374ee-b86a-4ca4-81dc-ac06e3297728.fast5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filename_mapping_9.txt Sat May 22 15:11:43 2021 +0000 @@ -0,0 +1,4 @@ +single1.fast5 ./results/batch_0.fast5 +single2.fast5 ./results/batch_0.fast5 +single3.fast5 ./results/batch_0.fast5 +single4.fast5 ./results/batch_0.fast5