Mercurial > repos > iuc > ont_fast5_api_fast5_subset

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fast5_subset.xml	Mon Jun 08 15:57:10 2020 -0400
@@ -0,0 +1,83 @@
+<?xml version="1.0"?>
+<!-- Galaxy wrapper for fast5_subset (ont_fast5_api): extracts the reads named in a read ID list from multi read FAST5 files. -->
+<tool id="ont_fast5_api_fast5_subset" name="Fast5 subset" version="@TOOL_VERSION@+galaxy0" profile="18.01">
+    <description>of multi read file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <version_command>fast5_subset --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize: link every selected input dataset into ./data under a .fast5 file name, so the directory can be handed to --input
+mkdir data &&
+#for $num, $current in enumerate($input):
+    ln -s '$current' './data/batch${num}.fast5' &&
+#end for
+
+## run: results are written to ./results, where Galaxy collects them (see discover_datasets below)
+fast5_subset
+## required: input directory, output directory and the list of read IDs to extract
+--input ./data
+-s ./results
+--read_id_list '$read_id_list'
+## optional: reads per output file and number of threads (GALAXY_SLOTS, falling back to 4 when unset)
+--batch_size $batch_size
+-t \${GALAXY_SLOTS:-4}
+    ]]></command>
+    <inputs>
+        <param argument="--input" type="data" format="fast5" multiple="true"
+            label="Select multi read input file(s)"/>
+        <param argument="--read_id_list" type="data" format="tabular"
+            label="Select file with read_ids" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/>
+        <param argument="--batch_size" type="integer" value="4000" min="1"
+            label="Set batch size" help="Number of single reads to include in each multi read file"/>
+    </inputs>
+    <outputs>
+        <!-- Every *.fast5 file found in ./results is collected; the first one is assigned to out_results, the others are added as discovered datasets. -->
+        <data name="out_results" format="fast5">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fast5" format="fast5" directory="results" assign_primary_output="true" visible="true"/>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value="batch.fast5"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="batch_size" value="2"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="23304"/>
+                </assert_contents>
+                <!-- batch_size=2 spreads the selected reads over batch0 and batch1; batch0 is represented by out_results, batch1 is checked as a discovered dataset -->
+                <discovered_dataset designation="batch1" ftype="fast5">
+                    <assert_contents>
+                        <has_size value="17328"/>
+                    </assert_contents>
+                </discovered_dataset>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+fast5_subset extracts reads from multi_read_fast5_file(s) based on a list of read IDs.
+
+**Input**
+
+One or more multi read files in FAST5 format and a list of read IDs that should be extracted.
+
+**Output**
+
+One or more multi read files in FAST5 format (each holding at most *batch size* reads) containing the selected subset of the input reads.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jun 08 15:57:10 2020 -0400
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<macros>
+    <!-- Upstream ont-fast5-api release: used for the package requirement below and as the base of the tool version. -->
+    <token name="@TOOL_VERSION@">3.1.3</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">ont-fast5-api</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">@online{ont_fast5_api,
+              author = {{Oxford Nanopore Technologies}},
+              title = {ont_fast5_api},
+              year = 2020,
+              url = {https://github.com/nanoporetech/ont_fast5_api},
+              urldate = {2020-06-01}
+            }</citation>
+        </citations>
+    </xml>
+    <!-- Shared introduction placed under "What it does" in the tool help. -->
+    <token name="@WID@"><![CDATA[
+ont_fast5_api is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format.
+
+- Concrete implementation of the FAST5 file schema using the generic h5py library
+- Plain-English-named methods to interact with and reflect the FAST5 file schema
+- Tools to convert between multi_read and single_read formats
+- Tools to compress/decompress raw data in files
+    ]]></token>
+    <!-- Shared text placed under "References" in the tool help. -->
+    <token name="@REFERENCES@"><![CDATA[
+More information is available on `GitHub <https://github.com/nanoporetech/ont_fast5_api>`_.
+    ]]></token>
+</macros>
\ No newline at end of file
Binary file test-data/batch.fast5 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/list.txt	Mon Jun 08 15:57:10 2020 -0400
@@ -0,0 +1,3 @@
+fe849dd3-63bc-4044-8910-14e1686273bb
+fe85b517-62ee-4a33-8767-41cab5d5ab39
+fe8a3026-d1f4-46b3-8daa-e610f27acde1