Mercurial > repos > iuc > ont_fast5_api_fast5_subset

--- a/fast5_subset.xml	Mon Jun 08 15:57:10 2020 -0400
+++ b/fast5_subset.xml	Fri Jun 12 15:08:23 2020 -0400
@@ -1,56 +1,86 @@
 <?xml version="1.0"?>
-<tool id="ont_fast5_api_fast5_subset" name="Fast5 subset" version="@TOOL_VERSION@+galaxy0" profile="18.01">
-    <description>of multi read file</description>
+<tool id="ont_fast5_api_fast5_subset" name="@TOOL_NAME@ Subset" version="@TOOL_VERSION@+galaxy1" profile="18.01">
+    <description>of multi read file(s)</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
-    <version_command></version_command>
+    <!-- no specific version command for subcommand fast5_subset available -->
+    <version_command><![CDATA[compress_fast5 -v]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
 ## initialize
-mkdir data &&
-#for $num, $current in enumerate($input):
-    ln -s '$current' './data/batch${num}.fast5' &&
-#end for
+mkdir -p './data' &&
+tar -xf '$input' -C './data' &&

 ## run
 fast5_subset
 ## required
---input ./data
--s ./results
+--input './data'
+@SAVEPATH@
 --read_id_list '$read_id_list'
 ## optional
---batch_size $batch_size
--t \${GALAXY_SLOTS:-4}
+@COMPRESSION@
+@BATCHSIZE@
+@THREADS@
+
+## create tarball
+@TARBALL@
     ]]></command>
     <inputs>
-        <param argument="--input" type="data" format="fast5" multiple="true"
-            label="Select multi read input file(s)"/>
-        <param argument="--read_id_list" type="data" format="tabular"
-            label="Select file with read_ids" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/>
-        <param argument="--batch_size" type="integer" value="4000" min="1"
-            label="Set batch size" help="Number of single reads to include in each multi read file"/>
+        <expand macro="input" argument="--input"/>
+        <param argument="--read_id_list" type="data" format="tabular" label="Select file with read IDs" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/>
+        <expand macro="batch_size"/>
+        <expand macro="compression">
+            <option value="none" selected="true">None</option>
+            <option value="vbz">VBZ</option>
+        </expand>
     </inputs>
     <outputs>
-        <data name="out_results" format="fast5">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fast5" format="fast5" directory="results" assign_primary_output="true" visible="true"/>
-        </data>
+        <expand macro="output"/>
     </outputs>
     <tests>
+        <!-- #1 default -->
         <test expect_num_outputs="1">
-            <param name="input" value="batch.fast5"/>
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="30720"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 -->
+        <test expect_num_outputs="1">
+            <param name="input" value="multi.fast5.tar"/>
             <param name="read_id_list" value="list.txt"/>
             <param name="batch_size" value="2"/>
+            <param name="compression" value="gzip"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="51200"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #3 -->
+        <test expect_num_outputs="1">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="compression" value="vbz"/>
             <output name="out_results">
                 <assert_contents>
-                    <has_size value="23304"/>
+                    <has_size value="40960"/>
                 </assert_contents>
-                <!-- batch0 is represented by out_results -->
-                <discovered_dataset designation="batch1" ftype="fast5">
-                    <assert_contents>
-                        <has_size value="17328"/>
-                    </assert_contents>
-                </discovered_dataset>
+            </output>
+        </test>
+        <!-- #4 -->
+        <test expect_num_outputs="1">
+            <param name="input" value="multi.fast5.tar"/>
+            <param name="read_id_list" value="list.txt"/>
+            <param name="compression" value="vbz_legacy_v0"/>
+            <output name="out_results">
+                <assert_contents>
+                    <has_size value="40960"/>
+                </assert_contents>
             </output>
         </test>
     </tests>
@@ -61,15 +91,15 @@

 @WID@

-fast5_subset extracts reads from multi_read_fast5_file(s) based on a list of read IDs.
+*fast5_subset* extracts reads from multi read FAST5 file(s) based on a list of read IDs.

 **Input**

-A multi read file in FAST5 format and a list of read IDs that should be extracted.
+Multi read file(s) in FAST5 format, stored in a flat TAR archive, and a list of read IDs that should be extracted.

 **Output**

-A multi read file in FAST5 format containing a subset of the input file.
+Multi read file(s) in FAST5 format containing a subset of the input file(s). The results are stored in a flat TAR.

 .. class:: infomark

@@ -78,4 +108,4 @@
 @REFERENCES@
     ]]></help>
     <expand macro="citations"/>
-</tool>
+</tool>
\ No newline at end of file
--- a/macros.xml	Mon Jun 08 15:57:10 2020 -0400
+++ b/macros.xml	Fri Jun 12 15:08:23 2020 -0400
@@ -1,31 +1,83 @@
 <?xml version="1.0"?>
 <macros>
     <token name="@TOOL_VERSION@">3.1.3</token>
+    <token name="@TOOL_NAME@">ont_fast5_api:</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">ont-fast5-api</requirement>
+            <requirement type="package" version="1.10.5">hdf5</requirement>
         </requirements>
     </xml>
-	<xml name="citations">
-        <citations>
-            <citation type="bibtex">@online{ont_fast5_api,
-              author = {Oxford Nanopore Technologies },
-              title = {ont_fast5_api},
-              year = 2020,
-              url = {https://github.com/nanoporetech/ont_fast5_api},
-              urldate = {2020-06-01}
-		    }</citation>
-        </citations>
+
+    <!-- command -->
+    <token name="@BATCHSIZE@"><![CDATA[
+--batch_size $batch_size
+    ]]></token>
+    <token name="@COMPRESSION@"><![CDATA[
+#if $compression != 'none'
+    --compression '$compression'
+#end if
+    ]]></token>
+    <token name="@INITIALIZE@"><![CDATA[
+mkdir -p './data' &&
+tar -xf '$input_path' -C './data' &&
+    ]]></token>
+    <token name="@INPUTPATH@"><![CDATA[
+--input_path './data'
+    ]]></token>
+    <token name="@SAVEPATH@"><![CDATA[
+--save_path './results'
+    ]]></token>
+    <token name="@TARBALL@"><![CDATA[
+&& find './results' -type f -name '*.fast5' | tar --transform 's/.*\///g' -cvf './results.fast5.tar' --files-from=/dev/stdin
+    ]]></token>
+    <token name="@THREADS@"><![CDATA[
+--threads \${GALAXY_SLOTS:-4}
+    ]]></token>
+
+    <!-- input -->
+    <xml name="input" token_argument="--input_path" token_label="multi">
+        <param argument="@ARGUMENT@" type="data" format="fast5.tar" label="Select @LABEL@ read input file"/>
     </xml>
-    <token name="@WID@"><![CDATA[
-ont_fast5_api is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format.
+    <xml name="batch_size">
+        <param argument="--batch_size" type="integer" value="4000" min="1" label="Set batch size" help="Number of single reads to include in each multi read file"/>
+    </xml>
+    <xml name="compression">
+        <param argument="--compression" type="select" label="Select output compression type">
+            <yield/>
+            <option value="vbz_legacy_v0">VBZ legacy v0</option>
+            <option value="gzip">GZIP</option>
+        </param>
+    </xml>

-- Concrete implementation of the FAST5 file schema using the generic h5py library
-- Plain-english-named methods to interact with and reflect the FAST5 file schema
-- Tools to convert between multi_read and single_read formats
-- Tools to compress/decompress raw data in files
+    <!-- output -->
+    <xml name="output">
+        <data name="out_results" format="fast5.tar" from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results"/>
+    </xml>
+
+   <!-- help -->
+    <token name="@WID@"><![CDATA[
+*ont_fast5_api* is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format.
+
+- concrete implementation of the FAST5 file schema using the generic h5py library
+- plain-english-named methods to interact with and reflect the FAST5 file schema
+- tools to convert between multi_read and single_read formats
+- tools to compress/decompress raw data in files
     ]]></token>
     <token name="@REFERENCES@"><![CDATA[
 More information are available on `github <https://github.com/nanoporetech/ont_fast5_api>`_.
     ]]></token>
-</macros>
\ No newline at end of file
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @online{ont_fast5_api,
+                author = {Oxford Nanopore Technologies },
+                title = {ont_fast5_api},
+                year = 2020,
+                url = {https://github.com/nanoporetech/ont_fast5_api},
+                urldate = {2020-06-01}
+            }</citation>
+        </citations>
+    </xml>
+</macros>
Binary file test-data/batch.fast5 has changed
Binary file test-data/multi.fast5.tar has changed
Binary file test-data/single.fast5.tar has changed