Repository 'kodoja'
hg clone https://toolshed.g2.bx.psu.edu/repos/abaizan/kodoja

Changeset 3:d4111d1de76f (2018-09-14)
Previous changeset 2:ee917702dbd8 (2018-09-10)
Commit message:
v0.0.8, expose kodoja_VRL.tsv output
modified:
README.rst
kodoja_search.xml
tool-data/kraken_databases.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
added:
test-data/read_table_PE_fastq.tabular
b
diff -r ee917702dbd8 -r d4111d1de76f README.rst
--- a/README.rst Mon Sep 10 09:16:13 2018 -0400
+++ b/README.rst Fri Sep 14 09:55:56 2018 -0400
b
@@ -54,17 +54,29 @@
     $ wget https://zenodo.org/record/1406071/files/kodojaDB_v1.0.tar.gz
     $ tar -zxvf kodojaDB_v1.0.tar.gz
 
-Then update your Galaxy configuration in ``tool-data/kraken_databases.loc``
-to add a line like this::
+Each installed version of Kodoja (or Kraken or Kaiju) will have its own
+``*.loc`` files, which Galaxy will merge into a single list. e.g.::
+
+    $ find /path/to/galaxy/tool-data -name kaiju_databases.loc
+    $ find /path/to/galaxy/tool-data -name kraken_databases.loc
+
+Edit a ``kraken_databases.loc`` file to add a line like this, where
+``(tab)`` represents inserting a tab character (NOT spaces)::
 
     kodojaDB_v1.0_kraken(tab)KodojaDB v1.0 (kraken), Sept 2018(tab)/mnt/shared/data/kodojaDB_v1.0/krakenDB
 
-And likewise update ``tool-data/kaiju_databases.loc`` with a line like::
+And likewise update ``kaiju_databases.loc`` with a line like::
 
     kodojaDB_v1.0_kaiju(tab)KodojaDB v1.0 (kaiju), Sept 2018(tab)/mnt/shared/data/kodojaDB_v1.0/kaijuDB
 
 At the time of writing, reloading the ``*.loc`` files required restarting
-the Galaxy server.
+the Galaxy server, or doing this explicitly via the "Data tables registry"
+available under Server Administration if logged into Galaxy as an administrator.
+
+It is our personal preference to work with ``tool-data/kraken_databases.loc``
+and ``tool-data/kaiju_databases.loc``, but if these are being ignored, you
+*may* need to enable this by adding the XML data table entries from our file
+``tool_data_table_conf.xml.sample`` to ``config/tool_data_table_conf.xml``.
 
 
 History
@@ -80,6 +92,9 @@
 v0.0.7  - Minor update to call ``kodoja_search.py`` v0.0.6.
         - Update help text, including zeros in columns 6 and 7.
         - Support ``$GALAXY_SLOTS``, defaulting to using four threads.
+v0.0.8  - Minor update to call ``kodoja_search.py`` v0.0.8.
+        - Option to capture the ``kodoja_VRL.txt`` read table.
+        - Accept ``kraken_databases.loc`` entry style for old Kraken wrapper.
 ======= ======================================================================
 
 
b
diff -r ee917702dbd8 -r d4111d1de76f kodoja_search.xml
--- a/kodoja_search.xml Mon Sep 10 09:16:13 2018 -0400
+++ b/kodoja_search.xml Fri Sep 14 09:55:56 2018 -0400
[
@@ -1,13 +1,16 @@
-<tool id="kodoja_search" name="Kodoja database search" version="0.0.7">
+<tool id="kodoja_search" name="Kodoja database search" version="0.0.8">
     <description>identify viruses from plant RNA sequencing data</description>
     <requirements>
-        <requirement type="package" version="0.0.7">kodoja</requirement>
+        <requirement type="package" version="0.0.8">kodoja</requirement>
     </requirements>
     <version_command>kodoja_search.py --version</version_command>
     <command detect_errors="exit_code"><![CDATA[
+## This if statement is for backward compatibility as early versions of the Kraken
+## wrapper assumed the UI facing field name was also part of the directory path
+if [ -d '${kraken_db.fields.path}/${kraken_db.fields.name}' ]; then export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}/${kraken_db.fields.name}'; else export KRAKEN_DEFAULT_DB='${kraken_db.fields.path}'; fi &&
 kodoja_search.py
 -t="\${GALAXY_SLOTS:-4}"
---kraken_db '${kraken_db.fields.path}'
+--kraken_db "\$KRAKEN_DEFAULT_DB"
 --kaiju_db '${kaiju_db.fields.path}'
 
 #if $single_paired.single_paired_selector == 'yes'
@@ -40,6 +43,10 @@
 -o .
 &&
 mv ./virus_table.txt '$combined_table'
+#if $capture_reads_table:
+&&
+mv ./kodoja_VRL.txt '$reads_table'
+#end if
 ]]></command>
     <inputs>
         <param label="Select a Kraken database" name="kraken_db" type="select">
@@ -68,9 +75,13 @@
                 <param format="fasta,fastq" label="Input sequences" name="input_sequences" type="data" help="FASTA or FASTQ datasets"/>
             </when>
         </conditional>
+        <param name="capture_reads_table" type="boolean" value="false" label="Capture read assignment table" help="This table can be used to filter out reads matched to (individual) viruses"/>
     </inputs>
     <outputs>
         <data name="combined_table" format="tabular" label="Kodoja species report for ${on_string}" />
+        <data name="reads_table" format="tabular" label="Kodoja read assignment for ${on_string}">
+            <filter>capture_reads_table</filter>
+        </data>
     </outputs>
     <tests>
         <test>
@@ -92,6 +103,16 @@
             <param name="kraken_db" value="kraken3viruses" />
             <param name="kaiju_db" value="kaiju3viruses" />
             <param name="single_paired_selector" value="yes" />
+            <param name="forward_input" value="testData_1.fastq" ftype="fastq" />
+            <param name="reverse_input" value="testData_2.fastq" ftype="fastq" />
+            <param name="capture_reads_table" value="true" />
+            <output name="combined_table" file="virus_table_PE_fastq.tabular" ftype="tabular" />
+            <output name="reads_table" file="read_table_PE_fastq.tabular" ftype="tabular" />
+        </test>
+        <test>
+            <param name="kraken_db" value="kraken3viruses" />
+            <param name="kaiju_db" value="kaiju3viruses" />
+            <param name="single_paired_selector" value="yes" />
             <param name="forward_input" value="testData_1.fasta" ftype="fasta" />
             <param name="reverse_input" value="testData_2.fasta" ftype="fasta" />
             <output name="combined_table" file="virus_table_PE_fasta.tabular" ftype="tabular" />
@@ -126,8 +147,19 @@
 Tobacco etch virus                         12227                21                            19 Potyvirus                0                           0
 ================================== ============= ================= ============================= ========== =============== ===========================
 
-The command line tool offers additional options not currently exposed
-in Galaxy, including::
+The second most important output, which you can optionally capture
+for use within Galaxy, is a per-read table summarising matches found
+with Kraken and/or Kaiju. The Kodoja Retrieve tool is not currently
+available within Galaxy, but you can instead use this file directly
+within Galaxy to filter out just the virus reads, or even reads
+matched to a specific taxid. See for example ``seq_filter_by_id``
+which is available via the Galaxy Tool Shed:
+
+http://toolshed.g2.bx.psu.edu/view/peterjc/seq_filter_by_id
+https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_filter_by_id
+
+The Kodoja Search command line tool offers additional options not
+currently exposed in Galaxy, including::
 
                             Number of threads
       -s, --host_subset     Subset host sequences before Kaiju
@@ -145,7 +177,6 @@
       -i KAIJU_MISMATCH, --kaiju_mismatch KAIJU_MISMATCH
                             Kaju allowed mismatches
 
-
 For more information, please see the Kodoja manual
 https://github.com/abaizan/kodoja/wiki/Kodoja-Manual
     ]]></help>
b
diff -r ee917702dbd8 -r d4111d1de76f test-data/read_table_PE_fastq.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/read_table_PE_fastq.tabular Fri Sep 14 09:55:56 2018 -0400
b
b'@@ -0,0 +1,95 @@\n+kraken_classified\tSeq_ID\tkraken_tax_ID\tkraken_seq_tax\tkaiju_classified\tkaiju_tax_ID\tkaiju_seq_tax\tcombined_result\n+C\tSRR3466597.2160184\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1867187\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1714296\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.979054\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1842453\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1967334\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.881543\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.2337588\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1545177\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1972168\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.431651\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tU\t0\t\t\n+C\tSRR3466597.13
60962\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1276425\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.833605\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tU\t0\t\t\n+C\tSRR3466597.1371609\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.899074\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.2102827\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.847861\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1902260\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.29276\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tSRR3466597.1001991\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\tC\t12227\td__Viruses|f__Potyviridae|g__Potyvirus|s__Tobacco_etch_virus\t12227.0\n+C\tERR996011.39031\t946046\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Ugandan_cassava_brown_streak_virus\tC\t946046\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Ugandan_cassava_brown_streak_virus\t946046.0\n+C\tERR996011.157056\t946046\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Ugandan_cassava_brown_streak_virus\tC\t946046\td__Viruses|f__Potyviridae
|g__Ipomovirus|s__Ugandan_cassava_brown_streak_virus\t946046.0\n+C\tERR996011.182572\t946046\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Ugandan_cassava_brown_streak_virus\tC\t946046\td__Viruses|f__Po'..b'wn_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.747299\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.752745\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.756913\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.793496\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.804840\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.860827\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.909965\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.937909\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.947899\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__I
pomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.967303\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.994308\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1066941\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1134829\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1154101\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1172801\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1177982\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1222223\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1249443\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1295292\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR9960
13.1301772\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n+C\tERR996013.1308814\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\tC\t137758\td__Viruses|f__Potyviridae|g__Ipomovirus|s__Cassava_brown_streak_virus\t137758.0\n'
b
diff -r ee917702dbd8 -r d4111d1de76f tool-data/kraken_databases.loc.sample
--- a/tool-data/kraken_databases.loc.sample Mon Sep 10 09:16:13 2018 -0400
+++ b/tool-data/kraken_databases.loc.sample Fri Sep 14 09:55:56 2018 -0400
b
@@ -1,6 +1,7 @@
 # This is a tab-separated file listing any Kraken databases
 # installed locally to offer via Galaxy.
 #
+# Expect three columns, tab separated, as follows:
 # - Column 1 - value, the key which Galaxy will record in DB
 # - Column 2 - name, the text caption which Galaxy show users
 # - Column 3 - path, the directory name (which will contain
@@ -12,3 +13,12 @@
 # kodoja at https://doi.org/10.5281/zenodo.1406071
 #
 # kodojaDB_v1.0_kraken(tab)KodojaDB v1.0 (kraken), Sept 2018(tab)/mnt/shared/data/kodojaDB_v1.0/krakenDB
+#
+# Also, for backward compatibility with early versions of the
+# Galaxy wrapper for Kraken, the Kodoja wrapper will also
+# accept three columns as follows:
+# - Column 1 - value, the key which Galaxy will record in DB
+# - Column 2 - name, final part of folder name, which Galaxy
+#   will show the user in the drop down lists to choose from
+# - Column 3 - path, parent folder name where $path/$name
+#   is the folder containing ``database.idx`` etc.
b
diff -r ee917702dbd8 -r d4111d1de76f tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Mon Sep 10 09:16:13 2018 -0400
+++ b/tool_data_table_conf.xml.sample Fri Sep 14 09:55:56 2018 -0400
b
@@ -6,7 +6,7 @@
         <columns>value, name, path</columns>
         <file path="tool-data/kraken_databases.loc" />
     </table>
-    <!-- Locations of Kaiju databses -->
+    <!-- Locations of Kaiju databases -->
     <!-- This is intended to match any kaiju-only wrapper -->
     <table name="kaiju_databases" comment_char="#">
         <columns>value, name, path</columns>
b
diff -r ee917702dbd8 -r d4111d1de76f tool_data_table_conf.xml.test
--- a/tool_data_table_conf.xml.test Mon Sep 10 09:16:13 2018 -0400
+++ b/tool_data_table_conf.xml.test Fri Sep 14 09:55:56 2018 -0400
b
@@ -6,7 +6,7 @@
         <columns>value, name, path</columns>
         <file path="${__HERE__}/test-data/kraken_databases.loc" />
     </table>
-    <!-- Locations of Kaiju databses -->
+    <!-- Locations of Kaiju databases -->
     <!-- This is intended to match any kaiju-only wrapper -->
     <table name="kaiju_databases" comment_char="#">
         <columns>value, name, path</columns>